A timing issue can occur where qib_dereg_mr() can return -EBUSY if the
MR use count is not zero.
This can occur if the MR is de-registered while RDMA read response
packets are being progressed from the SDMA ring. The suspicion is
that the peer sent an RDMA read request whose response data has
already been copied across to it. The peer sees the completion of its
request and
then communicates to the responder that the MR is not needed any
longer. The responder tries to de-register the MR while some
responses still remain in the SDMA ring, holding the MR use count.
The code now uses a get/put paradigm to track MR use counts and
coordinates with the MR de-registration process using a completion
when the count has reached zero. A timeout on the completion wait is
in place to catch other EBUSY issues.
The reference count protocol is as follows:
- The return to the user counts as 1
- A reference from the lk_table or the qib_ibdev counts as 1
- Transient I/O operations increase/decrease as necessary
A lot of code duplication has been folded into the new routines
init_qib_mregion() and deinit_qib_mregion(). Additionally, explicit
initialization of fields to zero is now handled by kzalloc().
Also, the duplicated 'while.*num_sge' code that decrements reference
counts has been consolidated into qib_put_ss().
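For reference, a minimal sketch of the pattern (illustrative names
only, not the driver's; the real helpers are qib_get_mr(),
qib_put_mr() and qib_put_ss(), and the dereg paths below use the same
5 second timeout):

    #include <linux/atomic.h>
    #include <linux/completion.h>

    struct counted {
            atomic_t refcount;        /* set to 1 at init: the user's ref */
            struct completion comp;   /* completed when refcount hits 0 */
    };

    static inline void counted_get(struct counted *c)
    {
            atomic_inc(&c->refcount);
    }

    static inline void counted_put(struct counted *c)
    {
            if (atomic_dec_and_test(&c->refcount))
                    complete(&c->comp);
    }

    /* De-register: drop the user's ref, then wait out transient I/O. */
    static int counted_deregister(struct counted *c)
    {
            counted_put(c);
            if (!wait_for_completion_timeout(&c->comp, 5 * HZ)) {
                    counted_get(c); /* restore; I/O still holds refs */
                    return -EBUSY;
            }
            return 0;       /* count reached zero; safe to tear down */
    }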
Reviewed-by: Ramkrishna Vepa <ramkrishna.vepa@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
/**
* qib_alloc_lkey - allocate an lkey
- * @rkt: lkey table in which to allocate the lkey
* @mr: memory region that this lkey protects
+ * @dma_region: 0->normal key, 1->restricted DMA key
+ *
- * Returns 1 if successful, otherwise returns 0.
+ * Returns 0 if successful, otherwise returns -errno.
+ *
+ * Increments mr reference count and sets published
+ * as required.
+ *
+ * Sets the lkey field of the mr for non-dma regions.
*/
-int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr)
+int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
{
unsigned long flags;
u32 r;
u32 n;
+ int ret = 0;
+ struct qib_ibdev *dev = to_idev(mr->pd->device);
+ struct qib_lkey_table *rkt = &dev->lk_table;
spin_lock_irqsave(&rkt->lock, flags);
+ /* special case for dma_mr lkey == 0 */
+ if (dma_region) {
+ /* should the dma_mr be relative to the pd? */
+ if (!dev->dma_mr) {
+ qib_get_mr(mr);
+ dev->dma_mr = mr;
+ mr->lkey_published = 1;
+ }
+ goto success;
+ }
+
/* Find the next available LKEY */
r = rkt->next;
n = r;
if (rkt->table[r] == NULL)
break;
r = (r + 1) & (rkt->max - 1);
- if (r == n) {
- spin_unlock_irqrestore(&rkt->lock, flags);
- ret = 0;
- goto bail;
- }
+ if (r == n)
+ goto bail;
}
rkt->next = (r + 1) & (rkt->max - 1);
/*
mr->lkey |= 1 << 8;
rkt->gen++;
}
+ mr->lkey_published = 1;
+success:
spin_unlock_irqrestore(&rkt->lock, flags);
+out:
+ return ret;
+bail:
+ spin_unlock_irqrestore(&rkt->lock, flags);
+ ret = -ENOMEM;
+ goto out;
}
/**
* qib_free_lkey - free an lkey
- * @rkt: table from which to free the lkey
- * @lkey: lkey id to free
+ * @mr: mr to free from tables
*/
-int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr)
+void qib_free_lkey(struct qib_mregion *mr)
{
unsigned long flags;
u32 lkey = mr->lkey;
u32 r;
+ struct qib_ibdev *dev = to_idev(mr->pd->device);
+ struct qib_lkey_table *rkt = &dev->lk_table;
+
+ spin_lock_irqsave(&rkt->lock, flags);
+ if (!mr->lkey_published)
+ goto out;
+ mr->lkey_published = 0;
+
- spin_lock_irqsave(&dev->lk_table.lock, flags);
if (lkey == 0) {
if (dev->dma_mr && dev->dma_mr == mr) {
- ret = atomic_read(&dev->dma_mr->refcount);
- if (!ret)
- dev->dma_mr = NULL;
- } else
- ret = 0;
+ qib_put_mr(dev->dma_mr);
+ dev->dma_mr = NULL;
+ }
} else {
r = lkey >> (32 - ib_qib_lkey_table_size);
- ret = atomic_read(&dev->lk_table.table[r]->refcount);
- if (!ret)
- dev->lk_table.table[r] = NULL;
+ qib_put_mr(rkt->table[r]);
+ rkt->table[r] = NULL;
- spin_unlock_irqrestore(&dev->lk_table.lock, flags);
-
- if (ret)
- ret = -EBUSY;
- return ret;
+out:
+ spin_unlock_irqrestore(&rkt->lock, flags);
}
goto bail;
if (!dev->dma_mr)
goto bail;
- atomic_inc(&dev->dma_mr->refcount);
+ qib_get_mr(dev->dma_mr);
spin_unlock_irqrestore(&rkt->lock, flags);
isge->mr = dev->dma_mr;
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
goto bail;
- atomic_inc(&mr->refcount);
+ qib_get_mr(mr);
spin_unlock_irqrestore(&rkt->lock, flags);
off += mr->offset;
goto bail;
if (!dev->dma_mr)
goto bail;
- atomic_inc(&dev->dma_mr->refcount);
+ qib_get_mr(dev->dma_mr);
spin_unlock_irqrestore(&rkt->lock, flags);
sge->mr = dev->dma_mr;
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
goto bail;
- atomic_inc(&mr->refcount);
+ qib_get_mr(mr);
spin_unlock_irqrestore(&rkt->lock, flags);
off += mr->offset;
return container_of(ibfmr, struct qib_fmr, ibfmr);
}
+static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd,
+ int count)
+{
+ int m, i = 0;
+ int rval = 0;
+
+ m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
+ for (; i < m; i++) {
+ mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL);
+ if (!mr->map[i])
+ goto bail;
+ }
+ mr->mapsz = m;
+ init_completion(&mr->comp);
+ /* count returning the ptr to user */
+ atomic_set(&mr->refcount, 1);
+ mr->pd = pd;
+ mr->max_segs = count;
+out:
+ return rval;
+bail:
+ while (i)
+ kfree(mr->map[--i]);
+ rval = -ENOMEM;
+ goto out;
+}
+
+static void deinit_qib_mregion(struct qib_mregion *mr)
+{
+ int i = mr->mapsz;
+
+ mr->mapsz = 0;
+ while (i)
+ kfree(mr->map[--i]);
+}
+
+
/**
* qib_get_dma_mr - get a DMA memory region
* @pd: protection domain for this memory region
*/
struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
{
- struct qib_ibdev *dev = to_idev(pd->device);
- struct qib_mr *mr;
+ struct qib_mr *mr = NULL;
+ int rval;
if (to_ipd(pd)->user) {
ret = ERR_PTR(-EPERM);
- mr->mr.access_flags = acc;
- atomic_set(&mr->mr.refcount, 0);
+ rval = init_qib_mregion(&mr->mr, pd, 0);
+ if (rval) {
+ ret = ERR_PTR(rval);
+ goto bail;
+ }
- spin_lock_irqsave(&dev->lk_table.lock, flags);
- if (!dev->dma_mr)
- dev->dma_mr = &mr->mr;
- spin_unlock_irqrestore(&dev->lk_table.lock, flags);
+ rval = qib_alloc_lkey(&mr->mr, 1);
+ if (rval) {
+ ret = ERR_PTR(rval);
+ goto bail_mregion;
+ }
+
+ mr->mr.access_flags = acc;
+ ret = &mr->ibmr;
+done:
+ return ret;
+
+bail_mregion:
+ deinit_qib_mregion(&mr->mr);
+bail:
+ kfree(mr);
+ goto done;
-static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table)
+static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
+ int rval = -ENOMEM;
+ int m;
/* Allocate struct plus pointers to first level page tables. */
m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
- mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
+ mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
if (!mr)
- goto done;
+ goto bail;
-
- /* Allocate first level page tables. */
- for (; i < m; i++) {
- mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
- if (!mr->mr.map[i])
- goto bail;
- }
- mr->mr.mapsz = m;
- mr->mr.page_shift = 0;
- mr->mr.max_segs = count;
+ rval = init_qib_mregion(&mr->mr, pd, count);
+ if (rval)
+ goto bail;
/*
* ib_reg_phys_mr() will initialize mr->ibmr except for
* lkey and rkey.
*/
- if (!qib_alloc_lkey(lk_table, &mr->mr))
- goto bail;
+ rval = qib_alloc_lkey(&mr->mr, 0);
+ if (rval)
+ goto bail_mregion;
mr->ibmr.lkey = mr->mr.lkey;
mr->ibmr.rkey = mr->mr.lkey;
- atomic_set(&mr->mr.refcount, 0);
- goto done;
-
-bail:
- while (i)
- kfree(mr->mr.map[--i]);
- mr = NULL;
-
-done:
- return mr;
+done:
+ return mr;
+
+bail_mregion:
+ deinit_qib_mregion(&mr->mr);
+bail:
+ kfree(mr);
+ mr = ERR_PTR(rval);
+ goto done;
int n, m, i;
struct ib_mr *ret;
- mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
- if (mr == NULL) {
- ret = ERR_PTR(-ENOMEM);
+ mr = alloc_mr(num_phys_buf, pd);
+ if (IS_ERR(mr)) {
+ ret = (struct ib_mr *)mr;
mr->mr.user_base = *iova_start;
mr->mr.iova = *iova_start;
- mr->mr.length = 0;
- mr->mr.offset = 0;
mr->mr.access_flags = acc;
* @pd: protection domain for this memory region
* @start: starting userspace address
* @length: length of region to register
- * @virt_addr: virtual address to use (from HCA's point of view)
* @mr_access_flags: access flags for this memory region
* @udata: unused by the QLogic_IB driver
*
list_for_each_entry(chunk, &umem->chunk_list, list)
n += chunk->nents;
- mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
- if (!mr) {
- ret = ERR_PTR(-ENOMEM);
+ mr = alloc_mr(n, pd);
+ if (IS_ERR(mr)) {
+ ret = (struct ib_mr *)mr;
ib_umem_release(umem);
goto bail;
}
mr->mr.user_base = start;
mr->mr.iova = virt_addr;
mr->mr.length = length;
int qib_dereg_mr(struct ib_mr *ibmr)
{
struct qib_mr *mr = to_imr(ibmr);
- struct qib_ibdev *dev = to_idev(ibmr->device);
- int ret;
- int i;
-
- ret = qib_free_lkey(dev, &mr->mr);
- if (ret)
- return ret;
-
- i = mr->mr.mapsz;
- while (i)
- kfree(mr->mr.map[--i]);
+ int ret = 0;
+ unsigned long timeout;
+
+ qib_free_lkey(&mr->mr);
+
+ qib_put_mr(&mr->mr); /* will set completion if last */
+ timeout = wait_for_completion_timeout(&mr->mr.comp,
+ 5 * HZ);
+ if (!timeout) {
+ qib_get_mr(&mr->mr);
+ ret = -EBUSY;
+ goto out;
+ }
+ deinit_qib_mregion(&mr->mr);
if (mr->umem)
ib_umem_release(mr->umem);
kfree(mr);
+out:
+ return ret;
- mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table);
- if (mr == NULL)
- return ERR_PTR(-ENOMEM);
-
- mr->mr.pd = pd;
- mr->mr.user_base = 0;
- mr->mr.iova = 0;
- mr->mr.length = 0;
- mr->mr.offset = 0;
- mr->mr.access_flags = 0;
- mr->umem = NULL;
+ mr = alloc_mr(max_page_list_len, pd);
+ if (IS_ERR(mr))
+ return (struct ib_mr *)mr;
if (size > PAGE_SIZE)
return ERR_PTR(-EINVAL);
- pl = kmalloc(sizeof *pl, GFP_KERNEL);
+ pl = kzalloc(sizeof *pl, GFP_KERNEL);
if (!pl)
return ERR_PTR(-ENOMEM);
- pl->page_list = kmalloc(size, GFP_KERNEL);
+ pl->page_list = kzalloc(size, GFP_KERNEL);
if (!pl->page_list)
goto err_free;
struct ib_fmr_attr *fmr_attr)
{
struct qib_fmr *fmr;
+ int rval = -ENOMEM;
/* Allocate struct plus pointers to first level page tables. */
m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ;
- fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
+ fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
- /* Allocate first level page tables. */
- for (; i < m; i++) {
- fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
- GFP_KERNEL);
- if (!fmr->mr.map[i])
- goto bail;
- }
- fmr->mr.mapsz = m;
+ rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+ if (rval)
+ goto bail;
/*
* ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
* rkey.
*/
- if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
- goto bail;
+ rval = qib_alloc_lkey(&fmr->mr, 0);
+ if (rval)
+ goto bail_mregion;
fmr->ibfmr.rkey = fmr->mr.lkey;
fmr->ibfmr.lkey = fmr->mr.lkey;
/*
* Resources are allocated but no valid mapping (RKEY can't be
* used).
*/
- fmr->mr.pd = pd;
- fmr->mr.user_base = 0;
- fmr->mr.iova = 0;
- fmr->mr.length = 0;
- fmr->mr.offset = 0;
fmr->mr.access_flags = mr_access_flags;
fmr->mr.max_segs = fmr_attr->max_pages;
fmr->mr.page_shift = fmr_attr->page_shift;
- atomic_set(&fmr->mr.refcount, 0);
- while (i)
- kfree(fmr->mr.map[--i]);
- ret = ERR_PTR(-ENOMEM);
-
-done:
- return ret;
+ ret = &fmr->ibfmr;
+done:
+ return ret;
+
+bail_mregion:
+ deinit_qib_mregion(&fmr->mr);
+bail:
+ kfree(fmr);
+ ret = ERR_PTR(rval);
+ goto done;
- if (atomic_read(&fmr->mr.refcount))
+ i = atomic_read(&fmr->mr.refcount);
+ if (i > 2)
return -EBUSY;
if (list_len > fmr->mr.max_segs) {
int qib_dealloc_fmr(struct ib_fmr *ibfmr)
{
struct qib_fmr *fmr = to_ifmr(ibfmr);
- int ret;
- int i;
-
- ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr);
- if (ret)
- return ret;
-
- i = fmr->mr.mapsz;
- while (i)
- kfree(fmr->mr.map[--i]);
+ int ret = 0;
+ unsigned long timeout;
+
+ qib_free_lkey(&fmr->mr);
+ qib_put_mr(&fmr->mr); /* will set completion if last */
+ timeout = wait_for_completion_timeout(&fmr->mr.comp,
+ 5 * HZ);
+ if (!timeout) {
+ qib_get_mr(&fmr->mr);
+ ret = -EBUSY;
+ goto out;
+ }
+ deinit_qib_mregion(&fmr->mr);
+ kfree(fmr);
+out:
+ return ret;
unsigned n;
if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
- while (qp->s_rdma_read_sge.num_sge) {
- atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
- if (--qp->s_rdma_read_sge.num_sge)
- qp->s_rdma_read_sge.sge =
- *qp->s_rdma_read_sge.sg_list++;
- }
+ qib_put_ss(&qp->s_rdma_read_sge);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (clr_sends) {
while (qp->s_last != qp->s_head) {
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->s_last = 0;
}
if (qp->s_rdma_mr) {
- atomic_dec(&qp->s_rdma_mr->refcount);
+ qib_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;
}
}
if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
}
if (!(qp->s_flags & QIB_S_BUSY)) {
qp->s_hdrwords = 0;
if (qp->s_rdma_mr) {
- atomic_dec(&qp->s_rdma_mr->refcount);
+ qib_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;
}
if (qp->s_tx) {
case OP(RDMA_READ_RESPONSE_ONLY):
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
if (e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
/* FALLTHROUGH */
/* Copy SGE state in case we need to resend */
qp->s_rdma_mr = e->rdma_sge.mr;
if (qp->s_rdma_mr)
- atomic_inc(&qp->s_rdma_mr->refcount);
+ qib_get_mr(qp->s_rdma_mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
qp->s_cur_sge = &qp->s_ack_rdma_sge;
qp->s_cur_sge = &qp->s_ack_rdma_sge;
qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
if (qp->s_rdma_mr)
- atomic_inc(&qp->s_rdma_mr->refcount);
+ qib_get_mr(qp->s_rdma_mr);
len = qp->s_ack_rdma_sge.sge.sge_length;
if (len > pmtu)
len = pmtu;
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
/* Post a send completion queue entry if requested. */
if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
if (unlikely(offset + len != e->rdma_sge.sge_length))
goto unlock_done;
if (e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
if (len != 0) {
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
qib_copy_sge(&qp->r_sge, data, tlen, 1);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
qp->r_msn++;
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
break;
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
reth = &ohdr->u.rc.reth;
}
e = &qp->s_ack_queue[qp->r_head_ack_queue];
if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
- atomic_dec(&e->rdma_sge.mr->refcount);
+ qib_put_mr(e->rdma_sge.mr);
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
be64_to_cpu(ateth->compare_data),
sdata);
- atomic_dec(&qp->r_sge.sge.mr->refcount);
+ qib_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
e->opcode = opcode;
e->sent = 0;
while (j) {
struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
ss->num_sge = 0;
memset(&wc, 0, sizeof(wc));
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
sdata, wqe->wr.wr.atomic.swap);
- atomic_dec(&qp->r_sge.sge.mr->refcount);
+ qib_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
goto send_comp;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (!release)
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
if (--sqp->s_sge.num_sge)
*sge = *sqp->s_sge.sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
sqp->s_len -= len;
}
if (release)
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
goto send_comp;
for (i = 0; i < wqe->wr.num_sge; i++) {
struct qib_sge *sge = &wqe->sg_list[i];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
qp->r_sge.num_sge = 0;
} else
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
qp->r_state = OP(SEND_LAST);
switch (opcode) {
case OP(SEND_FIRST):
goto rewind;
wc.opcode = IB_WC_RECV;
qib_copy_sge(&qp->r_sge, data, tlen, 0);
- while (qp->s_rdma_read_sge.num_sge) {
- atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
- if (--qp->s_rdma_read_sge.num_sge)
- qp->s_rdma_read_sge.sge =
- *qp->s_rdma_read_sge.sg_list++;
- }
+ qib_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
- while (qp->s_rdma_read_sge.num_sge) {
- atomic_dec(&qp->s_rdma_read_sge.sge.mr->
- refcount);
- if (--qp->s_rdma_read_sge.num_sge)
- qp->s_rdma_read_sge.sge =
- *qp->s_rdma_read_sge.sg_list++;
- }
+ qib_put_ss(&qp->s_rdma_read_sge);
else {
ret = qib_get_rwqe(qp, 1);
if (ret < 0)
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
qib_copy_sge(&qp->r_sge, data, tlen, 1);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
goto last_imm;
case OP(RDMA_WRITE_LAST):
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
qib_copy_sge(&qp->r_sge, data, tlen, 1);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
goto bail_unlock;
wc.wr_id = qp->r_wr_id;
} else
qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
- while (qp->r_sge.num_sge) {
- atomic_dec(&qp->r_sge.sge.mr->refcount);
- if (--qp->r_sge.num_sge)
- qp->r_sge.sge = *qp->r_sge.sg_list++;
- }
+ qib_put_ss(&qp->r_sge);
if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
return;
wc.wr_id = qp->r_wr_id;
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (release)
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
sge->sge_length -= len;
if (sge->sge_length == 0) {
if (release)
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
if (--ss->num_sge)
*sge = *ss->sg_list++;
} else if (sge->length == 0 && sge->mr->lkey) {
while (j) {
struct qib_sge *sge = &wqe->sg_list[--j];
- atomic_dec(&sge->mr->refcount);
+ qib_put_mr(sge->mr);
}
bail_inval:
ret = -EINVAL;
if (atomic_dec_and_test(&qp->refcount))
wake_up(&qp->wait);
if (tx->mr) {
- atomic_dec(&tx->mr->refcount);
+ qib_put_mr(tx->mr);
tx->mr = NULL;
}
if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
}
qib_sendbuf_done(dd, pbufn);
if (qp->s_rdma_mr) {
- atomic_dec(&qp->s_rdma_mr->refcount);
+ qib_put_mr(qp->s_rdma_mr);
qp->s_rdma_mr = NULL;
}
if (qp->s_wqe) {
#include <linux/interrupt.h>
#include <linux/kref.h>
#include <linux/workqueue.h>
+#include <linux/completion.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
u32 max_segs; /* number of qib_segs in all the arrays */
u32 mapsz; /* size of the map array */
u8 page_shift; /* 0 - non unform/non powerof2 sizes */
+ u8 lkey_published; /* in global table */
+ struct completion comp; /* complete when refcount goes to zero */
atomic_t refcount;
struct qib_segarray *map[0]; /* the segments */
};
void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
int has_grh, void *data, u32 tlen, struct qib_qp *qp);
-int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr);
+int qib_alloc_lkey(struct qib_mregion *mr, int dma_region);
-int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr);
+void qib_free_lkey(struct qib_mregion *mr);
int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
struct qib_sge *isge, struct ib_sge *sge, int acc);
int qib_dealloc_fmr(struct ib_fmr *ibfmr);
+static inline void qib_get_mr(struct qib_mregion *mr)
+{
+ atomic_inc(&mr->refcount);
+}
+
+static inline void qib_put_mr(struct qib_mregion *mr)
+{
+ if (unlikely(atomic_dec_and_test(&mr->refcount)))
+ complete(&mr->comp);
+}
+
+static inline void qib_put_ss(struct qib_sge_state *ss)
+{
+ while (ss->num_sge) {
+ qib_put_mr(ss->sge.mr);
+ if (--ss->num_sge)
+ ss->sge = *ss->sg_list++;
+ }
+}
+
void qib_release_mmap_info(struct kref *ref);
struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,