IB/qib: Optimize pio ack buffer allocation
author Mike Marciniszyn <mike.marciniszyn@intel.com>
Mon, 7 May 2012 18:02:42 +0000 (14:02 -0400)
committer Roland Dreier <roland@purestorage.com>
Mon, 14 May 2012 19:37:03 +0000 (12:37 -0700)
This patch optimizes pio buffer allocation in the kernel.

For qib, kernel pio buffers are used for sending acks.  The code to
allocate the buffer would always start at 0 until it found a buffer.

This means that an average of 64 comparisons were done on each
allocate, since the busy bit won't be cleared until the bits are
refreshed when buffers are exhausted.

This patch adds two new fields in the devdata struct, last_pio and
min_kernel_pio.  last_pio is the last buffer that was allocated.
min_kernel_pio is the lowest potential available buffer.

min_kernel_pio is modified as contexts are allocated and deallocated.

Reviewed-by: Ramkrishna Vepa <ramkrishna.vepa@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
drivers/infiniband/hw/qib/qib.h
drivers/infiniband/hw/qib/qib_iba6120.c
drivers/infiniband/hw/qib/qib_iba7220.c
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_tx.c

index 6b811e3e8bd165c2d4af4f47a58d52496e682f0c..2d638877c4af617b97bc4325a309bf60340fae18 100644 (file)
@@ -873,7 +873,14 @@ struct qib_devdata {
         * pio_writing.
         */
        spinlock_t pioavail_lock;
-
+       /*
+        * index of last buffer to optimize search for next
+        */
+       u32 last_pio;
+       /*
+        * min kernel pio buffer to optimize search
+        */
+       u32 min_kernel_pio;
        /*
         * Shadow copies of registers; size indicates read access size.
         * Most of them are readonly, but some are write-only register,
index d0c64d514813909c7a863a0f29939e64123b7cba..4d352b90750a4595f7c73195cd7382fbbf872ea2 100644 (file)
@@ -3132,6 +3132,7 @@ static void get_6120_chip_params(struct qib_devdata *dd)
        val = qib_read_kreg64(dd, kr_sendpiobufcnt);
        dd->piobcnt2k = val & ~0U;
        dd->piobcnt4k = val >> 32;
+       dd->last_pio = dd->piobcnt4k + dd->piobcnt2k - 1;
        /* these may be adjusted in init_chip_wc_pat() */
        dd->pio2kbase = (u32 __iomem *)
                (((char __iomem *)dd->kregbase) + dd->pio2k_bufbase);
index 3c722f79d6f640cf4930a011bd5f3d8ccc2084cd..86a0ba7ca0c24f332e83f36ce559498863e47a3b 100644 (file)
@@ -4157,6 +4157,7 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
                dd->cspec->sdmabufcnt;
        dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs;
        dd->cspec->lastbuf_for_pio--; /* range is <= , not < */
+       dd->last_pio = dd->cspec->lastbuf_for_pio;
        dd->pbufsctxt = dd->lastctxt_piobuf /
                (dd->cfgctxts - dd->first_user_ctxt);
 
index 060b96064469709ef7e4f92770b07fdfc235d7d0..e7b9ad34fe2e7db6976408174719c8d310ceeca1 100644 (file)
@@ -6379,6 +6379,7 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
                dd->cspec->sdmabufcnt;
        dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs;
        dd->cspec->lastbuf_for_pio--; /* range is <= , not < */
+       dd->last_pio = dd->cspec->lastbuf_for_pio;
        dd->pbufsctxt = (dd->cfgctxts > dd->first_user_ctxt) ?
                dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt) : 0;
 
index 1bf626c401728e0925b1ac75cd931f1785dedc5f..31d3561400a49f6056eb6be5c082b05cc0edb0e4 100644 (file)
@@ -295,6 +295,7 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum,
 
        nbufs = last - first + 1; /* number in range to check */
        if (dd->upd_pio_shadow) {
+update_shadow:
                /*
                 * Minor optimization.  If we had no buffers on last call,
                 * start out by doing the update; continue and do scan even
@@ -304,37 +305,39 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum,
                updated++;
        }
        i = first;
-rescan:
        /*
         * While test_and_set_bit() is atomic, we do that and then the
         * change_bit(), and the pair is not.  See if this is the cause
         * of the remaining armlaunch errors.
         */
        spin_lock_irqsave(&dd->pioavail_lock, flags);
+       if (dd->last_pio >= first && dd->last_pio <= last)
+               i = dd->last_pio + 1;
+       if (!first)
+               /* adjust to min possible  */
+               nbufs = last - dd->min_kernel_pio + 1;
        for (j = 0; j < nbufs; j++, i++) {
                if (i > last)
-                       i = first;
+                       i = !first ? dd->min_kernel_pio : first;
                if (__test_and_set_bit((2 * i) + 1, shadow))
                        continue;
                /* flip generation bit */
                __change_bit(2 * i, shadow);
                /* remember that the buffer can be written to now */
                __set_bit(i, dd->pio_writing);
+               if (!first && first != last) /* first == last on VL15, avoid */
+                       dd->last_pio = i;
                break;
        }
        spin_unlock_irqrestore(&dd->pioavail_lock, flags);
 
        if (j == nbufs) {
-               if (!updated) {
+               if (!updated)
                        /*
                         * First time through; shadow exhausted, but may be
                         * buffers available, try an update and then rescan.
                         */
-                       update_send_bufs(dd);
-                       updated++;
-                       i = first;
-                       goto rescan;
-               }
+                       goto update_shadow;
                no_send_bufs(dd);
                buf = NULL;
        } else {
@@ -422,14 +425,20 @@ void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start,
                                __clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT
                                            + start, dd->pioavailshadow);
                        __set_bit(start, dd->pioavailkernel);
+                       if ((start >> 1) < dd->min_kernel_pio)
+                               dd->min_kernel_pio = start >> 1;
                } else {
                        __set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT,
                                  dd->pioavailshadow);
                        __clear_bit(start, dd->pioavailkernel);
+                       if ((start >> 1) > dd->min_kernel_pio)
+                               dd->min_kernel_pio = start >> 1;
                }
                start += 2;
        }
 
+       if (dd->min_kernel_pio > 0 && dd->last_pio < dd->min_kernel_pio - 1)
+               dd->last_pio = dd->min_kernel_pio - 1;
        spin_unlock_irqrestore(&dd->pioavail_lock, flags);
 
        dd->f_txchk_change(dd, ostart, len, avail, rcd);