IB/qib: Add congestion control agent implementation
Author:     Mike Marciniszyn <mike.marciniszyn@intel.com>
AuthorDate: Thu, 19 Jul 2012 13:04:04 +0000 (13:04 +0000)
Commit:     Roland Dreier <roland@purestorage.com>
CommitDate: Thu, 19 Jul 2012 18:20:04 +0000 (11:20 -0700)
Add a congestion control agent in the driver that handles gets and
sets from the congestion control manager in the fabric for the
Performance Scale Messaging (PSM) library.

Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
drivers/infiniband/hw/qib/qib.h
drivers/infiniband/hw/qib/qib_init.c
drivers/infiniband/hw/qib/qib_mad.c
drivers/infiniband/hw/qib/qib_mad.h
drivers/infiniband/hw/qib/qib_sysfs.c

diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index cbe57715145795abc6c98bbf6b174cdaeaee392c..6e19ec844d9982122014df601ed64a975056502e 100644
@@ -519,6 +519,7 @@ struct qib_pportdata {
        struct qib_devdata *dd;
        struct qib_chippport_specific *cpspec; /* chip-specific per-port */
        struct kobject pport_kobj;
+       struct kobject pport_cc_kobj;
        struct kobject sl2vl_kobj;
        struct kobject diagc_kobj;
 
@@ -638,6 +639,39 @@ struct qib_pportdata {
        struct timer_list led_override_timer;
        struct xmit_wait cong_stats;
        struct timer_list symerr_clear_timer;
+
+       /* Synchronize access between driver writes and sysfs reads */
+       spinlock_t cc_shadow_lock
+               ____cacheline_aligned_in_smp;
+
+       /* Shadow copy of the congestion control table */
+       struct cc_table_shadow *ccti_entries_shadow;
+
+       /* Shadow copy of the congestion control entries */
+       struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow;
+
+       /* List of congestion control table entries */
+       struct ib_cc_table_entry_shadow *ccti_entries;
+
+       /* 16 congestion entries with each entry corresponding to a SL */
+       struct ib_cc_congestion_entry_shadow *congestion_entries;
+
+       /* Total number of congestion control table entries */
+       u16 total_cct_entry;
+
+       /* Bit map identifying service level */
+       u16 cc_sl_control_map;
+
+       /* maximum congestion control table index */
+       u16 ccti_limit;
+
+       /* CA's max number of 64 entry units in the congestion control table */
+       u8 cc_max_table_entries;
+
+       /* Maximum number of congestion control entries that the agent expects
+        * the manager to send.
+        */
+       u8 cc_supported_table_entries;
 };
 
 /* Observers. Not to be taken lightly, possibly not to ship. */
@@ -1078,6 +1112,7 @@ extern u32 qib_cpulist_count;
 extern unsigned long *qib_cpulist;
 
 extern unsigned qib_wc_pat;
+extern unsigned qib_cc_table_size;
 int qib_init(struct qib_devdata *, int);
 int init_chip_wc_pat(struct qib_devdata *dd, u32);
 int qib_enable_wc(struct qib_devdata *dd);
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 306e65e99e9999087423f1caba84a56a1c4ab654..24ad901c95c42cc2d55bd3a98710394525443e48 100644
@@ -41,6 +41,7 @@
 
 #include "qib.h"
 #include "qib_common.h"
+#include "qib_mad.h"
 
 /*
  * min buffers we want to have per context, after driver
@@ -71,6 +72,9 @@ unsigned qib_n_krcv_queues;
 module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
 MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
 
+unsigned qib_cc_table_size;
+module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
+MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
 /*
  * qib_wc_pat parameter:
  *      0 is WC via MTRR
@@ -199,6 +203,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
 void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
                        u8 hw_pidx, u8 port)
 {
+       int size;
        ppd->dd = dd;
        ppd->hw_pidx = hw_pidx;
        ppd->port = port; /* IB port number, not index */
@@ -212,6 +217,81 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
        ppd->symerr_clear_timer.data = (unsigned long)ppd;
 
        ppd->qib_wq = NULL;
+
+       spin_lock_init(&ppd->cc_shadow_lock);
+
+       if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
+               goto bail;
+
+       ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size,
+               IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT);
+
+       ppd->cc_max_table_entries =
+               ppd->cc_supported_table_entries/IB_CCT_ENTRIES;
+
+       size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
+               * IB_CCT_ENTRIES;
+       ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
+       if (!ppd->ccti_entries) {
+               qib_dev_err(dd,
+                 "failed to allocate congestion control table for port %d!\n",
+                 port);
+               goto bail;
+       }
+
+       size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
+       ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
+       if (!ppd->congestion_entries) {
+               qib_dev_err(dd,
+                "failed to allocate congestion setting list for port %d!\n",
+                port);
+               goto bail_1;
+       }
+
+       size = sizeof(struct cc_table_shadow);
+       ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
+       if (!ppd->ccti_entries_shadow) {
+               qib_dev_err(dd,
+                "failed to allocate shadow ccti list for port %d!\n",
+                port);
+               goto bail_2;
+       }
+
+       size = sizeof(struct ib_cc_congestion_setting_attr);
+       ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL);
+       if (!ppd->congestion_entries_shadow) {
+               qib_dev_err(dd,
+                "failed to allocate shadow congestion setting list for port %d!\n",
+                port);
+               goto bail_3;
+       }
+
+       return;
+
+bail_3:
+       kfree(ppd->ccti_entries_shadow);
+       ppd->ccti_entries_shadow = NULL;
+bail_2:
+       kfree(ppd->congestion_entries);
+       ppd->congestion_entries = NULL;
+bail_1:
+       kfree(ppd->ccti_entries);
+       ppd->ccti_entries = NULL;
+bail:
+       /* User is intentionally disabling the congestion control agent */
+       if (!qib_cc_table_size)
+               return;
+
+       if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
+               qib_dev_err(dd,
+                "Congestion Control table size %d less than minimum %d for port %d\n",
+                qib_cc_table_size, IB_CCT_MIN_ENTRIES, port);
+               qib_cc_table_size = 0;
+       }
+
+       qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
+               port);
+       return;
 }
 
 static int init_pioavailregs(struct qib_devdata *dd)
@@ -1164,10 +1244,24 @@ static void cleanup_device_data(struct qib_devdata *dd)
        unsigned long flags;
 
        /* users can't do anything more with chip */
-       for (pidx = 0; pidx < dd->num_pports; ++pidx)
+       for (pidx = 0; pidx < dd->num_pports; ++pidx) {
                if (dd->pport[pidx].statusp)
                        *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;
 
+               spin_lock(&dd->pport[pidx].cc_shadow_lock);
+
+               kfree(dd->pport[pidx].congestion_entries);
+               dd->pport[pidx].congestion_entries = NULL;
+               kfree(dd->pport[pidx].ccti_entries);
+               dd->pport[pidx].ccti_entries = NULL;
+               kfree(dd->pport[pidx].ccti_entries_shadow);
+               dd->pport[pidx].ccti_entries_shadow = NULL;
+               kfree(dd->pport[pidx].congestion_entries_shadow);
+               dd->pport[pidx].congestion_entries_shadow = NULL;
+
+               spin_unlock(&dd->pport[pidx].cc_shadow_lock);
+       }
+
        if (!qib_wc_pat)
                qib_disable_wc(dd);
 
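The sizing logic in qib_init_pportdata() above clamps the cc_table_size module parameter into the 128..1984 range and derives the number of 64-entry blocks advertised to the manager. Below is a small stand-alone sketch of that arithmetic, not part of the patch; the constants mirror qib_mad.h (shown further down).

    /*
     * Illustration of the cc_table_size clamping done in
     * qib_init_pportdata(); constants mirror qib_mad.h.
     */
    #include <stdio.h>

    #define IB_CCT_ENTRIES          64
    #define IB_CCT_MIN_ENTRIES      (IB_CCT_ENTRIES * 2)   /* 128 */
    #define IB_CC_TABLE_CAP_DEFAULT 31                      /* 31 * 64 = 1984 */

    int main(void)
    {
        unsigned requested[] = { 0, 100, 128, 384, 5000 };
        unsigned i;

        for (i = 0; i < sizeof(requested) / sizeof(requested[0]); i++) {
            unsigned supported, blocks;

            if (requested[i] < IB_CCT_MIN_ENTRIES) {
                /* qib_init_pportdata() bails out; the agent stays disabled */
                printf("cc_table_size=%u -> CCA disabled\n", requested[i]);
                continue;
            }
            supported = requested[i];
            if (supported > IB_CCT_ENTRIES * IB_CC_TABLE_CAP_DEFAULT)
                supported = IB_CCT_ENTRIES * IB_CC_TABLE_CAP_DEFAULT;
            blocks = supported / IB_CCT_ENTRIES;    /* cc_max_table_entries */
            printf("cc_table_size=%u -> %u supported entries, %u 64-entry blocks\n",
                   requested[i], supported, blocks);
        }
        return 0;
    }

Values that are not a multiple of 64 are kept as-is for cc_supported_table_entries but round down when cc_max_table_entries (the block count reported in CongestionInfo) is computed.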
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 6e20b58b90b6e4d5ace6d3ee4eee97a882866598..19f1e6c45fb6847caaac2a45af4e69d4df981ca4 100644
@@ -49,6 +49,18 @@ static int reply(struct ib_smp *smp)
        return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 }
 
+static int reply_failure(struct ib_smp *smp)
+{
+       /*
+        * The verbs framework will handle the directed/LID route
+        * packet changes.
+        */
+       smp->method = IB_MGMT_METHOD_GET_RESP;
+       if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+               smp->status |= IB_SMP_DIRECTION;
+       return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY;
+}
+
 static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
 {
        struct ib_mad_send_buf *send_buf;
@@ -2047,6 +2059,298 @@ bail:
        return ret;
 }
 
+static int cc_get_classportinfo(struct ib_cc_mad *ccp,
+                               struct ib_device *ibdev)
+{
+       struct ib_cc_classportinfo_attr *p =
+               (struct ib_cc_classportinfo_attr *)ccp->mgmt_data;
+
+       memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+       p->base_version = 1;
+       p->class_version = 1;
+       p->cap_mask = 0;
+
+       /*
+        * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
+        */
+       p->resp_time_value = 18;
+
+       return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_info(struct ib_cc_mad *ccp,
+                               struct ib_device *ibdev, u8 port)
+{
+       struct ib_cc_info_attr *p =
+               (struct ib_cc_info_attr *)ccp->mgmt_data;
+       struct qib_ibport *ibp = to_iport(ibdev, port);
+       struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+       memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+       p->congestion_info = 0;
+       p->control_table_cap = ppd->cc_max_table_entries;
+
+       return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_setting(struct ib_cc_mad *ccp,
+                               struct ib_device *ibdev, u8 port)
+{
+       int i;
+       struct ib_cc_congestion_setting_attr *p =
+               (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
+       struct qib_ibport *ibp = to_iport(ibdev, port);
+       struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+       struct ib_cc_congestion_entry_shadow *entries;
+
+       memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+       spin_lock(&ppd->cc_shadow_lock);
+
+       entries = ppd->congestion_entries_shadow->entries;
+       p->port_control = cpu_to_be16(
+               ppd->congestion_entries_shadow->port_control);
+       p->control_map = cpu_to_be16(
+               ppd->congestion_entries_shadow->control_map);
+       for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
+               p->entries[i].ccti_increase = entries[i].ccti_increase;
+               p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
+               p->entries[i].trigger_threshold = entries[i].trigger_threshold;
+               p->entries[i].ccti_min = entries[i].ccti_min;
+       }
+
+       spin_unlock(&ppd->cc_shadow_lock);
+
+       return reply((struct ib_smp *) ccp);
+}
+
+static int cc_get_congestion_control_table(struct ib_cc_mad *ccp,
+                               struct ib_device *ibdev, u8 port)
+{
+       struct ib_cc_table_attr *p =
+               (struct ib_cc_table_attr *)ccp->mgmt_data;
+       struct qib_ibport *ibp = to_iport(ibdev, port);
+       struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+       u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
+       u32 max_cct_block;
+       u32 cct_entry;
+       struct ib_cc_table_entry_shadow *entries;
+       int i;
+
+       /* Is the table index more than what is supported? */
+       if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
+               goto bail;
+
+       memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
+
+       spin_lock(&ppd->cc_shadow_lock);
+
+       max_cct_block =
+               (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES;
+       max_cct_block = max_cct_block ? max_cct_block - 1 : 0;
+
+       if (cct_block_index > max_cct_block) {
+               spin_unlock(&ppd->cc_shadow_lock);
+               goto bail;
+       }
+
+       ccp->attr_mod = cpu_to_be32(cct_block_index);
+
+       cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1);
+
+       cct_entry--;
+
+       p->ccti_limit = cpu_to_be16(cct_entry);
+
+       entries = &ppd->ccti_entries_shadow->
+                       entries[IB_CCT_ENTRIES * cct_block_index];
+       cct_entry %= IB_CCT_ENTRIES;
+
+       for (i = 0; i <= cct_entry; i++)
+               p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry);
+
+       spin_unlock(&ppd->cc_shadow_lock);
+
+       return reply((struct ib_smp *) ccp);
+
+bail:
+       return reply_failure((struct ib_smp *) ccp);
+}
+
+static int cc_set_congestion_setting(struct ib_cc_mad *ccp,
+                               struct ib_device *ibdev, u8 port)
+{
+       struct ib_cc_congestion_setting_attr *p =
+               (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
+       struct qib_ibport *ibp = to_iport(ibdev, port);
+       struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+       int i;
+
+       ppd->cc_sl_control_map = be16_to_cpu(p->control_map);
+
+       for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
+               ppd->congestion_entries[i].ccti_increase =
+                       p->entries[i].ccti_increase;
+
+               ppd->congestion_entries[i].ccti_timer =
+                       be16_to_cpu(p->entries[i].ccti_timer);
+
+               ppd->congestion_entries[i].trigger_threshold =
+                       p->entries[i].trigger_threshold;
+
+               ppd->congestion_entries[i].ccti_min =
+                       p->entries[i].ccti_min;
+       }
+
+       return reply((struct ib_smp *) ccp);
+}
+
+static int cc_set_congestion_control_table(struct ib_cc_mad *ccp,
+                               struct ib_device *ibdev, u8 port)
+{
+       struct ib_cc_table_attr *p =
+               (struct ib_cc_table_attr *)ccp->mgmt_data;
+       struct qib_ibport *ibp = to_iport(ibdev, port);
+       struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+       u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
+       u32 cct_entry;
+       struct ib_cc_table_entry_shadow *entries;
+       int i;
+
+       /* Is the table index more than what is supported? */
+       if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
+               goto bail;
+
+       /* If this packet is the first in the sequence then
+        * zero the total table entry count.
+        */
+       if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES)
+               ppd->total_cct_entry = 0;
+
+       cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES;
+
+       /* ccti_limit is 0 to 63 */
+       ppd->total_cct_entry += (cct_entry + 1);
+
+       if (ppd->total_cct_entry > ppd->cc_supported_table_entries)
+               goto bail;
+
+       ppd->ccti_limit = be16_to_cpu(p->ccti_limit);
+
+       entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index);
+
+       for (i = 0; i <= cct_entry; i++)
+               entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry);
+
+       spin_lock(&ppd->cc_shadow_lock);
+
+       ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1;
+       memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries,
+               (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)));
+
+       ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED;
+       ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map;
+       memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries,
+               IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry));
+
+       spin_unlock(&ppd->cc_shadow_lock);
+
+       return reply((struct ib_smp *) ccp);
+
+bail:
+       return reply_failure((struct ib_smp *) ccp);
+}
+
+static int check_cc_key(struct qib_ibport *ibp,
+                       struct ib_cc_mad *ccp, int mad_flags)
+{
+       /* CC key checking is not implemented; accept all requests */
+       return 0;
+}
+
+static int process_cc(struct ib_device *ibdev, int mad_flags,
+                       u8 port, struct ib_mad *in_mad,
+                       struct ib_mad *out_mad)
+{
+       struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
+       struct qib_ibport *ibp = to_iport(ibdev, port);
+       int ret;
+
+       *out_mad = *in_mad;
+
+       if (ccp->class_version != 2) {
+               ccp->status |= IB_SMP_UNSUP_VERSION;
+               ret = reply((struct ib_smp *)ccp);
+               goto bail;
+       }
+
+       ret = check_cc_key(ibp, ccp, mad_flags);
+       if (ret)
+               goto bail;
+
+       switch (ccp->method) {
+       case IB_MGMT_METHOD_GET:
+               switch (ccp->attr_id) {
+               case IB_CC_ATTR_CLASSPORTINFO:
+                       ret = cc_get_classportinfo(ccp, ibdev);
+                       goto bail;
+
+               case IB_CC_ATTR_CONGESTION_INFO:
+                       ret = cc_get_congestion_info(ccp, ibdev, port);
+                       goto bail;
+
+               case IB_CC_ATTR_CA_CONGESTION_SETTING:
+                       ret = cc_get_congestion_setting(ccp, ibdev, port);
+                       goto bail;
+
+               case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
+                       ret = cc_get_congestion_control_table(ccp, ibdev, port);
+                       goto bail;
+
+               default:
+                       ccp->status |= IB_SMP_UNSUP_METH_ATTR;
+                       ret = reply((struct ib_smp *) ccp);
+                       goto bail;
+               }
+
+       case IB_MGMT_METHOD_SET:
+               switch (ccp->attr_id) {
+               case IB_CC_ATTR_CA_CONGESTION_SETTING:
+                       ret = cc_set_congestion_setting(ccp, ibdev, port);
+                       goto bail;
+
+               case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
+                       ret = cc_set_congestion_control_table(ccp, ibdev, port);
+                       goto bail;
+
+               default:
+                       ccp->status |= IB_SMP_UNSUP_METH_ATTR;
+                       ret = reply((struct ib_smp *) ccp);
+                       goto bail;
+               }
+
+       case IB_MGMT_METHOD_GET_RESP:
+               /*
+                * The ib_mad module will call us to process responses
+                * before checking for other consumers.
+                * Just tell the caller to process it normally.
+                */
+               ret = IB_MAD_RESULT_SUCCESS;
+               goto bail;
+
+       case IB_MGMT_METHOD_TRAP:
+       default:
+               ccp->status |= IB_SMP_UNSUP_METHOD;
+               ret = reply((struct ib_smp *) ccp);
+       }
+
+bail:
+       return ret;
+}
+
 /**
  * qib_process_mad - process an incoming MAD packet
  * @ibdev: the infiniband device this packet came in on
@@ -2071,6 +2375,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
                    struct ib_mad *in_mad, struct ib_mad *out_mad)
 {
        int ret;
+       struct qib_ibport *ibp = to_iport(ibdev, port);
+       struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 
        switch (in_mad->mad_hdr.mgmt_class) {
        case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
@@ -2082,6 +2388,15 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
                ret = process_perf(ibdev, port, in_mad, out_mad);
                goto bail;
 
+       case IB_MGMT_CLASS_CONG_MGMT:
+               if (!ppd->congestion_entries_shadow ||
+                        !qib_cc_table_size) {
+                       ret = IB_MAD_RESULT_SUCCESS;
+                       goto bail;
+               }
+               ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad);
+               goto bail;
+
        default:
                ret = IB_MAD_RESULT_SUCCESS;
        }
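The CongestionControlTable handlers above address the table in 64-entry blocks: attr_mod selects the block, ccti_limit carries the highest valid table index once the MAD is applied, so the set handler copies (ccti_limit % 64) + 1 entries from the block payload and resets its running total whenever ccti_limit drops below 64. A hedged stand-alone sketch of that arithmetic for a hypothetical manager loading a 200-entry table (not driver code):

    #include <stdio.h>

    #define IB_CCT_ENTRIES 64

    /*
     * Mirror of the index math in cc_set_congestion_control_table():
     * attr_mod picks the 64-entry block, ccti_limit is the highest CCT
     * index valid once this MAD is applied, and (ccti_limit % 64) + 1
     * entries of the block payload are copied.
     */
    static void show_block(unsigned block, unsigned ccti_limit)
    {
        unsigned last = ccti_limit % IB_CCT_ENTRIES;

        printf("block %u: ccti_limit=%u -> entries %u..%u written (%u of 64)\n",
               block, ccti_limit,
               block * IB_CCT_ENTRIES,
               block * IB_CCT_ENTRIES + last,
               last + 1);
    }

    int main(void)
    {
        /* Hypothetical manager programming a 200-entry table. */
        show_block(0, 63);      /* ccti_limit < 64 resets the running count */
        show_block(1, 127);
        show_block(2, 191);
        show_block(3, 199);     /* final block only partially filled */
        return 0;
    }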
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index ecc416cdbaaa52f3acf1d6f5b1f7a07a67bf7f93..57bd3fa016bc3dad3df93a144bbd9a4d3e7f8eec 100644
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
- * All rights reserved.
+ * Copyright (c) 2012 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -31,6 +31,8 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#ifndef _QIB_MAD_H
+#define _QIB_MAD_H
 
 #include <rdma/ib_pma.h>
 
@@ -222,6 +224,198 @@ struct ib_pma_portcounters_cong {
 #define IB_PMA_SEL_CONG_XMIT                    0x04
 #define IB_PMA_SEL_CONG_ROUTING                 0x08
 
+/*
+ * Congestion control class attributes
+ */
+#define IB_CC_ATTR_CLASSPORTINFO                       cpu_to_be16(0x0001)
+#define IB_CC_ATTR_NOTICE                              cpu_to_be16(0x0002)
+#define IB_CC_ATTR_CONGESTION_INFO                     cpu_to_be16(0x0011)
+#define IB_CC_ATTR_CONGESTION_KEY_INFO                 cpu_to_be16(0x0012)
+#define IB_CC_ATTR_CONGESTION_LOG                      cpu_to_be16(0x0013)
+#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING           cpu_to_be16(0x0014)
+#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING      cpu_to_be16(0x0015)
+#define IB_CC_ATTR_CA_CONGESTION_SETTING               cpu_to_be16(0x0016)
+#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE            cpu_to_be16(0x0017)
+#define IB_CC_ATTR_TIME_STAMP                          cpu_to_be16(0x0018)
+
+/* generalizations for threshold values */
+#define IB_CC_THRESHOLD_NONE 0x0
+#define IB_CC_THRESHOLD_MIN  0x1
+#define IB_CC_THRESHOLD_MAX  0xf
+
+/* CCA MAD header constants */
+#define IB_CC_MAD_LOGDATA_LEN 32
+#define IB_CC_MAD_MGMTDATA_LEN 192
+
+struct ib_cc_mad {
+       u8      base_version;
+       u8      mgmt_class;
+       u8      class_version;
+       u8      method;
+       __be16  status;
+       __be16  class_specific;
+       __be64  tid;
+       __be16  attr_id;
+       __be16  resv;
+       __be32  attr_mod;
+       __be64 cckey;
+
+       /* For CongestionLog attribute only */
+       u8 log_data[IB_CC_MAD_LOGDATA_LEN];
+
+       u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN];
+} __packed;
+
+/*
+ * Congestion Control class portinfo capability mask bits
+ */
+#define IB_CC_CPI_CM_TRAP_GEN          cpu_to_be16(1 << 0)
+#define IB_CC_CPI_CM_GET_SET_NOTICE    cpu_to_be16(1 << 1)
+#define IB_CC_CPI_CM_CAP2              cpu_to_be16(1 << 2)
+#define IB_CC_CPI_CM_ENHANCEDPORT0_CC  cpu_to_be16(1 << 8)
+
+struct ib_cc_classportinfo_attr {
+       u8 base_version;
+       u8 class_version;
+       __be16 cap_mask;
+       u8 reserved[3];
+       u8 resp_time_value;     /* only lower 5 bits */
+       union ib_gid redirect_gid;
+       __be32 redirect_tc_sl_fl;       /* 8, 4, 20 bits respectively */
+       __be16 redirect_lid;
+       __be16 redirect_pkey;
+       __be32 redirect_qp;     /* only lower 24 bits */
+       __be32 redirect_qkey;
+       union ib_gid trap_gid;
+       __be32 trap_tc_sl_fl;   /* 8, 4, 20 bits respectively */
+       __be16 trap_lid;
+       __be16 trap_pkey;
+       __be32 trap_hl_qp;      /* 8, 24 bits respectively */
+       __be32 trap_qkey;
+} __packed;
+
+/* Congestion control traps */
+#define IB_CC_TRAP_KEY_VIOLATION 0x0000
+
+struct ib_cc_trap_key_violation_attr {
+       __be16 source_lid;
+       u8 method;
+       u8 reserved1;
+       __be16 attrib_id;
+       __be32 attrib_mod;
+       __be32 qp;
+       __be64 cckey;
+       u8 sgid[16];
+       u8 padding[24];
+} __packed;
+
+/* Congestion info flags */
+#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1
+#define IB_CC_TABLE_CAP_DEFAULT 31
+
+struct ib_cc_info_attr {
+       __be16 congestion_info;
+       u8  control_table_cap; /* Multiple of 64 entry unit CCTs */
+} __packed;
+
+struct ib_cc_key_info_attr {
+       __be64 cckey;
+       u8  protect;
+       __be16 lease_period;
+       __be16 violations;
+} __packed;
+
+#define IB_CC_CL_CA_LOGEVENTS_LEN 208
+
+struct ib_cc_log_attr {
+       u8 log_type;
+       u8 congestion_flags;
+       __be16 threshold_event_counter;
+       __be16 threshold_congestion_event_map;
+       __be16 current_time_stamp;
+       u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN];
+} __packed;
+
+#define IB_CC_CLEC_SERVICETYPE_RC 0x0
+#define IB_CC_CLEC_SERVICETYPE_UC 0x1
+#define IB_CC_CLEC_SERVICETYPE_RD 0x2
+#define IB_CC_CLEC_SERVICETYPE_UD 0x3
+
+struct ib_cc_log_event {
+       u8 local_qp_cn_entry;
+       u8 remote_qp_number_cn_entry[3];
+       u8  sl_cn_entry:4;
+       u8  service_type_cn_entry:4;
+       __be32 remote_lid_cn_entry;
+       __be32 timestamp_cn_entry;
+} __packed;
+
+/* Sixteen congestion entries */
+#define IB_CC_CCS_ENTRIES 16
+
+/* Port control flags */
+#define IB_CC_CCS_PC_SL_BASED 0x01
+
+struct ib_cc_congestion_entry {
+       u8 ccti_increase;
+       __be16 ccti_timer;
+       u8 trigger_threshold;
+       u8 ccti_min; /* min CCTI for cc table */
+} __packed;
+
+struct ib_cc_congestion_entry_shadow {
+       u8 ccti_increase;
+       u16 ccti_timer;
+       u8 trigger_threshold;
+       u8 ccti_min; /* min CCTI for cc table */
+} __packed;
+
+struct ib_cc_congestion_setting_attr {
+       __be16 port_control;
+       __be16 control_map;
+       struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES];
+} __packed;
+
+struct ib_cc_congestion_setting_attr_shadow {
+       u16 port_control;
+       u16 control_map;
+       struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES];
+} __packed;
+
+#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1
+#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1
+
+/* 64 Congestion Control table entries in a single MAD */
+#define IB_CCT_ENTRIES 64
+#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2)
+
+struct ib_cc_table_entry {
+       __be16 entry; /* shift:2, multiplier:14 */
+};
+
+struct ib_cc_table_entry_shadow {
+       u16 entry; /* shift:2, multiplier:14 */
+};
+
+struct ib_cc_table_attr {
+       __be16 ccti_limit; /* max CCTI for cc table */
+       struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES];
+} __packed;
+
+struct ib_cc_table_attr_shadow {
+       u16 ccti_limit; /* max CCTI for cc table */
+       struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES];
+} __packed;
+
+#define CC_TABLE_SHADOW_MAX \
+       (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES)
+
+struct cc_table_shadow {
+       u16 ccti_last_entry;
+       struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
+} __packed;
+
+#endif                         /* _QIB_MAD_H */
 /*
  * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
  * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
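The ib_cc_table_entry added above packs a 2-bit shift and a 14-bit multiplier into one 16-bit word; on the wire it is big-endian (__be16), while the shadow copies hold host order (u16), which is why the set handler does be16_to_cpu() before storing. A small hedged decoder follows; the field placement (shift in the two most significant bits) is assumed from the "shift:2, multiplier:14" comment, not stated elsewhere in the patch.

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Decode one CCT entry per the "shift:2, multiplier:14" comment:
     * assumed layout is shift in the two most significant bits of the
     * host-order value, multiplier in the remaining fourteen.
     */
    static void decode_cct_entry(uint16_t entry)
    {
        unsigned shift      = entry >> 14;
        unsigned multiplier = entry & 0x3fff;

        printf("entry=0x%04x shift=%u multiplier=%u\n", entry, shift, multiplier);
    }

    int main(void)
    {
        decode_cct_entry(0x0000);   /* no injection rate delay */
        decode_cct_entry(0x4001);   /* shift=1, multiplier=1 */
        decode_cct_entry(0xffff);   /* maximum shift and multiplier */
        return 0;
    }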
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index dd9cd49d097927bf8816c5aedc836c3f31e4b5ea..ae78305b59d0c4954ab1fd2521a8e925395422f1 100644
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2012 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
  * Copyright (c) 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -33,6 +34,7 @@
 #include <linux/ctype.h>
 
 #include "qib.h"
+#include "qib_mad.h"
 
 /**
  * qib_parse_ushort - parse an unsigned short value in an arbitrary base
@@ -231,6 +233,98 @@ static struct attribute *port_default_attributes[] = {
        NULL
 };
 
+/*
+ * Start of per-port congestion control structures and support code
+ */
+
+/*
+ * Congestion control table size followed by table entries
+ */
+static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
+               struct bin_attribute *bin_attr,
+               char *buf, loff_t pos, size_t count)
+{
+       int ret;
+       struct qib_pportdata *ppd =
+               container_of(kobj, struct qib_pportdata, pport_cc_kobj);
+
+       if (!qib_cc_table_size || !ppd->ccti_entries_shadow)
+               return -EINVAL;
+
+       ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow)
+                + sizeof(__be16);
+
+       if (pos > ret)
+               return -EINVAL;
+
+       if (count > ret - pos)
+               count = ret - pos;
+
+       if (!count)
+               return count;
+
+       spin_lock(&ppd->cc_shadow_lock);
+       memcpy(buf, (char *)ppd->ccti_entries_shadow + pos, count);
+       spin_unlock(&ppd->cc_shadow_lock);
+
+       return count;
+}
+
+static void qib_port_release(struct kobject *kobj)
+{
+       /* nothing to do since memory is freed by qib_free_devdata() */
+}
+
+static struct kobj_type qib_port_cc_ktype = {
+       .release = qib_port_release,
+};
+
+static struct bin_attribute cc_table_bin_attr = {
+       .attr = {.name = "cc_table_bin", .mode = 0444},
+       .read = read_cc_table_bin,
+       .size = PAGE_SIZE,
+};
+
+/*
+ * Congestion settings: port control, control map and an array of 16
+ * entries for the congestion entries - increase, timer, event log
+ * trigger threshold and the minimum injection rate delay.
+ */
+static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
+               struct bin_attribute *bin_attr,
+               char *buf, loff_t pos, size_t count)
+{
+       int ret;
+       struct qib_pportdata *ppd =
+               container_of(kobj, struct qib_pportdata, pport_cc_kobj);
+
+       if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
+               return -EINVAL;
+
+       ret = sizeof(struct ib_cc_congestion_setting_attr_shadow);
+
+       if (pos > ret)
+               return -EINVAL;
+       if (count > ret - pos)
+               count = ret - pos;
+
+       if (!count)
+               return count;
+
+       spin_lock(&ppd->cc_shadow_lock);
+       memcpy(buf, (char *)ppd->congestion_entries_shadow + pos, count);
+       spin_unlock(&ppd->cc_shadow_lock);
+
+       return count;
+}
+
+static struct bin_attribute cc_setting_bin_attr = {
+       .attr = {.name = "cc_settings_bin", .mode = 0444},
+       .read = read_cc_setting_bin,
+       .size = PAGE_SIZE,
+};
+
+
 static ssize_t qib_portattr_show(struct kobject *kobj,
        struct attribute *attr, char *buf)
 {
@@ -253,10 +347,6 @@ static ssize_t qib_portattr_store(struct kobject *kobj,
        return pattr->store(ppd, buf, len);
 }
 
-static void qib_port_release(struct kobject *kobj)
-{
-       /* nothing to do since memory is freed by qib_free_devdata() */
-}
 
 static const struct sysfs_ops qib_port_ops = {
        .show = qib_portattr_show,
@@ -670,7 +760,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
        if (ret) {
                qib_dev_err(dd, "Skipping sl2vl sysfs info, "
                            "(err %d) port %u\n", ret, port_num);
-               goto bail_sl;
+               goto bail_link;
        }
        kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
 
@@ -679,15 +769,57 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
        if (ret) {
                qib_dev_err(dd, "Skipping diag_counters sysfs info, "
                            "(err %d) port %u\n", ret, port_num);
-               goto bail_diagc;
+               goto bail_sl;
        }
        kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
 
+       if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
+               return 0;
+
+       ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype,
+                               kobj, "CCMgtA");
+       if (ret) {
+               qib_dev_err(dd,
+                "Skipping Congestion Control sysfs info, (err %d) port %u\n",
+                ret, port_num);
+               goto bail_diagc;
+       }
+
+       kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
+
+       ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
+                               &cc_setting_bin_attr);
+       if (ret) {
+               qib_dev_err(dd,
+                "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
+                ret, port_num);
+               goto bail_cc;
+       }
+
+       ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
+                               &cc_table_bin_attr);
+       if (ret) {
+               qib_dev_err(dd,
+                "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
+                ret, port_num);
+               goto bail_cc_entry_bin;
+       }
+
+       qib_devinfo(dd->pcidev,
+               "IB%u: Congestion Control Agent enabled for port %d\n",
+               dd->unit, port_num);
+
        return 0;
 
+bail_cc_entry_bin:
+       sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
+bail_cc:
+       kobject_put(&ppd->pport_cc_kobj);
 bail_diagc:
-       kobject_put(&ppd->sl2vl_kobj);
+       kobject_put(&ppd->diagc_kobj);
 bail_sl:
+       kobject_put(&ppd->sl2vl_kobj);
+bail_link:
        kobject_put(&ppd->pport_kobj);
 bail:
        return ret;
@@ -720,7 +852,15 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
 
        for (i = 0; i < dd->num_pports; i++) {
                ppd = &dd->pport[i];
-               kobject_put(&ppd->pport_kobj);
+               if (qib_cc_table_size &&
+                       ppd->congestion_entries_shadow) {
+                       sysfs_remove_bin_file(&ppd->pport_cc_kobj,
+                               &cc_setting_bin_attr);
+                       sysfs_remove_bin_file(&ppd->pport_cc_kobj,
+                               &cc_table_bin_attr);
+                       kobject_put(&ppd->pport_cc_kobj);
+               }
                kobject_put(&ppd->sl2vl_kobj);
+               kobject_put(&ppd->pport_kobj);
        }
 }
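For completeness, a user-space sketch of consuming the cc_settings_bin file exposed above. The sysfs path is an assumption (it depends on the HCA unit and port naming; the CCMgtA kobject is created under the port directory), and the layout simply mirrors struct ib_cc_congestion_setting_attr_shadow, which the driver keeps in host byte order.

    #include <stdint.h>
    #include <stdio.h>

    #define IB_CC_CCS_ENTRIES 16

    /* Mirrors struct ib_cc_congestion_setting_attr_shadow from qib_mad.h. */
    struct cc_setting_entry {
        uint8_t  ccti_increase;
        uint16_t ccti_timer;
        uint8_t  trigger_threshold;
        uint8_t  ccti_min;
    } __attribute__((packed));

    struct cc_settings {
        uint16_t port_control;
        uint16_t control_map;
        struct cc_setting_entry entries[IB_CC_CCS_ENTRIES];
    } __attribute__((packed));

    int main(void)
    {
        /* Hypothetical path; adjust for the actual HCA unit and port. */
        const char *path =
            "/sys/class/infiniband/qib0/ports/1/CCMgtA/cc_settings_bin";
        struct cc_settings cc;
        FILE *f;
        int i;

        f = fopen(path, "rb");
        if (!f) {
            perror(path);
            return 1;
        }
        if (fread(&cc, sizeof(cc), 1, f) != 1) {
            perror("fread");
            fclose(f);
            return 1;
        }
        fclose(f);

        printf("port_control=0x%x control_map=0x%x\n",
               cc.port_control, cc.control_map);
        for (i = 0; i < IB_CC_CCS_ENTRIES; i++)
            printf("SL%2d: increase=%u timer=%u threshold=%u min=%u\n",
                   i, cc.entries[i].ccti_increase, cc.entries[i].ccti_timer,
                   cc.entries[i].trigger_threshold, cc.entries[i].ccti_min);
        return 0;
    }

cc_table_bin can be parsed the same way: a u16 ccti_last_entry followed by (ccti_last_entry + 1) u16 table entries, matching struct cc_table_shadow.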