RDMA/ocrdma: Depend on async link events from CNA
authorDevesh Sharma <devesh.sharma@avagotech.com>
Thu, 24 Dec 2015 18:14:07 +0000 (13:14 -0500)
committerDoug Ledford <dledford@redhat.com>
Mon, 28 Dec 2015 16:45:54 +0000 (11:45 -0500)
Recently Doug Ledford reported a deadlock happening
between the ocrdma load sequence and the NetworkManager service
issuing "open" on the be2net interface.

The deadlock happens when any be2net hook (e.g. open/close) is called
in parallel to insmod ocrdma.ko.

A. be2net sends administrative open/close events to ocrdma while holding
   device_list_mutex. It does this from the ndo_open/ndo_stop hooks of be2net.
   So the sequence of locks is rtnl_lock ---> device_list lock.

B.  When a new ocrdma RoCE device gets registered, the infiniband stack now
    takes rtnl_lock in ib_register_device() in the GID initialization routines.
    So the sequence of locks in this path is device_list lock ---> rtnl_lock.

Because the two paths take the same locks in opposite order, they can deadlock.

With this patch we stop using the administrative open and close events
injected by the be2net driver. These events were used to dispatch PORT_ACTIVE
and PORT_ERROR events to the IB stack. This patch implements logic
to receive the async link events generated by the CNA whenever a link-state
change is detected. From now on, these async events will be used to dispatch
PORT_ACTIVE and PORT_ERROR events to the IB stack.

Depending on async events from the CNA removes the need to hold
device_list_mutex and thus breaks the deadlock scenario.

Reported-by: Doug Ledford <dledford@redhat.com>
CC: Sathya Perla <sathya.perla@avagotech.com>
Signed-off-by: Padmanabh Ratnakar <padmanabh.ratnakar@avagotech.com>
Signed-off-by: Selvin Xavier <selvin.xavier@avagotech.com>
Signed-off-by: Devesh Sharma <devesh.sharma@avagotech.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/ocrdma/ocrdma.h
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.h
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_sli.h
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

index ae80590aabdf7db6df756c0ee07afa4721f6c37c..040bb8b5cb15a65c34b15ab67c3c5fe14e27524c 100644 (file)
@@ -232,6 +232,10 @@ struct phy_info {
        u16 interface_type;
 };
 
+enum ocrdma_flags {
+       OCRDMA_FLAGS_LINK_STATUS_INIT = 0x01
+};
+
 struct ocrdma_dev {
        struct ib_device ibdev;
        struct ocrdma_dev_attr attr;
@@ -287,6 +291,7 @@ struct ocrdma_dev {
        atomic_t update_sl;
        u16 pvid;
        u32 asic_id;
+       u32 flags;
 
        ulong last_stats_time;
        struct mutex stats_lock; /* provide synch for debugfs operations */
@@ -591,4 +596,9 @@ static inline u8 ocrdma_is_enabled_and_synced(u32 state)
                (state & OCRDMA_STATE_FLAG_SYNC);
 }
 
+static inline u8 ocrdma_get_ae_link_state(u32 ae_state)
+{
+       return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT);
+}
+
 #endif
index 4fc2bb49c28ebd796a8605f9a47c014174d90f38..283ca842ff7498b308fcec85a1268dcb452d56b0 100644 (file)
@@ -579,6 +579,8 @@ static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
 
        cmd->async_event_bitmap = BIT(OCRDMA_ASYNC_GRP5_EVE_CODE);
        cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_RDMA_EVE_CODE);
+       /* Request link events on this  MQ. */
+       cmd->async_event_bitmap |= BIT(OCRDMA_ASYNC_LINK_EVE_CODE);
 
        cmd->async_cqid_ringsize = cq->id;
        cmd->async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
@@ -819,20 +821,42 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
        }
 }
 
+static void ocrdma_process_link_state(struct ocrdma_dev *dev,
+                                     struct ocrdma_ae_mcqe *cqe)
+{
+       struct ocrdma_ae_lnkst_mcqe *evt;
+       u8 lstate;
+
+       evt = (struct ocrdma_ae_lnkst_mcqe *)cqe;
+       lstate = ocrdma_get_ae_link_state(evt->speed_state_ptn);
+
+       if (!(lstate & OCRDMA_AE_LSC_LLINK_MASK))
+               return;
+
+       if (dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)
+               ocrdma_update_link_state(dev, (lstate & OCRDMA_LINK_ST_MASK));
+}
+
 static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
 {
        /* async CQE processing */
        struct ocrdma_ae_mcqe *cqe = ae_cqe;
        u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
                        OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
-
-       if (evt_code == OCRDMA_ASYNC_RDMA_EVE_CODE)
+       switch (evt_code) {
+       case OCRDMA_ASYNC_LINK_EVE_CODE:
+               ocrdma_process_link_state(dev, cqe);
+               break;
+       case OCRDMA_ASYNC_RDMA_EVE_CODE:
                ocrdma_dispatch_ibevent(dev, cqe);
-       else if (evt_code == OCRDMA_ASYNC_GRP5_EVE_CODE)
+               break;
+       case OCRDMA_ASYNC_GRP5_EVE_CODE:
                ocrdma_process_grp5_aync(dev, cqe);
-       else
+               break;
+       default:
                pr_err("%s(%d) invalid evt code=0x%x\n", __func__,
                       dev->id, evt_code);
+       }
 }
 
 static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
@@ -1363,7 +1387,8 @@ mbx_err:
        return status;
 }
 
-int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed,
+                             u8 *lnk_state)
 {
        int status = -ENOMEM;
        struct ocrdma_get_link_speed_rsp *rsp;
@@ -1384,8 +1409,11 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
                goto mbx_err;
 
        rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
-       *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
-                       >> OCRDMA_PHY_PS_SHIFT;
+       if (lnk_speed)
+               *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
+                             >> OCRDMA_PHY_PS_SHIFT;
+       if (lnk_state)
+               *lnk_state = (rsp->res_lnk_st & OCRDMA_LINK_ST_MASK);
 
 mbx_err:
        kfree(cmd);
index 7ed885c1851e28740b81a0588493c0d0ca92bc42..ebc1f442aec37aabd691d374deb3fdd41acc4302 100644 (file)
@@ -106,7 +106,8 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
                       bool solicited, u16 cqe_popped);
 
 /* verbs specific mailbox commands */
-int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
+int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed,
+                             u8 *lnk_st);
 int ocrdma_query_config(struct ocrdma_dev *,
                        struct ocrdma_mbx_query_config *config);
 
@@ -153,5 +154,6 @@ char *port_speed_string(struct ocrdma_dev *dev);
 void ocrdma_init_service_level(struct ocrdma_dev *);
 void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev);
 void ocrdma_free_pd_range(struct ocrdma_dev *dev);
+void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate);
 
 #endif                         /* __OCRDMA_HW_H__ */
index ebe40b414c9db5ee3587dcd8648e978c76c2fc6d..3afb40b85159bd2c10559443536f83af5e4fa9ca 100644 (file)
@@ -290,6 +290,7 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
 static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 {
        int status = 0, i;
+       u8 lstate = 0;
        struct ocrdma_dev *dev;
 
        dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
@@ -319,6 +320,11 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
        if (status)
                goto alloc_err;
 
+       /* Query Link state and update */
+       status = ocrdma_mbx_get_link_speed(dev, NULL, &lstate);
+       if (!status)
+               ocrdma_update_link_state(dev, lstate);
+
        for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
                if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
                        goto sysfs_err;
@@ -373,7 +379,7 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
        ocrdma_remove_free(dev);
 }
 
-static int ocrdma_open(struct ocrdma_dev *dev)
+static int ocrdma_dispatch_port_active(struct ocrdma_dev *dev)
 {
        struct ib_event port_event;
 
@@ -384,7 +390,7 @@ static int ocrdma_open(struct ocrdma_dev *dev)
        return 0;
 }
 
-static int ocrdma_close(struct ocrdma_dev *dev)
+static int ocrdma_dispatch_port_error(struct ocrdma_dev *dev)
 {
        struct ib_event err_event;
 
@@ -397,7 +403,7 @@ static int ocrdma_close(struct ocrdma_dev *dev)
 
 static void ocrdma_shutdown(struct ocrdma_dev *dev)
 {
-       ocrdma_close(dev);
+       ocrdma_dispatch_port_error(dev);
        ocrdma_remove(dev);
 }
 
@@ -408,18 +414,28 @@ static void ocrdma_shutdown(struct ocrdma_dev *dev)
 static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
 {
        switch (event) {
-       case BE_DEV_UP:
-               ocrdma_open(dev);
-               break;
-       case BE_DEV_DOWN:
-               ocrdma_close(dev);
-               break;
        case BE_DEV_SHUTDOWN:
                ocrdma_shutdown(dev);
                break;
+       default:
+               break;
        }
 }
 
+void ocrdma_update_link_state(struct ocrdma_dev *dev, u8 lstate)
+{
+       if (!(dev->flags & OCRDMA_FLAGS_LINK_STATUS_INIT)) {
+               dev->flags |= OCRDMA_FLAGS_LINK_STATUS_INIT;
+               if (!lstate)
+                       return;
+       }
+
+       if (!lstate)
+               ocrdma_dispatch_port_error(dev);
+       else
+               ocrdma_dispatch_port_active(dev);
+}
+
 static struct ocrdma_driver ocrdma_drv = {
        .name                   = "ocrdma_driver",
        .add                    = ocrdma_add,
index 6a38268bbe9fb6b981e27f2ec42da8fd10adbfc2..99dd6fdf06d7b44bcea2f0fb28b90edf54aefcc0 100644 (file)
@@ -465,8 +465,11 @@ struct ocrdma_ae_qp_mcqe {
        u32 valid_ae_event;
 };
 
-#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
-#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
+enum ocrdma_async_event_code {
+       OCRDMA_ASYNC_LINK_EVE_CODE      = 0x01,
+       OCRDMA_ASYNC_GRP5_EVE_CODE      = 0x05,
+       OCRDMA_ASYNC_RDMA_EVE_CODE      = 0x14
+};
 
 enum ocrdma_async_grp5_events {
        OCRDMA_ASYNC_EVENT_QOS_VALUE    = 0x01,
@@ -489,6 +492,44 @@ enum OCRDMA_ASYNC_EVENT_TYPE {
        OCRDMA_MAX_ASYNC_ERRORS
 };
 
+struct ocrdma_ae_lnkst_mcqe {
+       u32 speed_state_ptn;
+       u32 qos_reason_falut;
+       u32 evt_tag;
+       u32 valid_ae_event;
+};
+
+enum {
+       OCRDMA_AE_LSC_PORT_NUM_MASK     = 0x3F,
+       OCRDMA_AE_LSC_PT_SHIFT          = 0x06,
+       OCRDMA_AE_LSC_PT_MASK           = (0x03 <<
+                       OCRDMA_AE_LSC_PT_SHIFT),
+       OCRDMA_AE_LSC_LS_SHIFT          = 0x08,
+       OCRDMA_AE_LSC_LS_MASK           = (0xFF <<
+                       OCRDMA_AE_LSC_LS_SHIFT),
+       OCRDMA_AE_LSC_LD_SHIFT          = 0x10,
+       OCRDMA_AE_LSC_LD_MASK           = (0xFF <<
+                       OCRDMA_AE_LSC_LD_SHIFT),
+       OCRDMA_AE_LSC_PPS_SHIFT         = 0x18,
+       OCRDMA_AE_LSC_PPS_MASK          = (0xFF <<
+                       OCRDMA_AE_LSC_PPS_SHIFT),
+       OCRDMA_AE_LSC_PPF_MASK          = 0xFF,
+       OCRDMA_AE_LSC_ER_SHIFT          = 0x08,
+       OCRDMA_AE_LSC_ER_MASK           = (0xFF <<
+                       OCRDMA_AE_LSC_ER_SHIFT),
+       OCRDMA_AE_LSC_QOS_SHIFT         = 0x10,
+       OCRDMA_AE_LSC_QOS_MASK          = (0xFFFF <<
+                       OCRDMA_AE_LSC_QOS_SHIFT)
+};
+
+enum {
+       OCRDMA_AE_LSC_PLINK_DOWN        = 0x00,
+       OCRDMA_AE_LSC_PLINK_UP          = 0x01,
+       OCRDMA_AE_LSC_LLINK_DOWN        = 0x02,
+       OCRDMA_AE_LSC_LLINK_MASK        = 0x02,
+       OCRDMA_AE_LSC_LLINK_UP          = 0x03
+};
+
 /* mailbox command request and responses */
 enum {
        OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT          = 2,
@@ -676,7 +717,7 @@ enum {
        OCRDMA_PHY_PFLT_SHIFT   = 0x18,
        OCRDMA_QOS_LNKSP_MASK   = 0xFFFF0000,
        OCRDMA_QOS_LNKSP_SHIFT  = 0x10,
-       OCRDMA_LLST_MASK        = 0xFF,
+       OCRDMA_LINK_ST_MASK     = 0x01,
        OCRDMA_PLFC_MASK        = 0x00000400,
        OCRDMA_PLFC_SHIFT       = 0x8,
        OCRDMA_PLRFC_MASK       = 0x00000200,
@@ -691,7 +732,7 @@ struct ocrdma_get_link_speed_rsp {
 
        u32 pflt_pps_ld_pnum;
        u32 qos_lsp;
-       u32 res_lls;
+       u32 res_lnk_st;
 };
 
 enum {
index 583001bcfb8fc8c10a1e088b451d7de12adc2aa8..76e96f97b3f6459e68d13c444be6e1beb6b578a9 100644 (file)
@@ -171,7 +171,7 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
        int status;
        u8 speed;
 
-       status = ocrdma_mbx_get_link_speed(dev, &speed);
+       status = ocrdma_mbx_get_link_speed(dev, &speed, NULL);
        if (status)
                speed = OCRDMA_PHYS_LINK_SPEED_ZERO;