Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland...
author    Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 18 Nov 2013 23:36:04 +0000 (15:36 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Mon, 18 Nov 2013 23:36:04 +0000 (15:36 -0800)
Pull infiniband/rdma updates from Roland Dreier:
 - Re-enable flow steering verbs with a new, improved userspace ABI
 - Fixes for slow connection setup caused by poor GID lookup scalability
   (see the sketch after this list)
 - IPoIB fixes
 - Many fixes to HW drivers including mlx4, mlx5, ocrdma and qib
 - Further improvements to SRP error handling
 - Add new transport type for Cisco usNIC
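
The GID-lookup fix replaces the per-entry ib_query_gid() device queries of
the old find_gid_port() helper (removed in the diff below) with a single
search of the core GID cache.  A minimal sketch of the cached lookup,
assuming the ib_find_cached_gid() signature from <rdma/ib_cache.h> of this
cycle; the wrapper name is ours:

        /* Find which port, if any, has 'gid' in its cached GID table.
         * ib_find_cached_gid() walks the in-kernel cache, so no per-entry
         * ib_query_gid() round trip to the device is needed. */
        static int sketch_find_port_for_gid(struct ib_device *device,
                                            union ib_gid *gid, u8 *port_out)
        {
                u8 found_port;
                int ret;

                ret = ib_find_cached_gid(device, gid, &found_port, NULL);
                if (ret)
                        return ret;     /* GID not in any port's cache */

                *port_out = found_port;
                return 0;
        }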

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (66 commits)
  IB/core: Re-enable create_flow/destroy_flow uverbs
  IB/core: extended command: an improved infrastructure for uverbs commands
  IB/core: Remove ib_uverbs_flow_spec structure from userspace
  IB/core: Use a common header for uverbs flow_specs
  IB/core: Make uverbs flow structure use names like verbs ones
  IB/core: Rename 'flow' structs to match other uverbs structs
  IB/core: clarify overflow/underflow checks on ib_create/destroy_flow
  IB/ucma: Convert use of typedef ctl_table to struct ctl_table
  IB/cm: Convert to using idr_alloc_cyclic()
  IB/mlx5: Fix page shift in create CQ for userspace
  IB/mlx4: Fix device max capabilities check
  IB/mlx5: Fix list_del of empty list
  IB/mlx5: Remove dead code
  IB/core: Enforce MR access rights rules on kernel consumers
  IB/mlx4: Fix endless loop in resize CQ
  RDMA/cma: Remove unused argument and minor dead code
  RDMA/ucma: Discard events for IDs not yet claimed by user space
  IB/core: Add Cisco usNIC rdma node and transport types
  RDMA/nes: Remove self-assignment from nes_query_qp()
  IB/srp: Report receive errors correctly
  ...
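
The re-enabled flow steering verbs are callable from kernel consumers as
well.  A minimal sketch of attaching a unicast L2 steering rule to a QP,
assuming the ib_flow_attr/ib_flow_spec_eth layout and the
ib_create_flow()/ib_destroy_flow() entry points from <rdma/ib_verbs.h> of
this cycle; the QP, port, domain and MAC address are placeholders:

        struct {
                struct ib_flow_attr     attr;
                struct ib_flow_spec_eth eth;
        } flow = {
                .attr = {
                        .type         = IB_FLOW_ATTR_NORMAL,
                        .size         = sizeof(flow),   /* header + specs */
                        .num_of_specs = 1,              /* one spec follows */
                        .port         = 1,
                },
                .eth = {
                        .type = IB_FLOW_SPEC_ETH,
                        .size = sizeof(flow.eth),
                        /* steer frames for this destination MAC to the QP */
                        .val  = { .dst_mac = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 } },
                        .mask = { .dst_mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } },
                },
        };
        struct ib_flow *fl;

        fl = ib_create_flow(qp, &flow.attr, IB_FLOW_DOMAIN_USER);
        if (IS_ERR(fl))
                return PTR_ERR(fl);     /* e.g. -ENOSYS without HW support */
        /* ... receive on the QP ... */
        ib_destroy_flow(fl);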

drivers/infiniband/core/cma.c
drivers/infiniband/hw/mlx4/main.c

index d2172e71f985fd9f8ec1418c1c719a5491b97a44,830c983fdeff422e7dad39dd5e793636ae3ee4da..8e49db690f33e9a67f71cbcca8b62c9d92fde9f4
@@@ -328,28 -328,6 +328,6 @@@ static int cma_set_qkey(struct rdma_id_
        return ret;
  }
  
- static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
- {
-       int i;
-       int err;
-       struct ib_port_attr props;
-       union ib_gid tmp;
-       err = ib_query_port(device, port_num, &props);
-       if (err)
-               return err;
-       for (i = 0; i < props.gid_tbl_len; ++i) {
-               err = ib_query_gid(device, port_num, i, &tmp);
-               if (err)
-                       return err;
-               if (!memcmp(&tmp, gid, sizeof tmp))
-                       return 0;
-       }
-       return -EADDRNOTAVAIL;
- }
  static void cma_translate_ib(struct sockaddr_ib *sib, struct rdma_dev_addr *dev_addr)
  {
        dev_addr->dev_type = ARPHRD_INFINIBAND;
@@@ -371,13 -349,14 +349,14 @@@ static int cma_translate_addr(struct so
        return ret;
  }
  
- static int cma_acquire_dev(struct rdma_id_private *id_priv)
+ static int cma_acquire_dev(struct rdma_id_private *id_priv,
+                          struct rdma_id_private *listen_id_priv)
  {
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
        struct cma_device *cma_dev;
        union ib_gid gid, iboe_gid;
        int ret = -ENODEV;
-       u8 port;
+       u8 port, found_port;
        enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
                IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
  
        iboe_addr_get_sgid(dev_addr, &iboe_gid);
        memcpy(&gid, dev_addr->src_dev_addr +
               rdma_addr_gid_offset(dev_addr), sizeof gid);
+       if (listen_id_priv &&
+           rdma_port_get_link_layer(listen_id_priv->id.device,
+                                    listen_id_priv->id.port_num) == dev_ll) {
+               cma_dev = listen_id_priv->cma_dev;
+               port = listen_id_priv->id.port_num;
+               if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
+                   rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+                       ret = ib_find_cached_gid(cma_dev->device, &iboe_gid,
+                                                &found_port, NULL);
+               else
+                       ret = ib_find_cached_gid(cma_dev->device, &gid,
+                                                &found_port, NULL);
+       if (!ret && (port == found_port)) {
+                       id_priv->id.port_num = found_port;
+                       goto out;
+               }
+       }
        list_for_each_entry(cma_dev, &dev_list, list) {
                for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+                       if (listen_id_priv &&
+                           listen_id_priv->cma_dev == cma_dev &&
+                           listen_id_priv->id.port_num == port)
+                               continue;
                        if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
                                if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
                                    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
-                                       ret = find_gid_port(cma_dev->device, &iboe_gid, port);
+                                       ret = ib_find_cached_gid(cma_dev->device, &iboe_gid, &found_port, NULL);
                                else
-                                       ret = find_gid_port(cma_dev->device, &gid, port);
+                                       ret = ib_find_cached_gid(cma_dev->device, &gid, &found_port, NULL);
  
-                               if (!ret) {
-                                       id_priv->id.port_num = port;
+                               if (!ret && (port == found_port)) {
+                                       id_priv->id.port_num = found_port;
                                        goto out;
                                }
                        }
@@@ -1292,7 -1293,7 +1293,7 @@@ static int cma_req_handler(struct ib_cm
        }
  
        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
-       ret = cma_acquire_dev(conn_id);
+       ret = cma_acquire_dev(conn_id, listen_id);
        if (ret)
                goto err2;
  
@@@ -1451,7 -1452,6 +1452,6 @@@ static int iw_conn_req_handler(struct i
  {
        struct rdma_cm_id *new_cm_id;
        struct rdma_id_private *listen_id, *conn_id;
-       struct net_device *dev = NULL;
        struct rdma_cm_event event;
        int ret;
        struct ib_device_attr attr;
                goto out;
        }
  
-       ret = cma_acquire_dev(conn_id);
+       ret = cma_acquire_dev(conn_id, listen_id);
        if (ret) {
                mutex_unlock(&conn_id->handler_mutex);
                rdma_destroy_id(new_cm_id);
        cma_deref_id(conn_id);
  
  out:
-       if (dev)
-               dev_put(dev);
        mutex_unlock(&listen_id->handler_mutex);
        return ret;
  }
@@@ -1848,26 -1846,6 +1846,26 @@@ static int cma_resolve_iw_route(struct 
        return 0;
  }
  
 +static int iboe_tos_to_sl(struct net_device *ndev, int tos)
 +{
 +      int prio;
 +      struct net_device *dev;
 +
 +      prio = rt_tos2priority(tos);
 +      dev = ndev->priv_flags & IFF_802_1Q_VLAN ?
 +              vlan_dev_real_dev(ndev) : ndev;
 +
 +      if (dev->num_tc)
 +              return netdev_get_prio_tc_map(dev, prio);
 +
 +#if IS_ENABLED(CONFIG_VLAN_8021Q)
 +      if (ndev->priv_flags & IFF_802_1Q_VLAN)
 +              return (vlan_dev_get_egress_qos_mask(ndev, prio) &
 +                      VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 +#endif
 +      return 0;
 +}
 +
  static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
  {
        struct rdma_route *route = &id_priv->id.route;
        route->path_rec->reversible = 1;
        route->path_rec->pkey = cpu_to_be16(0xffff);
        route->path_rec->mtu_selector = IB_SA_EQ;
 -      route->path_rec->sl = netdev_get_prio_tc_map(
 -                      ndev->priv_flags & IFF_802_1Q_VLAN ?
 -                              vlan_dev_real_dev(ndev) : ndev,
 -                      rt_tos2priority(id_priv->tos));
 -
 +      route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);
        route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
        route->path_rec->rate_selector = IB_SA_EQ;
        route->path_rec->rate = iboe_get_rate(ndev);
@@@ -2066,7 -2048,7 +2064,7 @@@ static void addr_handler(int status, st
                goto out;
  
        if (!status && !id_priv->cma_dev)
-               status = cma_acquire_dev(id_priv);
+               status = cma_acquire_dev(id_priv, NULL);
  
        if (status) {
                if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
@@@ -2310,7 -2292,7 +2308,7 @@@ static int cma_alloc_any_port(struct id
        int low, high, remaining;
        unsigned int rover;
  
 -      inet_get_local_port_range(&low, &high);
 +      inet_get_local_port_range(&init_net, &low, &high);
        remaining = (high - low) + 1;
        rover = net_random() % remaining + low;
  retry:
@@@ -2563,7 -2545,7 +2561,7 @@@ int rdma_bind_addr(struct rdma_cm_id *i
                if (ret)
                        goto err1;
  
-               ret = cma_acquire_dev(id_priv);
+               ret = cma_acquire_dev(id_priv, NULL);
                if (ret)
                        goto err1;
        }
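
On the iboe_tos_to_sl() helper added above: for VLAN devices the service
level comes from the 802.1p PCP bits of the egress QoS mapping.  A worked
sketch of that arithmetic, assuming the standard VLAN_PRIO_MASK (0xe000)
and VLAN_PRIO_SHIFT (13) definitions from <linux/if_vlan.h>; the sample
values are illustrative:

        /* rt_tos2priority() turns the IP TOS byte into an skb priority;
         * the VLAN egress table maps that priority to a PCP stored in
         * bits 15:13 of the returned qos entry. */
        u32 prio = rt_tos2priority(tos);
        u16 qos  = vlan_dev_get_egress_qos_mask(ndev, prio);
        u8  sl   = (qos & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;

        /* e.g. qos == 0xa000:  (0xa000 & 0xe000) >> 13 == 5,  so SL 5 */
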
index 6a0a0d29660df5fe7d3cd90bf5c74eead9da4936,1aad9b3e6bdd3923a70f8809175a8e10169f0d07..1958c5ca792ad52a39ece6247040588c00b84ba6
@@@ -177,18 -177,18 +177,18 @@@ static int mlx4_ib_query_device(struct 
  
        props->max_mr_size         = ~0ull;
        props->page_size_cap       = dev->dev->caps.page_size_cap;
 -      props->max_qp              = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
 +      props->max_qp              = dev->dev->quotas.qp;
        props->max_qp_wr           = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
        props->max_sge             = min(dev->dev->caps.max_sq_sg,
                                         dev->dev->caps.max_rq_sg);
 -      props->max_cq              = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
 +      props->max_cq              = dev->dev->quotas.cq;
        props->max_cqe             = dev->dev->caps.max_cqes;
 -      props->max_mr              = dev->dev->caps.num_mpts - dev->dev->caps.reserved_mrws;
 +      props->max_mr              = dev->dev->quotas.mpt;
        props->max_pd              = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
        props->max_qp_rd_atom      = dev->dev->caps.max_qp_dest_rdma;
        props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
        props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
 -      props->max_srq             = dev->dev->caps.num_srqs - dev->dev->caps.reserved_srqs;
 +      props->max_srq             = dev->dev->quotas.srq;
        props->max_srq_wr          = dev->dev->caps.max_srq_wqes - 1;
        props->max_srq_sge         = dev->dev->caps.max_srq_sge;
        props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
@@@ -526,6 -526,7 +526,6 @@@ static int mlx4_ib_modify_device(struc
        if (IS_ERR(mailbox))
                return 0;
  
 -      memset(mailbox->buf, 0, 256);
        memcpy(mailbox->buf, props->node_desc, 64);
        mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
                 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
@@@ -546,6 -547,8 +546,6 @@@ static int mlx4_SET_PORT(struct mlx4_ib
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);
  
 -      memset(mailbox->buf, 0, 256);
 -
        if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
                *(u8 *) mailbox->buf         = !!reset_qkey_viols << 6;
                ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
@@@ -876,6 -879,8 +876,6 @@@ static int __mlx4_ib_create_flow(struc
        struct mlx4_ib_dev *mdev = to_mdev(qp->device);
        struct mlx4_cmd_mailbox *mailbox;
        struct mlx4_net_trans_rule_hw_ctrl *ctrl;
 -      size_t rule_size = sizeof(struct mlx4_net_trans_rule_hw_ctrl) +
 -                         (sizeof(struct _rule_hw) * flow_attr->num_of_specs);
  
        static const u16 __mlx4_domain[] = {
                [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
        mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);
 -      memset(mailbox->buf, 0, rule_size);
        ctrl = mailbox->buf;
  
        ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
@@@ -1685,11 -1691,9 +1685,9 @@@ static void *mlx4_ib_add(struct mlx4_de
                ibdev->ib_dev.create_flow       = mlx4_ib_create_flow;
                ibdev->ib_dev.destroy_flow      = mlx4_ib_destroy_flow;
  
- #ifdef CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING
-               ibdev->ib_dev.uverbs_cmd_mask   |=
-                       (1ull << IB_USER_VERBS_CMD_CREATE_FLOW) |
-                       (1ull << IB_USER_VERBS_CMD_DESTROY_FLOW);
- #endif /* CONFIG_INFINIBAND_EXPERIMENTAL_UVERBS_FLOW_STEERING */
+               ibdev->ib_dev.uverbs_ex_cmd_mask        |=
+                       (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+                       (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
        }
  
        mlx4_ib_alloc_eqs(dev, ibdev);
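
Finally, on uverbs_ex_cmd_mask: the extended-command infrastructure only
dispatches an extended verb when the driver has opted in, which is what
the mlx4 hunk above now does for create/destroy flow.  A paraphrased
sketch of the core-side gate, not the verbatim ib_uverbs_write() code:

        /* extended commands are fenced by a per-device opt-in bitmask */
        if (!(ib_dev->uverbs_ex_cmd_mask &
              (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW)))
                return -ENOSYS;         /* verb not enabled by the driver */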