net/mlx4_core: Avoid command timeouts during VF driver device shutdown
authorJack Morgenstein <jackm@dev.mellanox.co.il>
Mon, 30 Jan 2017 13:11:45 +0000 (15:11 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 17 Jun 2017 04:39:37 +0000 (06:39 +0200)
[ Upstream commit d585df1c5ccf995fcee910705ad7a9cdd11d4152 ]

Some Hypervisors detach VFs from VMs by instantly causing an FLR event
to be generated for a VF.

In the mlx4 case, this will cause that VF's comm channel to be disabled
before the VM has an opportunity to invoke the VF device's "shutdown"
method.

The result is that the VF driver on the VM will experience a command
timeout during the shutdown process when the Hypervisor does not deliver
a command-completion event to the VM.

To avoid FW command timeouts on the VM when the driver's shutdown method
is invoked, we detect the absence of the VF's comm channel at the very
start of the shutdown process. If the comm-channel has already been
disabled, we cause all FW commands during the device shutdown process to
immediately return success (and thus avoid all command timeouts).

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/net/ethernet/mellanox/mlx4/catas.c
drivers/net/ethernet/mellanox/mlx4/intf.c
drivers/net/ethernet/mellanox/mlx4/mlx4.h

index 715de8affcc950e0ea18fd706bc8f04542d34a6f..e203d0c4e5a3ba14bb0ae958263db7cdd7417299 100644 (file)
@@ -158,7 +158,7 @@ static int mlx4_reset_slave(struct mlx4_dev *dev)
        return -ETIMEDOUT;
 }
 
-static int mlx4_comm_internal_err(u32 slave_read)
+int mlx4_comm_internal_err(u32 slave_read)
 {
        return (u32)COMM_CHAN_EVENT_INTERNAL_ERR ==
                (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0;
index 0472941af82033852106e5ab3dfc2399ca82cdaa..1a134e08f010a994c414521d0b611f0bc9529f68 100644 (file)
@@ -218,6 +218,18 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
        struct mlx4_interface *intf;
 
        mlx4_stop_catas_poll(dev);
+       if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION &&
+           mlx4_is_slave(dev)) {
+               /* In mlx4_remove_one on a VF */
+               u32 slave_read =
+                       swab32(readl(&mlx4_priv(dev)->mfunc.comm->slave_read));
+
+               if (mlx4_comm_internal_err(slave_read)) {
+                       mlx4_dbg(dev, "%s: comm channel is down, entering error state.\n",
+                                __func__);
+                       mlx4_enter_error_state(dev->persist);
+               }
+       }
        mutex_lock(&intf_mutex);
 
        list_for_each_entry(intf, &intf_list, list)
index e1cf9036af225992c3961254cad0b474ca191e84..f5fdbd53d05232aeb982c17bed30138bfcb160c5 100644 (file)
@@ -1205,6 +1205,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type);
 void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
 
 void mlx4_enter_error_state(struct mlx4_dev_persistent *persist);
+int mlx4_comm_internal_err(u32 slave_read);
 
 int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
                    enum mlx4_port_type *type);