--- /dev/null
+What: /config/rdma_cm
+Date: November 29, 2015
+KernelVersion: 4.4.0
+Description: Interface is used to configure RDMA-capable HCAs with respect to
+ RDMA-CM attributes.
+
+ Attributes are visible only when configfs is mounted. To mount
+ configfs in /config directory use:
+ # mount -t configfs none /config/
+
+ In order to set parameters related to a specific HCA, a directory
+ for this HCA has to be created:
+ mkdir -p /config/rdma_cm/<hca>
+
+
+What: /config/rdma_cm/<hca>/ports/<port-num>/default_roce_mode
+Date: November 29, 2015
+KernelVersion: 4.4.0
+Description: RDMA-CM based connections from HCA <hca> at port <port-num>
+ will be initiated with this RoCE type as default.
+ The possible RoCE types are either "IB/RoCE v1" or "RoCE v2".
+ This parameter has RW access.
--- /dev/null
+What: /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/ndevs/<gid-index>
+Date: November 29, 2015
+KernelVersion: 4.4.0
+Contact: linux-rdma@vger.kernel.org
+Description: The name of the net-device associated with the GID residing
+ at index <gid-index>.
+
+What: /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/types/<gid-index>
+Date: November 29, 2015
+KernelVersion: 4.4.0
+Contact: linux-rdma@vger.kernel.org
+Description: The RoCE type associated with the GID residing at index <gid-index>.
+ This could either be "IB/RoCE v1" for IB and RoCE v1 based GIDs
+ or "RoCE v2" for RoCE v2 based GIDs.
+
+
Optional properties:
- autorepeat: Boolean, Enable auto repeat feature of Linux input
subsystem.
+ - label: String, name of the input device.
Each button (key) is represented as a sub-node of "gpio-keys":
Subnode properties:
Required properties:
- compatible : should be "rockchip,<name>-tsadc"
+ "rockchip,rk3228-tsadc": found on RK3228 SoCs
"rockchip,rk3288-tsadc": found on RK3288 SoCs
"rockchip,rk3368-tsadc": found on RK3368 SoCs
+ "rockchip,rk3399-tsadc": found on RK3399 SoCs
- reg : physical base address of the controller and length of memory mapped
region.
- interrupts : The interrupt number to the cpu. The interrupt specifier format
modify_ah
query_ah
destroy_ah
- bind_mw
post_send
post_recv
poll_cq
ib_modify_ah
ib_query_ah
ib_destroy_ah
- ib_bind_mw
ib_post_send
ib_post_recv
ib_req_notify_cq
from being initiated from tasks that might run on the CPU to
be de-jittered. (It is OK to force this CPU offline and then
bring it back online before you start your application.)
-BLOCK_IOPOLL_SOFTIRQ: Do all of the following:
+IRQ_POLL_SOFTIRQ: Do all of the following:
1. Force block-device interrupts onto some other CPU.
2. Initiate any block I/O and block-I/O polling on other CPUs.
3. Once your application has started, prevent CPU-hotplug operations
- nr_open
- overflowuid
- overflowgid
+- pipe-user-pages-hard
+- pipe-user-pages-soft
- protected_hardlinks
- protected_symlinks
- suid_dumpable
==============================================================
+pipe-user-pages-hard:
+
+Maximum total number of pages a non-privileged user may allocate for pipes.
+Once this limit is reached, no new pipes may be allocated until usage goes
+below the limit again. When set to 0, no limit is applied, which is the default
+setting.
+
+==============================================================
+
+pipe-user-pages-soft:
+
+Maximum total number of pages a non-privileged user may allocate for pipes
+before the pipe size gets limited to a single page. Once this limit is reached,
+new pipes will be limited to a single page in size for this user in order to
+limit total memory usage, and trying to increase them using fcntl() will be
+denied until usage goes below the limit again. The default value allows up to
+1024 pipes to be allocated at their default size. When set to 0, no limit is
+applied.
+
+==============================================================
+
protected_hardlinks:
A long-standing class of security issues is the hardlink-based
F: drivers/scsi/dpt/
DRBD DRIVER
-P: Philipp Reisner
-P: Lars Ellenberg
-M: drbd-dev@lists.linbit.com
-L: drbd-user@lists.linbit.com
+M: Philipp Reisner <philipp.reisner@linbit.com>
+M: Lars Ellenberg <lars.ellenberg@linbit.com>
+L: drbd-dev@lists.linbit.com
W: http://www.drbd.org
-T: git git://git.drbd.org/linux-2.6-drbd.git drbd
-T: git git://git.drbd.org/drbd-8.3.git
+T: git git://git.linbit.com/linux-drbd.git
+T: git git://git.linbit.com/drbd-8.4.git
S: Supported
F: drivers/block/drbd/
F: lib/lru_cache.c
S: Odd Fixes
F: drivers/media/radio/radio-miropcm20*
-Mellanox MLX5 core VPI driver
-M: Eli Cohen <eli@mellanox.com>
+MELLANOX MLX4 core VPI driver
+M: Yishai Hadas <yishaih@mellanox.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
Q: http://patchwork.ozlabs.org/project/netdev/list/
+S: Supported
+F: drivers/net/ethernet/mellanox/mlx4/
+F: include/linux/mlx4/
+
+MELLANOX MLX4 IB driver
+M: Yishai Hadas <yishaih@mellanox.com>
+L: linux-rdma@vger.kernel.org
+W: http://www.mellanox.com
Q: http://patchwork.kernel.org/project/linux-rdma/list/
-T: git git://openfabrics.org/~eli/connect-ib.git
+S: Supported
+F: drivers/infiniband/hw/mlx4/
+F: include/linux/mlx4/
+
+MELLANOX MLX5 core VPI driver
+M: Matan Barak <matanb@mellanox.com>
+M: Leon Romanovsky <leonro@mellanox.com>
+L: netdev@vger.kernel.org
+L: linux-rdma@vger.kernel.org
+W: http://www.mellanox.com
+Q: http://patchwork.ozlabs.org/project/netdev/list/
S: Supported
F: drivers/net/ethernet/mellanox/mlx5/core/
F: include/linux/mlx5/
-Mellanox MLX5 IB driver
-M: Eli Cohen <eli@mellanox.com>
+MELLANOX MLX5 IB driver
+M: Matan Barak <matanb@mellanox.com>
+M: Leon Romanovsky <leonro@mellanox.com>
L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
Q: http://patchwork.kernel.org/project/linux-rdma/list/
-T: git git://openfabrics.org/~eli/connect-ib.git
S: Supported
-F: include/linux/mlx5/
F: drivers/infiniband/hw/mlx5/
+F: include/linux/mlx5/
MELEXIS MLX90614 DRIVER
M: Crt Mori <cmo@melexis.com>
T: git git://github.com/jonmason/ntb.git
F: drivers/ntb/hw/intel/
+NTB AMD DRIVER
+M: Xiangliang Yu <Xiangliang.Yu@amd.com>
+L: linux-ntb@googlegroups.com
+S: Supported
+F: drivers/ntb/hw/amd/
+
NTFS FILESYSTEM
M: Anton Altaparmakov <anton@tuxera.com>
L: linux-ntfs-dev@lists.sourceforge.net
F: drivers/net/ethernet/dlink/sundance.c
SUPERH
+M: Yoshinori Sato <ysato@users.sourceforge.jp>
+M: Rich Felker <dalias@libc.org>
L: linux-sh@vger.kernel.org
Q: http://patchwork.kernel.org/project/linux-sh/list/
-S: Orphan
+S: Maintained
F: Documentation/sh/
F: arch/sh/
F: drivers/sh/
mobile SoCs in the Kona family of chips (e.g. bcm28155,
bcm11351, etc...)
- config DEBUG_BCM63XX
+ config DEBUG_BCM63XX_UART
bool "Kernel low-level debugging on BCM63XX UART"
depends on ARCH_BCM_63XX
- select DEBUG_UART_BCM63XX
config DEBUG_BERLIN_UART
bool "Marvell Berlin SoC Debug UART"
default "debug/vf.S" if DEBUG_VF_UART
default "debug/vt8500.S" if DEBUG_VT8500_UART0
default "debug/zynq.S" if DEBUG_ZYNQ_UART0 || DEBUG_ZYNQ_UART1
- default "debug/bcm63xx.S" if DEBUG_UART_BCM63XX
+ default "debug/bcm63xx.S" if DEBUG_BCM63XX_UART
default "debug/digicolor.S" if DEBUG_DIGICOLOR_UA0
default "mach/debug-macro.S"
ARCH_IOP13XX || ARCH_IOP32X || ARCH_IOP33X || ARCH_IXP4XX || \
ARCH_RPC
-# Compatibility options for BCM63xx
-config DEBUG_UART_BCM63XX
- def_bool ARCH_BCM_63XX
-
config DEBUG_UART_PHYS
hex "Physical base address of debug UART"
default 0x00100a00 if DEBUG_NETX_UART
default 0xfffb0000 if DEBUG_OMAP1UART1 || DEBUG_OMAP7XXUART1
default 0xfffb0800 if DEBUG_OMAP1UART2 || DEBUG_OMAP7XXUART2
default 0xfffb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3
- default 0xfffe8600 if DEBUG_UART_BCM63XX
+ default 0xfffe8600 if DEBUG_BCM63XX_UART
default 0xfffff700 if ARCH_IOP33X
depends on ARCH_EP93XX || \
DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \
DEBUG_RMOBILE_SCIFA0 || DEBUG_RMOBILE_SCIFA1 || \
DEBUG_RMOBILE_SCIFA4 || DEBUG_S3C24XX_UART || \
DEBUG_S3C64XX_UART || \
- DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \
+ DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \
DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0 || \
DEBUG_AT91_UART
default 0xfb10c000 if DEBUG_REALVIEW_PB1176_PORT
default 0xfc40ab00 if DEBUG_BRCMSTB_UART
default 0xfc705000 if DEBUG_ZTE_ZX
- default 0xfcfe8600 if DEBUG_UART_BCM63XX
+ default 0xfcfe8600 if DEBUG_BCM63XX_UART
default 0xfd000000 if DEBUG_SPEAR3XX || DEBUG_SPEAR13XX
default 0xfd012000 if DEBUG_MVEBU_UART0_ALTERNATE && ARCH_MV78XX0
default 0xfd883000 if DEBUG_ALPINE_UART0
DEBUG_NETX_UART || \
DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \
DEBUG_S3C64XX_UART || \
- DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \
+ DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \
DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0
config DEBUG_UART_8250_SHIFT
};
&extal1_clk {
- clock-frequency = <25000000>;
+ clock-frequency = <24000000>;
};
&extal2_clk {
clock-frequency = <48000000>;
ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
-I$(srctree)/arch/arm/plat-versatile/include
-
+obj-y := core.o
obj-$(CONFIG_REALVIEW_DT) += realview-dt.o
obj-$(CONFIG_SMP) += platsmp-dt.o
-obj-y := core.o
ifdef CONFIG_ATAGS
obj-$(CONFIG_MACH_REALVIEW_EB) += realview_eb.o
select ARCH_HAS_RESET_CONTROLLER
select RESET_CONTROLLER
select SOC_BUS
- select USB_ULPI if USB_PHY
- select USB_ULPI_VIEWPORT if USB_PHY
help
This enables support for NVIDIA Tegra based systems.
-
-if ARCH_TEGRA
-
-config ARCH_TEGRA_2x_SOC
- bool "Enable support for Tegra20 family"
- select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
- select ARM_ERRATA_720789
- select ARM_ERRATA_754327 if SMP
- select ARM_ERRATA_764369 if SMP
- select PINCTRL_TEGRA20
- select PL310_ERRATA_727915 if CACHE_L2X0
- select PL310_ERRATA_769419 if CACHE_L2X0
- select TEGRA_TIMER
- help
- Support for NVIDIA Tegra AP20 and T20 processors, based on the
- ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
-
-config ARCH_TEGRA_3x_SOC
- bool "Enable support for Tegra30 family"
- select ARM_ERRATA_754322
- select ARM_ERRATA_764369 if SMP
- select PINCTRL_TEGRA30
- select PL310_ERRATA_769419 if CACHE_L2X0
- select TEGRA_TIMER
- help
- Support for NVIDIA Tegra T30 processor family, based on the
- ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
-
-config ARCH_TEGRA_114_SOC
- bool "Enable support for Tegra114 family"
- select ARM_ERRATA_798181 if SMP
- select ARM_L1_CACHE_SHIFT_6
- select HAVE_ARM_ARCH_TIMER
- select PINCTRL_TEGRA114
- select TEGRA_TIMER
- help
- Support for NVIDIA Tegra T114 processor family, based on the
- ARM CortexA15MP CPU
-
-config ARCH_TEGRA_124_SOC
- bool "Enable support for Tegra124 family"
- select ARM_L1_CACHE_SHIFT_6
- select HAVE_ARM_ARCH_TIMER
- select PINCTRL_TEGRA124
- select TEGRA_TIMER
- help
- Support for NVIDIA Tegra T124 processor family, based on the
- ARM CortexA15MP CPU
-
-endif
* tegra20_tear_down_core in IRAM
*/
ENTRY(tegra20_sleep_core_finish)
+ mov r4, r0
/* Flush, disable the L1 data cache and exit SMP */
+ mov r0, #TEGRA_FLUSH_CACHE_ALL
bl tegra_disable_clean_inv_dcache
+ mov r0, r4
mov32 r3, tegra_shut_off_mmu
add r3, r3, r0
* tegra30_tear_down_core in IRAM
*/
ENTRY(tegra30_sleep_core_finish)
+ mov r4, r0
/* Flush, disable the L1 data cache and exit SMP */
+ mov r0, #TEGRA_FLUSH_CACHE_ALL
bl tegra_disable_clean_inv_dcache
+ mov r0, r4
/*
* Preload all the address literals that are needed for the
while (i--)
if (pages[i])
__free_pages(pages[i], 0);
- if (array_size <= PAGE_SIZE)
- kfree(pages);
- else
- vfree(pages);
+ kvfree(pages);
return NULL;
}
size_t size, struct dma_attrs *attrs)
{
int count = size >> PAGE_SHIFT;
- int array_size = count * sizeof(struct page *);
int i;
if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
__free_pages(pages[i], 0);
}
- if (array_size <= PAGE_SIZE)
- kfree(pages);
- else
- vfree(pages);
+ kvfree(pages);
return 0;
}
help
This enables support for the NVIDIA Tegra SoC family.
-config ARCH_TEGRA_132_SOC
- bool "NVIDIA Tegra132 SoC"
- depends on ARCH_TEGRA
- select PINCTRL_TEGRA124
- select USB_ULPI if USB_PHY
- select USB_ULPI_VIEWPORT if USB_PHY
- help
- Enable support for NVIDIA Tegra132 SoC, based on the Denver
- ARMv8 CPU. The Tegra132 SoC is similar to the Tegra124 SoC,
- but contains an NVIDIA Denver CPU complex in place of
- Tegra124's "4+1" Cortex-A15 CPU complex.
-
config ARCH_SPRD
bool "Spreadtrum SoC platform"
help
dts-dirs += hisilicon
dts-dirs += marvell
dts-dirs += mediatek
+dts-dirs += nvidia
dts-dirs += qcom
dts-dirs += renesas
dts-dirs += rockchip
--- /dev/null
+dtb-$(CONFIG_ARCH_TEGRA_132_SOC) += tegra132-norrin.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2371-0000.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2371-2180.dtb
+dtb-$(CONFIG_ARCH_TEGRA_210_SOC) += tegra210-p2571.dtb
+
+always := $(dtb-y)
+clean-files := *.dtb
--- /dev/null
+/dts-v1/;
+
+#include <dt-bindings/input/input.h>
+#include "tegra132.dtsi"
+
+/ {
+ model = "NVIDIA Tegra132 Norrin";
+ compatible = "nvidia,norrin", "nvidia,tegra132", "nvidia,tegra124";
+
+ aliases {
+ rtc0 = "/i2c@0,7000d000/as3722@40";
+ rtc1 = "/rtc@0,7000e000";
+ };
+
+ memory {
+ device_type = "memory";
+ reg = <0x0 0x80000000 0x0 0x80000000>;
+ };
+
+ host1x@0,50000000 {
+ hdmi@0,54280000 {
+ status = "disabled";
+
+ vdd-supply = <&vdd_3v3_hdmi>;
+ pll-supply = <&vdd_hdmi_pll>;
+ hdmi-supply = <&vdd_5v0_hdmi>;
+
+ nvidia,ddc-i2c-bus = <&hdmi_ddc>;
+ nvidia,hpd-gpio =
+ <&gpio TEGRA_GPIO(N, 7) GPIO_ACTIVE_HIGH>;
+ };
+
+ sor@0,54540000 {
+ status = "okay";
+
+ nvidia,dpaux = <&dpaux>;
+ nvidia,panel = <&panel>;
+ };
+
+ dpaux: dpaux@0,545c0000 {
+ vdd-supply = <&vdd_3v3_panel>;
+ status = "okay";
+ };
+ };
+
+ gpu@0,57000000 {
+ status = "okay";
+
+ vdd-supply = <&vdd_gpu>;
+ };
+
+ pinmux@0,70000868 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinmux_default>;
+
+ pinmux_default: pinmux@0 {
+ dap_mclk1_pw4 {
+ nvidia,pins = "dap_mclk1_pw4";
+ nvidia,function = "extperiph1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_din_pa4 {
+ nvidia,pins = "dap2_din_pa4";
+ nvidia,function = "i2s1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ dap2_dout_pa5 {
+ nvidia,pins = "dap2_dout_pa5",
+ "dap2_fs_pa2",
+ "dap2_sclk_pa3";
+ nvidia,function = "i2s1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ dap3_dout_pp2 {
+ nvidia,pins = "dap3_dout_pp2";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ dvfs_pwm_px0 {
+ nvidia,pins = "dvfs_pwm_px0",
+ "dvfs_clk_px2";
+ nvidia,function = "cldvfs";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ ulpi_clk_py0 {
+ nvidia,pins = "ulpi_clk_py0",
+ "ulpi_nxt_py2",
+ "ulpi_stp_py3";
+ nvidia,function = "spi1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ ulpi_dir_py1 {
+ nvidia,pins = "ulpi_dir_py1";
+ nvidia,function = "spi1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ cam_i2c_scl_pbb1 {
+ nvidia,pins = "cam_i2c_scl_pbb1",
+ "cam_i2c_sda_pbb2";
+ nvidia,function = "i2c3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+ };
+ gen2_i2c_scl_pt5 {
+ nvidia,pins = "gen2_i2c_scl_pt5",
+ "gen2_i2c_sda_pt6";
+ nvidia,function = "i2c2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+ };
+ pj7 {
+ nvidia,pins = "pj7";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ spdif_in_pk6 {
+ nvidia,pins = "spdif_in_pk6";
+ nvidia,function = "spdif";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ pk7 {
+ nvidia,pins = "pk7";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ pg4 {
+ nvidia,pins = "pg4",
+ "pg5",
+ "pg6",
+ "pi3";
+ nvidia,function = "spi4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ pg7 {
+ nvidia,pins = "pg7";
+ nvidia,function = "spi4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ ph1 {
+ nvidia,pins = "ph1";
+ nvidia,function = "pwm1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ pk0 {
+ nvidia,pins = "pk0",
+ "kb_row15_ps7",
+ "clk_32k_out_pa0";
+ nvidia,function = "soc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ sdmmc1_clk_pz0 {
+ nvidia,pins = "sdmmc1_clk_pz0";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ sdmmc1_cmd_pz1 {
+ nvidia,pins = "sdmmc1_cmd_pz1",
+ "sdmmc1_dat0_py7",
+ "sdmmc1_dat1_py6",
+ "sdmmc1_dat2_py5",
+ "sdmmc1_dat3_py4";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ sdmmc3_clk_pa6 {
+ nvidia,pins = "sdmmc3_clk_pa6";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ sdmmc3_cmd_pa7 {
+ nvidia,pins = "sdmmc3_cmd_pa7",
+ "sdmmc3_dat0_pb7",
+ "sdmmc3_dat1_pb6",
+ "sdmmc3_dat2_pb5",
+ "sdmmc3_dat3_pb4",
+ "kb_col4_pq4",
+ "sdmmc3_clk_lb_out_pee4",
+ "sdmmc3_clk_lb_in_pee5",
+ "sdmmc3_cd_n_pv2";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ sdmmc4_clk_pcc4 {
+ nvidia,pins = "sdmmc4_clk_pcc4";
+ nvidia,function = "sdmmc4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ sdmmc4_cmd_pt7 {
+ nvidia,pins = "sdmmc4_cmd_pt7",
+ "sdmmc4_dat0_paa0",
+ "sdmmc4_dat1_paa1",
+ "sdmmc4_dat2_paa2",
+ "sdmmc4_dat3_paa3",
+ "sdmmc4_dat4_paa4",
+ "sdmmc4_dat5_paa5",
+ "sdmmc4_dat6_paa6",
+ "sdmmc4_dat7_paa7";
+ nvidia,function = "sdmmc4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ mic_det_l {
+ nvidia,pins = "kb_row7_pr7";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ kb_row10_ps2 {
+ nvidia,pins = "kb_row10_ps2";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ kb_row9_ps1 {
+ nvidia,pins = "kb_row9_ps1";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ pwr_i2c_scl_pz6 {
+ nvidia,pins = "pwr_i2c_scl_pz6",
+ "pwr_i2c_sda_pz7";
+ nvidia,function = "i2cpwr";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+ };
+ jtag_rtck {
+ nvidia,pins = "jtag_rtck";
+ nvidia,function = "rtck";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ clk_32k_in {
+ nvidia,pins = "clk_32k_in";
+ nvidia,function = "clk";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ core_pwr_req {
+ nvidia,pins = "core_pwr_req";
+ nvidia,function = "pwron";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ cpu_pwr_req {
+ nvidia,pins = "cpu_pwr_req";
+ nvidia,function = "cpu";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ kb_col0_ap {
+ nvidia,pins = "kb_col0_pq0";
+ nvidia,function = "rsvd4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ en_vdd_sd {
+ nvidia,pins = "kb_row0_pr0";
+ nvidia,function = "rsvd4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ lid_open {
+ nvidia,pins = "kb_row4_pr4";
+ nvidia,function = "rsvd3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ pwr_int_n {
+ nvidia,pins = "pwr_int_n";
+ nvidia,function = "pmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ reset_out_n {
+ nvidia,pins = "reset_out_n";
+ nvidia,function = "reset_out_n";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ clk3_out_pee0 {
+ nvidia,pins = "clk3_out_pee0";
+ nvidia,function = "extperiph3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ gen1_i2c_scl_pc4 {
+ nvidia,pins = "gen1_i2c_scl_pc4",
+ "gen1_i2c_sda_pc5";
+ nvidia,function = "i2c1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+ };
+ hdmi_cec_pee3 {
+ nvidia,pins = "hdmi_cec_pee3";
+ nvidia,function = "cec";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ hdmi_int_pn7 {
+ nvidia,pins = "hdmi_int_pn7";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ ddc_scl_pv4 {
+ nvidia,pins = "ddc_scl_pv4",
+ "ddc_sda_pv5";
+ nvidia,function = "i2c4";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ nvidia,rcv-sel = <TEGRA_PIN_ENABLE>;
+ };
+ usb_vbus_en0_pn4 {
+ nvidia,pins = "usb_vbus_en0_pn4",
+ "usb_vbus_en1_pn5",
+ "usb_vbus_en2_pff1";
+ nvidia,function = "usb";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,lock = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ drive_sdio1 {
+ nvidia,pins = "drive_sdio1";
+ nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+ nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+ nvidia,pull-down-strength = <36>;
+ nvidia,pull-up-strength = <20>;
+ nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_SLOW>;
+ nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_SLOW>;
+ };
+ drive_sdio3 {
+ nvidia,pins = "drive_sdio3";
+ nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+ nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+ nvidia,pull-down-strength = <22>;
+ nvidia,pull-up-strength = <36>;
+ nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+ nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+ };
+ drive_gma {
+ nvidia,pins = "drive_gma";
+ nvidia,high-speed-mode = <TEGRA_PIN_ENABLE>;
+ nvidia,schmitt = <TEGRA_PIN_DISABLE>;
+ nvidia,pull-down-strength = <2>;
+ nvidia,pull-up-strength = <1>;
+ nvidia,slew-rate-rising = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+ nvidia,slew-rate-falling = <TEGRA_PIN_SLEW_RATE_FASTEST>;
+ nvidia,drive-type = <1>;
+ };
+ ac_ok {
+ nvidia,pins = "pj0";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ codec_irq_l {
+ nvidia,pins = "ph4";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ lcd_bl_en {
+ nvidia,pins = "ph2";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ touch_irq_l {
+ nvidia,pins = "gpio_w3_aud_pw3";
+ nvidia,function = "spi6";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ tpm_davint_l {
+ nvidia,pins = "ph6";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ ts_irq_l {
+ nvidia,pins = "pk2";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ ts_reset_l {
+ nvidia,pins = "pk4";
+ nvidia,function = "gmi";
+ nvidia,pull = <1>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ ts_shdn_l {
+ nvidia,pins = "pk1";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ ph7 {
+ nvidia,pins = "ph7";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ sensor_irq_l {
+ nvidia,pins = "pi6";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ wifi_en {
+ nvidia,pins = "gpio_x7_aud_px7";
+ nvidia,function = "rsvd4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ chromeos_write_protect {
+ nvidia,pins = "kb_row1_pr1";
+ nvidia,function = "rsvd4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ hp_det_l {
+ nvidia,pins = "pi7";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ };
+ soc_warm_reset_l {
+ nvidia,pins = "pi5";
+ nvidia,function = "gmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ };
+ };
+ };
+
+ serial@0,70006000 {
+ status = "okay";
+ };
+
+ pwm: pwm@0,7000a000 {
+ status = "okay";
+ };
+
+ /* HDMI DDC */
+ hdmi_ddc: i2c@0,7000c700 {
+ status = "okay";
+ clock-frequency = <100000>;
+ };
+
+ i2c@0,7000d000 {
+ status = "okay";
+ clock-frequency = <400000>;
+
+ as3722: pmic@40 {
+ compatible = "ams,as3722";
+ reg = <0x40>;
+ interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
+
+ ams,system-power-controller;
+
+ #interrupt-cells = <2>;
+ interrupt-controller;
+
+ #gpio-cells = <2>;
+ gpio-controller;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&as3722_default>;
+
+ as3722_default: pinmux@0 {
+ gpio0 {
+ pins = "gpio0";
+ function = "gpio";
+ bias-pull-down;
+ };
+
+ gpio1 {
+ pins = "gpio1";
+ function = "gpio";
+ bias-pull-up;
+ };
+
+ gpio2_4_7 {
+ pins = "gpio2", "gpio4", "gpio7";
+ function = "gpio";
+ bias-pull-up;
+ };
+
+ gpio3 {
+ pins = "gpio3";
+ function = "gpio";
+ bias-high-impedance;
+ };
+
+ gpio5 {
+ pins = "gpio5";
+ function = "clk32k-out";
+ bias-pull-down;
+ };
+
+ gpio6 {
+ pins = "gpio6";
+ function = "clk32k-out";
+ bias-pull-down;
+ };
+ };
+
+ regulators {
+ vsup-sd2-supply = <&vdd_5v0_sys>;
+ vsup-sd3-supply = <&vdd_5v0_sys>;
+ vsup-sd4-supply = <&vdd_5v0_sys>;
+ vsup-sd5-supply = <&vdd_5v0_sys>;
+ vin-ldo0-supply = <&vdd_1v35_lp0>;
+ vin-ldo1-6-supply = <&vdd_3v3_sys>;
+ vin-ldo2-5-7-supply = <&vddio_1v8>;
+ vin-ldo3-4-supply = <&vdd_3v3_sys>;
+ vin-ldo9-10-supply = <&vdd_5v0_sys>;
+ vin-ldo11-supply = <&vdd_3v3_run>;
+
+ sd0 {
+ regulator-name = "+VDD_CPU_AP";
+ regulator-min-microvolt = <700000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-max-microamp = <3500000>;
+ regulator-always-on;
+ regulator-boot-on;
+ ams,ext-control = <2>;
+ };
+
+ sd1 {
+ regulator-name = "+VDD_CORE";
+ regulator-min-microvolt = <700000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-max-microamp = <4000000>;
+ regulator-always-on;
+ regulator-boot-on;
+ ams,ext-control = <1>;
+ };
+
+ vdd_1v35_lp0: sd2 {
+ regulator-name = "+1.35V_LP0(sd2)";
+ regulator-min-microvolt = <1350000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ sd3 {
+ regulator-name = "+1.35V_LP0(sd3)";
+ regulator-min-microvolt = <1350000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ vdd_1v05_run: sd4 {
+ regulator-name = "+1.05V_RUN";
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ };
+
+ vddio_1v8: sd5 {
+ regulator-name = "+1.8V_VDDIO";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ vdd_gpu: sd6 {
+ regulator-name = "+VDD_GPU_AP";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-min-microamp = <3500000>;
+ regulator-max-microamp = <3500000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ ldo0 {
+ regulator-name = "+1.05_RUN_AVDD";
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-always-on;
+ regulator-boot-on;
+ ams,ext-control = <1>;
+ };
+
+ ldo1 {
+ regulator-name = "+1.8V_RUN_CAM";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ ldo2 {
+ regulator-name = "+1.2V_GEN_AVDD";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ ldo3 {
+ regulator-name = "+1.00V_LP0_VDD_RTC";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ regulator-always-on;
+ regulator-boot-on;
+ ams,enable-tracking;
+ };
+
+ vdd_run_cam: ldo4 {
+ regulator-name = "+2.8V_RUN_CAM";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ };
+
+ ldo5 {
+ regulator-name = "+1.2V_RUN_CAM_FRONT";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ };
+
+ vddio_sdmmc3: ldo6 {
+ regulator-name = "+VDDIO_SDMMC3";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ldo7 {
+ regulator-name = "+1.05V_RUN_CAM_REAR";
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ };
+
+ ldo9 {
+ regulator-name = "+2.8V_RUN_TOUCH";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ };
+
+ ldo10 {
+ regulator-name = "+2.8V_RUN_CAM_AF";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ };
+
+ ldo11 {
+ regulator-name = "+1.8V_RUN_VPP_FUSE";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+ };
+ };
+ };
+
+ spi@0,7000d400 {
+ status = "okay";
+
+ ec: cros-ec@0 {
+ compatible = "google,cros-ec-spi";
+ spi-max-frequency = <3000000>;
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(C, 7) IRQ_TYPE_LEVEL_LOW>;
+ reg = <0>;
+
+ google,cros-ec-spi-msg-delay = <2000>;
+
+ /*
+ * I2C tunnel through the EC (remote bus 0). It carries the
+ * bq24735 charger at address 0x9 and the SBS smart battery at
+ * address 0xb.
+ */
+ i2c_20: i2c-tunnel {
+ compatible = "google,cros-ec-i2c-tunnel";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ google,remote-bus = <0>;
+
+ charger: bq24735 {
+ compatible = "ti,bq24735";
+ reg = <0x9>;
+ interrupt-parent = <&gpio>;
+ /*
+ * GPIO J0 is used both as the interrupt line and as
+ * the AC-detect GPIO.
+ * NOTE(review): the second interrupt cell is a GPIO
+ * flag (GPIO_ACTIVE_HIGH), not an IRQ_TYPE_* trigger
+ * - confirm this is intended.
+ */
+ interrupts = <TEGRA_GPIO(J, 0)
+ GPIO_ACTIVE_HIGH>;
+ ti,ac-detect-gpios = <&gpio
+ TEGRA_GPIO(J, 0)
+ GPIO_ACTIVE_HIGH>;
+ };
+
+ battery: smart-battery {
+ compatible = "sbs,sbs-battery";
+ reg = <0xb>;
+ battery-name = "battery";
+ sbs,i2c-retry-count = <2>;
+ sbs,poll-retry-count = <10>;
+ /*
+ * NOTE(review): the link to the charger is commented
+ * out; the reason is not visible in this file.
+ */
+ /* power-supplies = <&charger>; */
+ };
+ };
+
+ keyboard-controller {
+ compatible = "google,cros-ec-keyb";
+ keypad,num-rows = <8>;
+ keypad,num-columns = <13>;
+ google,needs-ghost-filter;
+ linux,keymap =
+ <MATRIX_KEY(0x00, 0x01, KEY_LEFTMETA)
+ MATRIX_KEY(0x00, 0x02, KEY_F1)
+ MATRIX_KEY(0x00, 0x03, KEY_B)
+ MATRIX_KEY(0x00, 0x04, KEY_F10)
+ MATRIX_KEY(0x00, 0x06, KEY_N)
+ MATRIX_KEY(0x00, 0x08, KEY_EQUAL)
+ MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)
+
+ MATRIX_KEY(0x01, 0x01, KEY_ESC)
+ MATRIX_KEY(0x01, 0x02, KEY_F4)
+ MATRIX_KEY(0x01, 0x03, KEY_G)
+ MATRIX_KEY(0x01, 0x04, KEY_F7)
+ MATRIX_KEY(0x01, 0x06, KEY_H)
+ MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)
+ MATRIX_KEY(0x01, 0x09, KEY_F9)
+ MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)
+
+ MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)
+ MATRIX_KEY(0x02, 0x01, KEY_TAB)
+ MATRIX_KEY(0x02, 0x02, KEY_F3)
+ MATRIX_KEY(0x02, 0x03, KEY_T)
+ MATRIX_KEY(0x02, 0x04, KEY_F6)
+ MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE)
+ MATRIX_KEY(0x02, 0x06, KEY_Y)
+ MATRIX_KEY(0x02, 0x07, KEY_102ND)
+ MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)
+ MATRIX_KEY(0x02, 0x09, KEY_F8)
+
+ MATRIX_KEY(0x03, 0x01, KEY_GRAVE)
+ MATRIX_KEY(0x03, 0x02, KEY_F2)
+ MATRIX_KEY(0x03, 0x03, KEY_5)
+ MATRIX_KEY(0x03, 0x04, KEY_F5)
+ MATRIX_KEY(0x03, 0x06, KEY_6)
+ MATRIX_KEY(0x03, 0x08, KEY_MINUS)
+ MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)
+
+ MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)
+ MATRIX_KEY(0x04, 0x01, KEY_A)
+ MATRIX_KEY(0x04, 0x02, KEY_D)
+ MATRIX_KEY(0x04, 0x03, KEY_F)
+ MATRIX_KEY(0x04, 0x04, KEY_S)
+ MATRIX_KEY(0x04, 0x05, KEY_K)
+ MATRIX_KEY(0x04, 0x06, KEY_J)
+ MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON)
+ MATRIX_KEY(0x04, 0x09, KEY_L)
+ MATRIX_KEY(0x04, 0x0a, KEY_BACKSLASH)
+ MATRIX_KEY(0x04, 0x0b, KEY_ENTER)
+
+ MATRIX_KEY(0x05, 0x01, KEY_Z)
+ MATRIX_KEY(0x05, 0x02, KEY_C)
+ MATRIX_KEY(0x05, 0x03, KEY_V)
+ MATRIX_KEY(0x05, 0x04, KEY_X)
+ MATRIX_KEY(0x05, 0x05, KEY_COMMA)
+ MATRIX_KEY(0x05, 0x06, KEY_M)
+ MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT)
+ MATRIX_KEY(0x05, 0x08, KEY_SLASH)
+ MATRIX_KEY(0x05, 0x09, KEY_DOT)
+ MATRIX_KEY(0x05, 0x0b, KEY_SPACE)
+
+ MATRIX_KEY(0x06, 0x01, KEY_1)
+ MATRIX_KEY(0x06, 0x02, KEY_3)
+ MATRIX_KEY(0x06, 0x03, KEY_4)
+ MATRIX_KEY(0x06, 0x04, KEY_2)
+ MATRIX_KEY(0x06, 0x05, KEY_8)
+ MATRIX_KEY(0x06, 0x06, KEY_7)
+ MATRIX_KEY(0x06, 0x08, KEY_0)
+ MATRIX_KEY(0x06, 0x09, KEY_9)
+ MATRIX_KEY(0x06, 0x0a, KEY_LEFTALT)
+ MATRIX_KEY(0x06, 0x0b, KEY_DOWN)
+ MATRIX_KEY(0x06, 0x0c, KEY_RIGHT)
+
+ MATRIX_KEY(0x07, 0x01, KEY_Q)
+ MATRIX_KEY(0x07, 0x02, KEY_E)
+ MATRIX_KEY(0x07, 0x03, KEY_R)
+ MATRIX_KEY(0x07, 0x04, KEY_W)
+ MATRIX_KEY(0x07, 0x05, KEY_I)
+ MATRIX_KEY(0x07, 0x06, KEY_U)
+ MATRIX_KEY(0x07, 0x07, KEY_RIGHTSHIFT)
+ MATRIX_KEY(0x07, 0x08, KEY_P)
+ MATRIX_KEY(0x07, 0x09, KEY_O)
+ MATRIX_KEY(0x07, 0x0b, KEY_UP)
+ MATRIX_KEY(0x07, 0x0c, KEY_LEFT)>;
+ };
+ };
+ };
+
+ pmc@0,7000e400 {
+ nvidia,invert-interrupt;
+ nvidia,suspend-mode = <0>;
+ #wake-cells = <3>;
+ nvidia,cpu-pwr-good-time = <500>;
+ nvidia,cpu-pwr-off-time = <300>;
+ nvidia,core-pwr-good-time = <641 3845>;
+ nvidia,core-pwr-off-time = <61036>;
+ nvidia,core-power-req-active-high;
+ nvidia,sys-clock-req-active-high;
+ nvidia,reset-gpio = <&gpio TEGRA_GPIO(I, 5) GPIO_ACTIVE_LOW>;
+ };
+
+ /* WIFI/BT module */
+ sdhci@0,700b0000 {
+ status = "disabled";
+ };
+
+ /*
+ * external SD/MMC: 4-bit bus with card-detect (V2), power (R0) and
+ * write-protect (Q4) GPIOs. The I/O rail is vddio_sdmmc3, i.e. the
+ * ldo6 regulator, which is allowed to range from 1.8 V to 3.3 V.
+ */
+ sdhci@0,700b0400 {
+ cd-gpios = <&gpio TEGRA_GPIO(V, 2) GPIO_ACTIVE_LOW>;
+ power-gpios = <&gpio TEGRA_GPIO(R, 0) GPIO_ACTIVE_HIGH>;
+ wp-gpios = <&gpio TEGRA_GPIO(Q, 4) GPIO_ACTIVE_HIGH>;
+ status = "okay";
+ bus-width = <4>;
+ vqmmc-supply = <&vddio_sdmmc3>;
+ };
+
+ /* EMMC 4.51 */
+ sdhci@0,700b0600 {
+ status = "okay";
+ bus-width = <8>;
+ non-removable;
+ };
+
+ usb@0,7d000000 {
+ status = "okay";
+ };
+
+ usb-phy@0,7d000000 {
+ status = "okay";
+ vbus-supply = <&vdd_usb1_vbus>;
+ };
+
+ usb@0,7d004000 {
+ status = "okay";
+ };
+
+ usb-phy@0,7d004000 {
+ status = "okay";
+ /*
+ * NOTE(review): vdd_run_cam is the ldo4 regulator named
+ * "+2.8V_RUN_CAM" (fixed at 2.8 V), unlike the other two USB
+ * PHYs which use dedicated 5 V VBUS regulators - confirm this
+ * is the intended supply for this port.
+ */
+ vbus-supply = <&vdd_run_cam>;
+ };
+
+ usb@0,7d008000 {
+ status = "okay";
+ };
+
+ usb-phy@0,7d008000 {
+ status = "okay";
+ vbus-supply = <&vdd_usb3_vbus>;
+ };
+
+ /*
+ * PWM-driven panel backlight, powered from vdd_led and gated by
+ * GPIO H2. It is referenced by the panel node through &backlight.
+ */
+ backlight: backlight {
+ compatible = "pwm-backlight";
+
+ enable-gpios = <&gpio TEGRA_GPIO(H, 2) GPIO_ACTIVE_HIGH>;
+ power-supply = <&vdd_led>;
+ /* PWM channel 1, period 1000000 (ns per the generic PWM binding) */
+ pwms = <&pwm 1 1000000>;
+
+ /*
+ * Per the pwm-backlight binding, default-brightness-level is an
+ * index into brightness-levels, so 6 selects the value 128.
+ */
+ brightness-levels = <0 4 8 16 32 64 128 255>;
+ default-brightness-level = <6>;
+
+ backlight-boot-off;
+ };
+
+ /*
+ * Board-level fixed clocks. clk32k_in is a 32768 Hz fixed clock that
+ * the SoC's pmc node consumes under the clock name "clk32k_in".
+ */
+ clocks {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clk32k_in: clock@0 {
+ compatible = "fixed-clock";
+ reg=<0>;
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+ };
+
+ /*
+ * GPIO-backed inputs: the lid switch on GPIO R4 and the power button
+ * on GPIO Q0. Both are active-low and flagged as wakeup sources.
+ */
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ lid {
+ label = "Lid";
+ gpios = <&gpio TEGRA_GPIO(R, 4) GPIO_ACTIVE_LOW>;
+ /* input type 5 is EV_SW and code 0 is SW_LID in the Linux input codes */
+ linux,input-type = <5>;
+ linux,code = <0>;
+ debounce-interval = <1>;
+ gpio-key,wakeup;
+ };
+
+ power {
+ label = "Power";
+ gpios = <&gpio TEGRA_GPIO(Q, 0) GPIO_ACTIVE_LOW>;
+ linux,code = <KEY_POWER>;
+ debounce-interval = <10>;
+ gpio-key,wakeup;
+ };
+ };
+
+ panel: panel {
+ compatible = "innolux,n116bge", "simple-panel";
+ backlight = <&backlight>;
+ ddc-i2c-bus = <&dpaux>;
+ };
+
+ regulators {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ vdd_mux: regulator@0 {
+ compatible = "regulator-fixed";
+ reg = <0>;
+ regulator-name = "+VDD_MUX";
+ regulator-min-microvolt = <19000000>;
+ regulator-max-microvolt = <19000000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ vdd_5v0_sys: regulator@1 {
+ compatible = "regulator-fixed";
+ reg = <1>;
+ regulator-name = "+5V_SYS";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-always-on;
+ regulator-boot-on;
+ vin-supply = <&vdd_mux>;
+ };
+
+ vdd_3v3_sys: regulator@2 {
+ compatible = "regulator-fixed";
+ reg = <2>;
+ regulator-name = "+3.3V_SYS";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ regulator-boot-on;
+ vin-supply = <&vdd_mux>;
+ };
+
+ vdd_3v3_run: regulator@3 {
+ compatible = "regulator-fixed";
+ reg = <3>;
+ regulator-name = "+3.3V_RUN";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ regulator-boot-on;
+ gpio = <&as3722 1 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_3v3_sys>;
+ };
+
+ vdd_3v3_hdmi: regulator@4 {
+ compatible = "regulator-fixed";
+ reg = <4>;
+ regulator-name = "+3.3V_AVDD_HDMI_AP_GATED";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ vin-supply = <&vdd_3v3_run>;
+ };
+
+ vdd_led: regulator@5 {
+ compatible = "regulator-fixed";
+ reg = <5>;
+ regulator-name = "+VDD_LED";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&gpio TEGRA_GPIO(P, 2) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_mux>;
+ };
+
+ vdd_usb1_vbus: regulator@6 {
+ compatible = "regulator-fixed";
+ reg = <6>;
+ regulator-name = "+5V_USB_HS";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ gpio = <&gpio TEGRA_GPIO(N, 4) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ gpio-open-drain;
+ vin-supply = <&vdd_5v0_sys>;
+ };
+
+ vdd_usb3_vbus: regulator@7 {
+ compatible = "regulator-fixed";
+ reg = <7>;
+ regulator-name = "+5V_USB_SS";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ gpio = <&gpio TEGRA_GPIO(N, 5) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ gpio-open-drain;
+ vin-supply = <&vdd_5v0_sys>;
+ };
+
+ vdd_3v3_panel: regulator@8 {
+ compatible = "regulator-fixed";
+ reg = <8>;
+ regulator-name = "+3.3V_PANEL";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ gpio = <&as3722 4 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_3v3_sys>;
+ };
+
+ /*
+ * 1.05 V HDMI PLL rail, fed from vdd_1v05_run and switched by
+ * GPIO H7. Unlike the other GPIO-controlled regulators in this
+ * list, this one has no enable-active-high property and its
+ * GPIO is flagged GPIO_ACTIVE_LOW.
+ */
+ vdd_hdmi_pll: regulator@9 {
+ compatible = "regulator-fixed";
+ reg = <9>;
+ regulator-name = "+1.05V_RUN_AVDD_HDMI_PLL_AP_GATE";
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ gpio = <&gpio TEGRA_GPIO(H, 7) GPIO_ACTIVE_LOW>;
+ vin-supply = <&vdd_1v05_run>;
+ };
+
+ vdd_5v0_hdmi: regulator@10 {
+ compatible = "regulator-fixed";
+ reg = <10>;
+ regulator-name = "+5V_HDMI_CON";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ gpio = <&gpio TEGRA_GPIO(K, 6) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ vin-supply = <&vdd_5v0_sys>;
+ };
+
+ vdd_5v0_ts: regulator@11 {
+ compatible = "regulator-fixed";
+ reg = <11>;
+ regulator-name = "+5V_VDD_TS";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-always-on;
+ regulator-boot-on;
+ gpio = <&gpio TEGRA_GPIO(K, 1) GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ };
+ };
+};
--- /dev/null
+#include <dt-bindings/clock/tegra124-car.h>
+#include <dt-bindings/gpio/tegra-gpio.h>
+#include <dt-bindings/memory/tegra124-mc.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra-xusb.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+ compatible = "nvidia,tegra132", "nvidia,tegra124";
+ interrupt-parent = <&lic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ pcie-controller@0,01003000 {
+ compatible = "nvidia,tegra124-pcie";
+ device_type = "pci";
+ reg = <0x0 0x01003000 0x0 0x00000800 /* PADS registers */
+ 0x0 0x01003800 0x0 0x00000800 /* AFI registers */
+ 0x0 0x02000000 0x0 0x10000000>; /* configuration space */
+ reg-names = "pads", "afi", "cs";
+ interrupts = <GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>, /* controller interrupt */
+ <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>; /* MSI interrupt */
+ interrupt-names = "intr", "msi";
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &gic GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
+
+ bus-range = <0x00 0xff>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ ranges = <0x82000000 0 0x01000000 0x0 0x01000000 0 0x00001000 /* port 0 configuration space */
+ 0x82000000 0 0x01001000 0x0 0x01001000 0 0x00001000 /* port 1 configuration space */
+ 0x81000000 0 0x0 0x0 0x12000000 0 0x00010000 /* downstream I/O (64 KiB) */
+ 0x82000000 0 0x13000000 0x0 0x13000000 0 0x0d000000 /* non-prefetchable memory (208 MiB) */
+ 0xc2000000 0 0x20000000 0x0 0x20000000 0 0x20000000>; /* prefetchable memory (512 MiB) */
+
+ clocks = <&tegra_car TEGRA124_CLK_PCIE>,
+ <&tegra_car TEGRA124_CLK_AFI>,
+ <&tegra_car TEGRA124_CLK_PLL_E>,
+ <&tegra_car TEGRA124_CLK_CML0>;
+ clock-names = "pex", "afi", "pll_e", "cml";
+ resets = <&tegra_car 70>,
+ <&tegra_car 72>,
+ <&tegra_car 74>;
+ reset-names = "pex", "afi", "pcie_x";
+ status = "disabled";
+
+ phys = <&padctl TEGRA_XUSB_PADCTL_PCIE>;
+ phy-names = "pcie";
+
+ pci@1,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82000800 0 0x01000000 0 0x1000>;
+ reg = <0x000800 0 0 0 0>;
+ status = "disabled";
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges;
+
+ nvidia,num-lanes = <2>;
+ };
+
+ pci@2,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82001000 0 0x01001000 0 0x1000>;
+ reg = <0x001000 0 0 0 0>;
+ status = "disabled";
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ ranges;
+
+ nvidia,num-lanes = <1>;
+ };
+ };
+
+ host1x@0,50000000 {
+ compatible = "nvidia,tegra124-host1x", "simple-bus";
+ reg = <0x0 0x50000000 0x0 0x00034000>;
+ interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, /* syncpt */
+ <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>; /* general */
+ clocks = <&tegra_car TEGRA124_CLK_HOST1X>;
+ clock-names = "host1x";
+ resets = <&tegra_car 28>;
+ reset-names = "host1x";
+
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ ranges = <0 0x54000000 0 0x54000000 0 0x01000000>;
+
+ dc@0,54200000 {
+ compatible = "nvidia,tegra124-dc";
+ reg = <0x0 0x54200000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_DISP1>,
+ <&tegra_car TEGRA124_CLK_PLL_P>;
+ clock-names = "dc", "parent";
+ resets = <&tegra_car 27>;
+ reset-names = "dc";
+
+ iommus = <&mc TEGRA_SWGROUP_DC>;
+
+ nvidia,head = <0>;
+ };
+
+ dc@0,54240000 {
+ compatible = "nvidia,tegra124-dc";
+ reg = <0x0 0x54240000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_DISP2>,
+ <&tegra_car TEGRA124_CLK_PLL_P>;
+ clock-names = "dc", "parent";
+ resets = <&tegra_car 26>;
+ reset-names = "dc";
+
+ iommus = <&mc TEGRA_SWGROUP_DCB>;
+
+ nvidia,head = <1>;
+ };
+
+ hdmi@0,54280000 {
+ compatible = "nvidia,tegra124-hdmi";
+ reg = <0x0 0x54280000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_HDMI>,
+ <&tegra_car TEGRA124_CLK_PLL_D2_OUT0>;
+ clock-names = "hdmi", "parent";
+ resets = <&tegra_car 51>;
+ reset-names = "hdmi";
+ status = "disabled";
+ };
+
+ sor@0,54540000 {
+ compatible = "nvidia,tegra124-sor";
+ reg = <0x0 0x54540000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_SOR0>,
+ <&tegra_car TEGRA124_CLK_PLL_D_OUT0>,
+ <&tegra_car TEGRA124_CLK_PLL_DP>,
+ <&tegra_car TEGRA124_CLK_CLK_M>;
+ clock-names = "sor", "parent", "dp", "safe";
+ resets = <&tegra_car 182>;
+ reset-names = "sor";
+ status = "disabled";
+ };
+
+ dpaux: dpaux@0,545c0000 {
+ compatible = "nvidia,tegra124-dpaux";
+ reg = <0x0 0x545c0000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_DPAUX>,
+ <&tegra_car TEGRA124_CLK_PLL_DP>;
+ clock-names = "dpaux", "parent";
+ resets = <&tegra_car 181>;
+ reset-names = "dpaux";
+ status = "disabled";
+ };
+ };
+
+ /*
+ * ARM GIC with four register regions and three-cell interrupt
+ * specifiers. The root node sets interrupt-parent to &lic, so this
+ * node overrides it with &gic for its own PPI 9 interrupt.
+ */
+ gic: interrupt-controller@0,50041000 {
+ compatible = "arm,cortex-a15-gic";
+ #interrupt-cells = <3>;
+ interrupt-controller;
+ reg = <0x0 0x50041000 0x0 0x1000>,
+ <0x0 0x50042000 0x0 0x2000>,
+ <0x0 0x50044000 0x0 0x2000>,
+ <0x0 0x50046000 0x0 0x2000>;
+ interrupts = <GIC_PPI 9
+ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+ interrupt-parent = <&gic>;
+ };
+
+ gpu@0,57000000 {
+ compatible = "nvidia,gk20a";
+ reg = <0x0 0x57000000 0x0 0x01000000>,
+ <0x0 0x58000000 0x0 0x01000000>;
+ interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "stall", "nonstall";
+ clocks = <&tegra_car TEGRA124_CLK_GPU>,
+ <&tegra_car TEGRA124_CLK_PLL_P_OUT5>;
+ clock-names = "gpu", "pwr";
+ resets = <&tegra_car 184>;
+ reset-names = "gpu";
+ status = "disabled";
+ };
+
+ /*
+ * Legacy interrupt controller: five 0x100-byte register banks,
+ * parented to the GIC. The root node names it as the default
+ * interrupt-parent, so devices in this file that do not override
+ * interrupt-parent use it.
+ * NOTE(review): the unit address here lacks the "0," prefix used by
+ * the other nodes in this file - confirm whether that is intended.
+ */
+ lic: interrupt-controller@60004000 {
+ compatible = "nvidia,tegra124-ictlr", "nvidia,tegra30-ictlr";
+ reg = <0x0 0x60004000 0x0 0x100>,
+ <0x0 0x60004100 0x0 0x100>,
+ <0x0 0x60004200 0x0 0x100>,
+ <0x0 0x60004300 0x0 0x100>,
+ <0x0 0x60004400 0x0 0x100>;
+ interrupt-controller;
+ #interrupt-cells = <3>;
+ interrupt-parent = <&gic>;
+ };
+
+ timer@0,60005000 {
+ compatible = "nvidia,tegra124-timer", "nvidia,tegra20-timer";
+ reg = <0x0 0x60005000 0x0 0x400>;
+ interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_TIMER>;
+ clock-names = "timer";
+ };
+
+ tegra_car: clock@0,60006000 {
+ compatible = "nvidia,tegra132-car";
+ reg = <0x0 0x60006000 0x0 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ nvidia,external-memory-controller = <&emc>;
+ };
+
+ flow-controller@0,60007000 {
+ compatible = "nvidia,tegra124-flowctrl";
+ reg = <0x0 0x60007000 0x0 0x1000>;
+ };
+
+ actmon@0,6000c800 {
+ compatible = "nvidia,tegra124-actmon";
+ reg = <0x0 0x6000c800 0x0 0x400>;
+ interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_ACTMON>,
+ <&tegra_car TEGRA124_CLK_EMC>;
+ clock-names = "actmon", "emc";
+ resets = <&tegra_car 119>;
+ reset-names = "actmon";
+ };
+
+ gpio: gpio@0,6000d000 {
+ compatible = "nvidia,tegra124-gpio", "nvidia,tegra30-gpio";
+ reg = <0x0 0x6000d000 0x0 0x1000>;
+ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ };
+
+ /*
+ * APB DMA controller. It lists 32 interrupts in two contiguous runs,
+ * SPI 104-119 and SPI 128-143. Clients in this file refer to it with
+ * a single-cell specifier, <&apbdma N>.
+ */
+ apbdma: dma@0,60020000 {
+ compatible = "nvidia,tegra124-apbdma", "nvidia,tegra148-apbdma";
+ reg = <0x0 0x60020000 0x0 0x1400>;
+ interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_APBDMA>;
+ clock-names = "dma";
+ resets = <&tegra_car 34>;
+ reset-names = "dma";
+ #dma-cells = <1>;
+ };
+
+ apbmisc@0,70000800 {
+ compatible = "nvidia,tegra124-apbmisc", "nvidia,tegra20-apbmisc";
+ reg = <0x0 0x70000800 0x0 0x64>, /* Chip revision */
+ <0x0 0x7000e864 0x0 0x04>; /* Strapping options */
+ };
+
+ pinmux: pinmux@0,70000868 {
+ compatible = "nvidia,tegra124-pinmux";
+ reg = <0x0 0x70000868 0x0 0x164>, /* Pad control registers */
+ <0x0 0x70003000 0x0 0x434>, /* Mux registers */
+ <0x0 0x70000820 0x0 0x008>; /* MIPI pad control */
+ };
+
+ /*
+ * There are two serial drivers, i.e. the 8250-based simple serial
+ * driver and the APB DMA-based serial driver for higher baud rates
+ * and performance. To enable the 8250-based driver, the compatible
+ * is "nvidia,tegra124-uart", "nvidia,tegra20-uart"; to enable
+ * the APB DMA-based serial driver, the compatible is
+ * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
+ */
+ uarta: serial@0,70006000 {
+ compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+ reg = <0x0 0x70006000 0x0 0x40>;
+ reg-shift = <2>;
+ interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_UARTA>;
+ clock-names = "serial";
+ resets = <&tegra_car 6>;
+ reset-names = "serial";
+ dmas = <&apbdma 8>, <&apbdma 8>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ uartb: serial@0,70006040 {
+ compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+ reg = <0x0 0x70006040 0x0 0x40>;
+ reg-shift = <2>;
+ interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_UARTB>;
+ clock-names = "serial";
+ resets = <&tegra_car 7>;
+ reset-names = "serial";
+ dmas = <&apbdma 9>, <&apbdma 9>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ uartc: serial@0,70006200 {
+ compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+ reg = <0x0 0x70006200 0x0 0x40>;
+ reg-shift = <2>;
+ interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_UARTC>;
+ clock-names = "serial";
+ resets = <&tegra_car 55>;
+ reset-names = "serial";
+ dmas = <&apbdma 10>, <&apbdma 10>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ uartd: serial@0,70006300 {
+ compatible = "nvidia,tegra124-uart", "nvidia,tegra20-uart";
+ reg = <0x0 0x70006300 0x0 0x40>;
+ reg-shift = <2>;
+ interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_UARTD>;
+ clock-names = "serial";
+ resets = <&tegra_car 65>;
+ reset-names = "serial";
+ dmas = <&apbdma 19>, <&apbdma 19>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ pwm: pwm@0,7000a000 {
+ compatible = "nvidia,tegra124-pwm", "nvidia,tegra20-pwm";
+ reg = <0x0 0x7000a000 0x0 0x100>;
+ #pwm-cells = <2>;
+ clocks = <&tegra_car TEGRA124_CLK_PWM>;
+ clock-names = "pwm";
+ resets = <&tegra_car 17>;
+ reset-names = "pwm";
+ status = "disabled";
+ };
+
+ i2c@0,7000c000 {
+ compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000c000 0x0 0x100>;
+ interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_I2C1>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 12>;
+ reset-names = "i2c";
+ dmas = <&apbdma 21>, <&apbdma 21>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ i2c@0,7000c400 {
+ compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000c400 0x0 0x100>;
+ interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_I2C2>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 54>;
+ reset-names = "i2c";
+ dmas = <&apbdma 22>, <&apbdma 22>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ i2c@0,7000c500 {
+ compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000c500 0x0 0x100>;
+ interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_I2C3>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 67>;
+ reset-names = "i2c";
+ dmas = <&apbdma 23>, <&apbdma 23>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ i2c@0,7000c700 {
+ compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000c700 0x0 0x100>;
+ interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_I2C4>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 103>;
+ reset-names = "i2c";
+ dmas = <&apbdma 26>, <&apbdma 26>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ i2c@0,7000d000 {
+ compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000d000 0x0 0x100>;
+ interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_I2C5>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 47>;
+ reset-names = "i2c";
+ dmas = <&apbdma 24>, <&apbdma 24>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ i2c@0,7000d100 {
+ compatible = "nvidia,tegra124-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000d100 0x0 0x100>;
+ interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_I2C6>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 166>;
+ reset-names = "i2c";
+ dmas = <&apbdma 30>, <&apbdma 30>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ spi@0,7000d400 {
+ compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+ reg = <0x0 0x7000d400 0x0 0x200>;
+ interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_SBC1>;
+ clock-names = "spi";
+ resets = <&tegra_car 41>;
+ reset-names = "spi";
+ dmas = <&apbdma 15>, <&apbdma 15>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ spi@0,7000d600 {
+ compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+ reg = <0x0 0x7000d600 0x0 0x200>;
+ interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_SBC2>;
+ clock-names = "spi";
+ resets = <&tegra_car 44>;
+ reset-names = "spi";
+ dmas = <&apbdma 16>, <&apbdma 16>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ spi@0,7000d800 {
+ compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+ reg = <0x0 0x7000d800 0x0 0x200>;
+ interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_SBC3>;
+ clock-names = "spi";
+ resets = <&tegra_car 46>;
+ reset-names = "spi";
+ dmas = <&apbdma 17>, <&apbdma 17>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ spi@0,7000da00 {
+ compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+ reg = <0x0 0x7000da00 0x0 0x200>;
+ interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_SBC4>;
+ clock-names = "spi";
+ resets = <&tegra_car 68>;
+ reset-names = "spi";
+ dmas = <&apbdma 18>, <&apbdma 18>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ spi@0,7000dc00 {
+ compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+ reg = <0x0 0x7000dc00 0x0 0x200>;
+ interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_SBC5>;
+ clock-names = "spi";
+ resets = <&tegra_car 104>;
+ reset-names = "spi";
+ dmas = <&apbdma 27>, <&apbdma 27>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ spi@0,7000de00 {
+ compatible = "nvidia,tegra124-spi", "nvidia,tegra114-spi";
+ reg = <0x0 0x7000de00 0x0 0x200>;
+ interrupts = <GIC_SPI 79 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA124_CLK_SBC6>;
+ clock-names = "spi";
+ resets = <&tegra_car 105>;
+ reset-names = "spi";
+ dmas = <&apbdma 28>, <&apbdma 28>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ rtc@0,7000e000 {
+ compatible = "nvidia,tegra124-rtc", "nvidia,tegra20-rtc";
+ reg = <0x0 0x7000e000 0x0 0x100>;
+ interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_RTC>;
+ clock-names = "rtc";
+ };
+
+ pmc@0,7000e400 {
+ compatible = "nvidia,tegra124-pmc";
+ reg = <0x0 0x7000e400 0x0 0x400>;
+ clocks = <&tegra_car TEGRA124_CLK_PCLK>, <&clk32k_in>;
+ clock-names = "pclk", "clk32k_in";
+ };
+
+ fuse@0,7000f800 {
+ compatible = "nvidia,tegra124-efuse";
+ reg = <0x0 0x7000f800 0x0 0x400>;
+ clocks = <&tegra_car TEGRA124_CLK_FUSE>;
+ clock-names = "fuse";
+ resets = <&tegra_car 39>;
+ reset-names = "fuse";
+ };
+
+ mc: memory-controller@0,70019000 {
+ compatible = "nvidia,tegra132-mc";
+ reg = <0x0 0x70019000 0x0 0x1000>;
+ clocks = <&tegra_car TEGRA124_CLK_MC>;
+ clock-names = "mc";
+
+ interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+ #iommu-cells = <1>;
+ };
+
+ emc: emc@0,7001b000 {
+ compatible = "nvidia,tegra132-emc", "nvidia,tegra124-emc";
+ reg = <0x0 0x7001b000 0x0 0x1000>;
+
+ nvidia,memory-controller = <&mc>;
+ };
+
+ /*
+ * SATA/AHCI controller, disabled by default. The unit address
+ * (70020000) is the base of the second reg entry (SATA); the first
+ * reg entry is the AHCI block at 0x70027000. Its PHY comes from the
+ * XUSB pad controller (&padctl).
+ */
+ sata@0,70020000 {
+ compatible = "nvidia,tegra124-ahci";
+ reg = <0x0 0x70027000 0x0 0x2000>, /* AHCI */
+ <0x0 0x70020000 0x0 0x7000>; /* SATA */
+ interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_SATA>,
+ <&tegra_car TEGRA124_CLK_SATA_OOB>,
+ <&tegra_car TEGRA124_CLK_CML1>,
+ <&tegra_car TEGRA124_CLK_PLL_E>;
+ clock-names = "sata", "sata-oob", "cml1", "pll_e";
+ resets = <&tegra_car 124>,
+ <&tegra_car 123>,
+ <&tegra_car 129>;
+ reset-names = "sata", "sata-oob", "sata-cold";
+ phys = <&padctl TEGRA_XUSB_PADCTL_SATA>;
+ phy-names = "sata-phy";
+ status = "disabled";
+ };
+
+ hda@0,70030000 {
+ compatible = "nvidia,tegra132-hda", "nvidia,tegra124-hda",
+ "nvidia,tegra30-hda";
+ reg = <0x0 0x70030000 0x0 0x10000>;
+ interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_HDA>,
+ <&tegra_car TEGRA124_CLK_HDA2HDMI>,
+ <&tegra_car TEGRA124_CLK_HDA2CODEC_2X>;
+ clock-names = "hda", "hda2hdmi", "hda2codec_2x";
+ resets = <&tegra_car 125>, /* hda */
+ <&tegra_car 128>, /* hda2hdmi */
+ <&tegra_car 111>; /* hda2codec_2x */
+ reset-names = "hda", "hda2hdmi", "hda2codec_2x";
+ status = "disabled";
+ };
+
+ padctl: padctl@0,7009f000 {
+ compatible = "nvidia,tegra132-xusb-padctl",
+ "nvidia,tegra124-xusb-padctl";
+ reg = <0x0 0x7009f000 0x0 0x1000>;
+ resets = <&tegra_car 142>;
+ reset-names = "padctl";
+
+ #phy-cells = <1>;
+
+ phys {
+ pcie-0 {
+ status = "disabled";
+ };
+
+ sata-0 {
+ status = "disabled";
+ };
+
+ usb3-0 {
+ status = "disabled";
+ };
+
+ usb3-1 {
+ status = "disabled";
+ };
+
+ utmi-0 {
+ status = "disabled";
+ };
+
+ utmi-1 {
+ status = "disabled";
+ };
+
+ utmi-2 {
+ status = "disabled";
+ };
+ };
+ };
+
+ sdhci@0,700b0000 {
+ compatible = "nvidia,tegra124-sdhci";
+ reg = <0x0 0x700b0000 0x0 0x200>;
+ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_SDMMC1>;
+ clock-names = "sdhci";
+ resets = <&tegra_car 14>;
+ reset-names = "sdhci";
+ status = "disabled";
+ };
+
+ sdhci@0,700b0200 {
+ compatible = "nvidia,tegra124-sdhci";
+ reg = <0x0 0x700b0200 0x0 0x200>;
+ interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_SDMMC2>;
+ clock-names = "sdhci";
+ resets = <&tegra_car 9>;
+ reset-names = "sdhci";
+ status = "disabled";
+ };
+
+ sdhci@0,700b0400 {
+ compatible = "nvidia,tegra124-sdhci";
+ reg = <0x0 0x700b0400 0x0 0x200>;
+ interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_SDMMC3>;
+ clock-names = "sdhci";
+ resets = <&tegra_car 69>;
+ reset-names = "sdhci";
+ status = "disabled";
+ };
+
+ sdhci@0,700b0600 {
+ compatible = "nvidia,tegra124-sdhci";
+ reg = <0x0 0x700b0600 0x0 0x200>;
+ interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_SDMMC4>;
+ clock-names = "sdhci";
+ resets = <&tegra_car 15>;
+ reset-names = "sdhci";
+ status = "disabled";
+ };
+
+ soctherm: thermal-sensor@0,700e2000 {
+ compatible = "nvidia,tegra124-soctherm";
+ reg = <0x0 0x700e2000 0x0 0x1000>;
+ interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_TSENSOR>,
+ <&tegra_car TEGRA124_CLK_SOC_THERM>;
+ clock-names = "tsensor", "soctherm";
+ resets = <&tegra_car 78>;
+ reset-names = "soctherm";
+ #thermal-sensor-cells = <1>;
+ };
+
+ /*
+ * AHUB block ("nvidia,tegra124-ahub"): three register windows, one
+ * interrupt, 21 reset lines and 10 rx/tx DMA channel pairs. The five
+ * I2S controllers are described as child nodes of this node.
+ */
+ ahub@0,70300000 {
+ compatible = "nvidia,tegra124-ahub";
+ /* Note: the three windows are not listed in ascending address order. */
+ reg = <0x0 0x70300000 0x0 0x200>,
+ <0x0 0x70300800 0x0 0x800>,
+ <0x0 0x70300200 0x0 0x600>;
+ interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_D_AUDIO>,
+ <&tegra_car TEGRA124_CLK_APBIF>;
+ clock-names = "d_audio", "apbif";
+ /* Positional lists: the Nth resets entry is named by the Nth reset-names entry (21 each). */
+ resets = <&tegra_car 106>, /* d_audio */
+ <&tegra_car 107>, /* apbif */
+ <&tegra_car 30>, /* i2s0 */
+ <&tegra_car 11>, /* i2s1 */
+ <&tegra_car 18>, /* i2s2 */
+ <&tegra_car 101>, /* i2s3 */
+ <&tegra_car 102>, /* i2s4 */
+ <&tegra_car 108>, /* dam0 */
+ <&tegra_car 109>, /* dam1 */
+ <&tegra_car 110>, /* dam2 */
+ <&tegra_car 10>, /* spdif */
+ <&tegra_car 153>, /* amx */
+ <&tegra_car 185>, /* amx1 */
+ <&tegra_car 154>, /* adx */
+ <&tegra_car 180>, /* adx1 */
+ <&tegra_car 186>, /* afc0 */
+ <&tegra_car 187>, /* afc1 */
+ <&tegra_car 188>, /* afc2 */
+ <&tegra_car 189>, /* afc3 */
+ <&tegra_car 190>, /* afc4 */
+ <&tegra_car 191>; /* afc5 */
+ reset-names = "d_audio", "apbif", "i2s0", "i2s1", "i2s2",
+ "i2s3", "i2s4", "dam0", "dam1", "dam2",
+ "spdif", "amx", "amx1", "adx", "adx1",
+ "afc0", "afc1", "afc2", "afc3", "afc4", "afc5";
+ /* Each APB DMA request number appears twice: once for rxN and once for txN. */
+ dmas = <&apbdma 1>, <&apbdma 1>,
+ <&apbdma 2>, <&apbdma 2>,
+ <&apbdma 3>, <&apbdma 3>,
+ <&apbdma 4>, <&apbdma 4>,
+ <&apbdma 6>, <&apbdma 6>,
+ <&apbdma 7>, <&apbdma 7>,
+ <&apbdma 12>, <&apbdma 12>,
+ <&apbdma 13>, <&apbdma 13>,
+ <&apbdma 14>, <&apbdma 14>,
+ <&apbdma 29>, <&apbdma 29>;
+ dma-names = "rx0", "tx0", "rx1", "tx1", "rx2", "tx2",
+ "rx3", "tx3", "rx4", "tx4", "rx5", "tx5",
+ "rx6", "tx6", "rx7", "tx7", "rx8", "tx8",
+ "rx9", "tx9";
+ /* Empty "ranges": child addresses are used unchanged in the parent address space. */
+ ranges;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ /*
+ * I2S controllers 0-4: 0x100 bytes each, consecutive from 0x70301000,
+ * all left "disabled" here. Each child's reset ID equals the matching
+ * i2sN entry in the parent's resets list.
+ */
+ tegra_i2s0: i2s@0,70301000 {
+ compatible = "nvidia,tegra124-i2s";
+ reg = <0x0 0x70301000 0x0 0x100>;
+ nvidia,ahub-cif-ids = <4 4>;
+ clocks = <&tegra_car TEGRA124_CLK_I2S0>;
+ clock-names = "i2s";
+ resets = <&tegra_car 30>;
+ reset-names = "i2s";
+ status = "disabled";
+ };
+
+ tegra_i2s1: i2s@0,70301100 {
+ compatible = "nvidia,tegra124-i2s";
+ reg = <0x0 0x70301100 0x0 0x100>;
+ nvidia,ahub-cif-ids = <5 5>;
+ clocks = <&tegra_car TEGRA124_CLK_I2S1>;
+ clock-names = "i2s";
+ resets = <&tegra_car 11>;
+ reset-names = "i2s";
+ status = "disabled";
+ };
+
+ tegra_i2s2: i2s@0,70301200 {
+ compatible = "nvidia,tegra124-i2s";
+ reg = <0x0 0x70301200 0x0 0x100>;
+ nvidia,ahub-cif-ids = <6 6>;
+ clocks = <&tegra_car TEGRA124_CLK_I2S2>;
+ clock-names = "i2s";
+ resets = <&tegra_car 18>;
+ reset-names = "i2s";
+ status = "disabled";
+ };
+
+ tegra_i2s3: i2s@0,70301300 {
+ compatible = "nvidia,tegra124-i2s";
+ reg = <0x0 0x70301300 0x0 0x100>;
+ nvidia,ahub-cif-ids = <7 7>;
+ clocks = <&tegra_car TEGRA124_CLK_I2S3>;
+ clock-names = "i2s";
+ resets = <&tegra_car 101>;
+ reset-names = "i2s";
+ status = "disabled";
+ };
+
+ tegra_i2s4: i2s@0,70301400 {
+ compatible = "nvidia,tegra124-i2s";
+ reg = <0x0 0x70301400 0x0 0x100>;
+ nvidia,ahub-cif-ids = <8 8>;
+ clocks = <&tegra_car TEGRA124_CLK_I2S4>;
+ clock-names = "i2s";
+ resets = <&tegra_car 102>;
+ reset-names = "i2s";
+ status = "disabled";
+ };
+ };
+
+ /* EHCI USB controller at 0x7d000000 (SPI 20); uses phy1 and is left "disabled" in this file. */
+ usb@0,7d000000 {
+ compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+ reg = <0x0 0x7d000000 0x0 0x4000>;
+ interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+ phy_type = "utmi";
+ clocks = <&tegra_car TEGRA124_CLK_USBD>;
+ clock-names = "usb";
+ resets = <&tegra_car 22>;
+ reset-names = "usb";
+ nvidia,phy = <&phy1>;
+ status = "disabled";
+ };
+
+ /*
+ * UTMI PHY for the controller at 0x7d000000. Both reg entries and both
+ * resets refer to this same block (0x7d000000 / reset 22), and this is
+ * the only usb-phy node here that carries nvidia,has-utmi-pad-registers.
+ */
+ phy1: usb-phy@0,7d000000 {
+ compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+ reg = <0x0 0x7d000000 0x0 0x4000>,
+ <0x0 0x7d000000 0x0 0x4000>;
+ phy_type = "utmi";
+ clocks = <&tegra_car TEGRA124_CLK_USBD>,
+ <&tegra_car TEGRA124_CLK_PLL_U>,
+ <&tegra_car TEGRA124_CLK_USBD>;
+ clock-names = "reg", "pll_u", "utmi-pads";
+ resets = <&tegra_car 22>, <&tegra_car 22>;
+ reset-names = "usb", "utmi-pads";
+ /* Transceiver settings; the same values are used by all three usb-phy nodes. */
+ nvidia,hssync-start-delay = <0>;
+ nvidia,idle-wait-delay = <17>;
+ nvidia,elastic-limit = <16>;
+ nvidia,term-range-adj = <6>;
+ nvidia,xcvr-setup = <9>;
+ nvidia,xcvr-lsfslew = <0>;
+ nvidia,xcvr-lsrslew = <3>;
+ nvidia,hssquelch-level = <2>;
+ nvidia,hsdiscon-level = <5>;
+ nvidia,xcvr-hsslew = <12>;
+ nvidia,has-utmi-pad-registers;
+ status = "disabled";
+ };
+
+ /* EHCI USB controller at 0x7d004000 (SPI 21); uses phy2 and is left "disabled" in this file. */
+ usb@0,7d004000 {
+ compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+ reg = <0x0 0x7d004000 0x0 0x4000>;
+ interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+ phy_type = "utmi";
+ clocks = <&tegra_car TEGRA124_CLK_USB2>;
+ clock-names = "usb";
+ resets = <&tegra_car 58>;
+ reset-names = "usb";
+ nvidia,phy = <&phy2>;
+ status = "disabled";
+ };
+
+ /*
+ * UTMI PHY for the controller at 0x7d004000. The second reg entry and
+ * the "utmi-pads" clock/reset point at the 0x7d000000 block (USBD clock,
+ * reset 22) rather than at this PHY's own register window.
+ */
+ phy2: usb-phy@0,7d004000 {
+ compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+ reg = <0x0 0x7d004000 0x0 0x4000>,
+ <0x0 0x7d000000 0x0 0x4000>;
+ phy_type = "utmi";
+ clocks = <&tegra_car TEGRA124_CLK_USB2>,
+ <&tegra_car TEGRA124_CLK_PLL_U>,
+ <&tegra_car TEGRA124_CLK_USBD>;
+ clock-names = "reg", "pll_u", "utmi-pads";
+ resets = <&tegra_car 58>, <&tegra_car 22>;
+ reset-names = "usb", "utmi-pads";
+ nvidia,hssync-start-delay = <0>;
+ nvidia,idle-wait-delay = <17>;
+ nvidia,elastic-limit = <16>;
+ nvidia,term-range-adj = <6>;
+ nvidia,xcvr-setup = <9>;
+ nvidia,xcvr-lsfslew = <0>;
+ nvidia,xcvr-lsrslew = <3>;
+ nvidia,hssquelch-level = <2>;
+ nvidia,hsdiscon-level = <5>;
+ nvidia,xcvr-hsslew = <12>;
+ status = "disabled";
+ };
+
+ /* EHCI USB controller at 0x7d008000 (SPI 97); uses phy3 and is left "disabled" in this file. */
+ usb@0,7d008000 {
+ compatible = "nvidia,tegra124-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+ reg = <0x0 0x7d008000 0x0 0x4000>;
+ interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>;
+ phy_type = "utmi";
+ clocks = <&tegra_car TEGRA124_CLK_USB3>;
+ clock-names = "usb";
+ resets = <&tegra_car 59>;
+ reset-names = "usb";
+ nvidia,phy = <&phy3>;
+ status = "disabled";
+ };
+
+ /*
+ * UTMI PHY for the controller at 0x7d008000. The second reg entry and
+ * the "utmi-pads" clock/reset point at the 0x7d000000 block (USBD clock,
+ * reset 22) rather than at this PHY's own register window.
+ */
+ phy3: usb-phy@0,7d008000 {
+ compatible = "nvidia,tegra124-usb-phy", "nvidia,tegra30-usb-phy";
+ reg = <0x0 0x7d008000 0x0 0x4000>,
+ <0x0 0x7d000000 0x0 0x4000>;
+ phy_type = "utmi";
+ clocks = <&tegra_car TEGRA124_CLK_USB3>,
+ <&tegra_car TEGRA124_CLK_PLL_U>,
+ <&tegra_car TEGRA124_CLK_USBD>;
+ clock-names = "reg", "pll_u", "utmi-pads";
+ resets = <&tegra_car 59>, <&tegra_car 22>;
+ reset-names = "usb", "utmi-pads";
+ nvidia,hssync-start-delay = <0>;
+ nvidia,idle-wait-delay = <17>;
+ nvidia,elastic-limit = <16>;
+ nvidia,term-range-adj = <6>;
+ nvidia,xcvr-setup = <9>;
+ nvidia,xcvr-lsfslew = <0>;
+ nvidia,xcvr-lsrslew = <3>;
+ nvidia,hssquelch-level = <2>;
+ nvidia,hsdiscon-level = <5>;
+ nvidia,xcvr-hsslew = <12>;
+ status = "disabled";
+ };
+
+ /* Two CPU cores ("nvidia,denver", "arm,armv8"), each identified by a single-cell reg value (0 and 1). */
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "nvidia,denver", "arm,armv8";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "nvidia,denver", "arm,armv8";
+ reg = <1>;
+ };
+ };
+
+ /*
+ * Per-CPU timer: four level-low PPIs (13, 14, 11, 10) delivered through
+ * the interrupt controller labelled "gic".
+ *
+ * NOTE(review): compatible is "arm,armv7-timer" and the PPI mask is
+ * GIC_CPU_MASK_SIMPLE(4), while the cpus node declares two "arm,armv8"
+ * cores - confirm both are intended.
+ */
+ timer {
+ compatible = "arm,armv7-timer";
+ interrupts = <GIC_PPI 13
+ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 14
+ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 11
+ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 10
+ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+ interrupt-parent = <&gic>;
+ };
+};
--- /dev/null
+#include "tegra210.dtsi"
+
+/*
+ * Board-level overrides for the P2180 module ("NVIDIA Jetson TX1"):
+ * aliases, memory, and the nodes this board switches on or tweaks.
+ */
+/ {
+ model = "NVIDIA Jetson TX1";
+ compatible = "nvidia,p2180", "nvidia,tegra210";
+
+ /* rtc1 is given as a full node path, serial0 as a label reference. */
+ aliases {
+ rtc1 = "/rtc@0,7000e000";
+ serial0 = &uarta;
+ };
+
+ /*
+ * RAM starting at 0x80000000. The size cells <0x1 0x0> read as
+ * 0x1_0000_0000 (4 GiB) assuming two address and two size cells at
+ * the root - TODO confirm against the included SoC dtsi.
+ */
+ memory {
+ device_type = "memory";
+ reg = <0x0 0x80000000 0x1 0x0>;
+ };
+
+ /* debug port */
+ serial@0,70006000 {
+ status = "okay";
+ };
+
+ /* Adds the nvidia,invert-interrupt flag to the PMC node. */
+ pmc@0,7000e400 {
+ nvidia,invert-interrupt;
+ };
+
+ /* eMMC */
+ sdhci@0,700b0600 {
+ status = "okay";
+ bus-width = <8>;
+ non-removable;
+ };
+
+ /* Fixed-rate clocks; the only entry is a 32768 Hz clock labelled clk32k_in. */
+ clocks {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clk32k_in: clock@0 {
+ compatible = "fixed-clock";
+ reg = <0>;
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+ };
+};
--- /dev/null
+/dts-v1/;
+
+#include "tegra210-p2530.dtsi"
+#include "tegra210-p2595.dtsi"
+
+/* Top-level board identity: names the P2371 reference design (P2530/P2595) and its compatible strings. */
+/ {
+ model = "NVIDIA Tegra210 P2371 (P2530/P2595) reference design";
+ compatible = "nvidia,p2371-0000", "nvidia,tegra210";
+};
--- /dev/null
+/dts-v1/;
+
+#include "tegra210-p2180.dtsi"
+#include "tegra210-p2597.dtsi"
+
+/* Top-level board identity: names the Jetson TX1 Developer Kit and its compatible strings. */
+/ {
+ model = "NVIDIA Jetson TX1 Developer Kit";
+ compatible = "nvidia,p2371-2180", "nvidia,tegra210";
+};
--- /dev/null
+#include "tegra210.dtsi"
+
+/*
+ * Board-level overrides for the P2530 main board: aliases, memory, and
+ * the nodes this board switches on or tweaks.
+ */
+/ {
+ model = "NVIDIA Tegra210 P2530 main board";
+ compatible = "nvidia,p2530", "nvidia,tegra210";
+
+ /* rtc1 is given as a full node path, serial0 as a label reference. */
+ aliases {
+ rtc1 = "/rtc@0,7000e000";
+ serial0 = &uarta;
+ };
+
+ /*
+ * RAM starting at 0x80000000. The size cells <0x0 0xc0000000> read
+ * as 3 GiB assuming two address and two size cells at the root -
+ * TODO confirm against the included SoC dtsi.
+ */
+ memory {
+ device_type = "memory";
+ reg = <0x0 0x80000000 0x0 0xc0000000>;
+ };
+
+ /* debug port */
+ serial@0,70006000 {
+ status = "okay";
+ };
+
+ /* I2C controller at 0x7000d000, enabled with clock-frequency 400000. */
+ i2c@0,7000d000 {
+ status = "okay";
+ clock-frequency = <400000>;
+ };
+
+ /* Adds the nvidia,invert-interrupt flag to the PMC node. */
+ pmc@0,7000e400 {
+ nvidia,invert-interrupt;
+ };
+
+ /* eMMC */
+ sdhci@0,700b0600 {
+ status = "okay";
+ bus-width = <8>;
+ non-removable;
+ };
+
+ /* Fixed-rate clocks; the only entry is a 32768 Hz clock labelled clk32k_in. */
+ clocks {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clk32k_in: clock@0 {
+ compatible = "fixed-clock";
+ reg = <0>;
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+ };
+};
--- /dev/null
+/dts-v1/;
+
+#include <dt-bindings/input/input.h>
+#include "tegra210-p2530.dtsi"
+
+/ {
+ model = "NVIDIA Tegra210 P2571 reference design";
+ compatible = "nvidia,p2571", "nvidia,tegra210";
+
+ pinmux: pinmux@0,700008d4 {
+ pinctrl-names = "boot";
+ pinctrl-0 = <&state_boot>;
+
+ state_boot: pinmux {
+ pex_l0_rst_n_pa0 {
+ nvidia,pins = "pex_l0_rst_n_pa0";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ pex_l0_clkreq_n_pa1 {
+ nvidia,pins = "pex_l0_clkreq_n_pa1";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ pex_wake_n_pa2 {
+ nvidia,pins = "pex_wake_n_pa2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ pex_l1_rst_n_pa3 {
+ nvidia,pins = "pex_l1_rst_n_pa3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ pex_l1_clkreq_n_pa4 {
+ nvidia,pins = "pex_l1_clkreq_n_pa4";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ sata_led_active_pa5 {
+ nvidia,pins = "sata_led_active_pa5";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pa6 {
+ nvidia,pins = "pa6";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_fs_pb0 {
+ nvidia,pins = "dap1_fs_pb0";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_din_pb1 {
+ nvidia,pins = "dap1_din_pb1";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_dout_pb2 {
+ nvidia,pins = "dap1_dout_pb2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_sclk_pb3 {
+ nvidia,pins = "dap1_sclk_pb3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_mosi_pb4 {
+ nvidia,pins = "spi2_mosi_pb4";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_miso_pb5 {
+ nvidia,pins = "spi2_miso_pb5";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_sck_pb6 {
+ nvidia,pins = "spi2_sck_pb6";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_cs0_pb7 {
+ nvidia,pins = "spi2_cs0_pb7";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_mosi_pc0 {
+ nvidia,pins = "spi1_mosi_pc0";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_miso_pc1 {
+ nvidia,pins = "spi1_miso_pc1";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_sck_pc2 {
+ nvidia,pins = "spi1_sck_pc2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_cs0_pc3 {
+ nvidia,pins = "spi1_cs0_pc3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_cs1_pc4 {
+ nvidia,pins = "spi1_cs1_pc4";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_sck_pc5 {
+ nvidia,pins = "spi4_sck_pc5";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_cs0_pc6 {
+ nvidia,pins = "spi4_cs0_pc6";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_mosi_pc7 {
+ nvidia,pins = "spi4_mosi_pc7";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_miso_pd0 {
+ nvidia,pins = "spi4_miso_pd0";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_tx_pd1 {
+ nvidia,pins = "uart3_tx_pd1";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_rx_pd2 {
+ nvidia,pins = "uart3_rx_pd2";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_rts_pd3 {
+ nvidia,pins = "uart3_rts_pd3";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_cts_pd4 {
+ nvidia,pins = "uart3_cts_pd4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic1_clk_pe0 {
+ nvidia,pins = "dmic1_clk_pe0";
+ nvidia,function = "i2s3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic1_dat_pe1 {
+ nvidia,pins = "dmic1_dat_pe1";
+ nvidia,function = "i2s3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic2_clk_pe2 {
+ nvidia,pins = "dmic2_clk_pe2";
+ nvidia,function = "i2s3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic2_dat_pe3 {
+ nvidia,pins = "dmic2_dat_pe3";
+ nvidia,function = "i2s3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic3_clk_pe4 {
+ nvidia,pins = "dmic3_clk_pe4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic3_dat_pe5 {
+ nvidia,pins = "dmic3_dat_pe5";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pe6 {
+ nvidia,pins = "pe6";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pe7 {
+ nvidia,pins = "pe7";
+ nvidia,function = "pwm3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gen3_i2c_scl_pf0 {
+ nvidia,pins = "gen3_i2c_scl_pf0";
+ nvidia,function = "i2c3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ gen3_i2c_sda_pf1 {
+ nvidia,pins = "gen3_i2c_sda_pf1";
+ nvidia,function = "i2c3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ uart2_tx_pg0 {
+ nvidia,pins = "uart2_tx_pg0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart2_rx_pg1 {
+ nvidia,pins = "uart2_rx_pg1";
+ nvidia,function = "uartb";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart2_rts_pg2 {
+ nvidia,pins = "uart2_rts_pg2";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart2_cts_pg3 {
+ nvidia,pins = "uart2_cts_pg3";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ wifi_en_ph0 {
+ nvidia,pins = "wifi_en_ph0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ wifi_rst_ph1 {
+ nvidia,pins = "wifi_rst_ph1";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ wifi_wake_ap_ph2 {
+ nvidia,pins = "wifi_wake_ap_ph2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ap_wake_bt_ph3 {
+ nvidia,pins = "ap_wake_bt_ph3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ bt_rst_ph4 {
+ nvidia,pins = "bt_rst_ph4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ bt_wake_ap_ph5 {
+ nvidia,pins = "bt_wake_ap_ph5";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ph6 {
+ nvidia,pins = "ph6";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ap_wake_nfc_ph7 {
+ nvidia,pins = "ap_wake_nfc_ph7";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ nfc_en_pi0 {
+ nvidia,pins = "nfc_en_pi0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ nfc_int_pi1 {
+ nvidia,pins = "nfc_int_pi1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gps_en_pi2 {
+ nvidia,pins = "gps_en_pi2";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gps_rst_pi3 {
+ nvidia,pins = "gps_rst_pi3";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_tx_pi4 {
+ nvidia,pins = "uart4_tx_pi4";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_rx_pi5 {
+ nvidia,pins = "uart4_rx_pi5";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_rts_pi6 {
+ nvidia,pins = "uart4_rts_pi6";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_cts_pi7 {
+ nvidia,pins = "uart4_cts_pi7";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gen1_i2c_sda_pj0 {
+ nvidia,pins = "gen1_i2c_sda_pj0";
+ nvidia,function = "i2c1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ gen1_i2c_scl_pj1 {
+ nvidia,pins = "gen1_i2c_scl_pj1";
+ nvidia,function = "i2c1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ gen2_i2c_scl_pj2 {
+ nvidia,pins = "gen2_i2c_scl_pj2";
+ nvidia,function = "i2c2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ gen2_i2c_sda_pj3 {
+ nvidia,pins = "gen2_i2c_sda_pj3";
+ nvidia,function = "i2c2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ dap4_fs_pj4 {
+ nvidia,pins = "dap4_fs_pj4";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap4_din_pj5 {
+ nvidia,pins = "dap4_din_pj5";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap4_dout_pj6 {
+ nvidia,pins = "dap4_dout_pj6";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap4_sclk_pj7 {
+ nvidia,pins = "dap4_sclk_pj7";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk0 {
+ nvidia,pins = "pk0";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk1 {
+ nvidia,pins = "pk1";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk2 {
+ nvidia,pins = "pk2";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk3 {
+ nvidia,pins = "pk3";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk4 {
+ nvidia,pins = "pk4";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk5 {
+ nvidia,pins = "pk5";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk6 {
+ nvidia,pins = "pk6";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk7 {
+ nvidia,pins = "pk7";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pl0 {
+ nvidia,pins = "pl0";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pl1 {
+ nvidia,pins = "pl1";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_clk_pm0 {
+ nvidia,pins = "sdmmc1_clk_pm0";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_cmd_pm1 {
+ nvidia,pins = "sdmmc1_cmd_pm1";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat3_pm2 {
+ nvidia,pins = "sdmmc1_dat3_pm2";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat2_pm3 {
+ nvidia,pins = "sdmmc1_dat2_pm3";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat1_pm4 {
+ nvidia,pins = "sdmmc1_dat1_pm4";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat0_pm5 {
+ nvidia,pins = "sdmmc1_dat0_pm5";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_clk_pp0 {
+ nvidia,pins = "sdmmc3_clk_pp0";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_cmd_pp1 {
+ nvidia,pins = "sdmmc3_cmd_pp1";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat3_pp2 {
+ nvidia,pins = "sdmmc3_dat3_pp2";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat2_pp3 {
+ nvidia,pins = "sdmmc3_dat2_pp3";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat1_pp4 {
+ nvidia,pins = "sdmmc3_dat1_pp4";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat0_pp5 {
+ nvidia,pins = "sdmmc3_dat0_pp5";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam1_mclk_ps0 {
+ nvidia,pins = "cam1_mclk_ps0";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam2_mclk_ps1 {
+ nvidia,pins = "cam2_mclk_ps1";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam_i2c_scl_ps2 {
+ nvidia,pins = "cam_i2c_scl_ps2";
+ nvidia,function = "i2cvi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ cam_i2c_sda_ps3 {
+ nvidia,pins = "cam_i2c_sda_ps3";
+ nvidia,function = "i2cvi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ cam_rst_ps4 {
+ nvidia,pins = "cam_rst_ps4";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam_af_en_ps5 {
+ nvidia,pins = "cam_af_en_ps5";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam_flash_en_ps6 {
+ nvidia,pins = "cam_flash_en_ps6";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam1_pwdn_ps7 {
+ nvidia,pins = "cam1_pwdn_ps7";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam2_pwdn_pt0 {
+ nvidia,pins = "cam2_pwdn_pt0";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam1_strobe_pt1 {
+ nvidia,pins = "cam1_strobe_pt1";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart1_tx_pu0 {
+ nvidia,pins = "uart1_tx_pu0";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart1_rx_pu1 {
+ nvidia,pins = "uart1_rx_pu1";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart1_rts_pu2 {
+ nvidia,pins = "uart1_rts_pu2";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart1_cts_pu3 {
+ nvidia,pins = "uart1_cts_pu3";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_bl_pwm_pv0 {
+ nvidia,pins = "lcd_bl_pwm_pv0";
+ nvidia,function = "pwm0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_bl_en_pv1 {
+ nvidia,pins = "lcd_bl_en_pv1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_rst_pv2 {
+ nvidia,pins = "lcd_rst_pv2";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_gpio1_pv3 {
+ nvidia,pins = "lcd_gpio1_pv3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_gpio2_pv4 {
+ nvidia,pins = "lcd_gpio2_pv4";
+ nvidia,function = "pwm1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ap_ready_pv5 {
+ nvidia,pins = "ap_ready_pv5";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ touch_rst_pv6 {
+ nvidia,pins = "touch_rst_pv6";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ touch_clk_pv7 {
+ nvidia,pins = "touch_clk_pv7";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ modem_wake_ap_px0 {
+ nvidia,pins = "modem_wake_ap_px0";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ touch_int_px1 {
+ nvidia,pins = "touch_int_px1";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ motion_int_px2 {
+ nvidia,pins = "motion_int_px2";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ als_prox_int_px3 {
+ nvidia,pins = "als_prox_int_px3";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ temp_alert_px4 {
+ nvidia,pins = "temp_alert_px4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_power_on_px5 {
+ nvidia,pins = "button_power_on_px5";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_vol_up_px6 {
+ nvidia,pins = "button_vol_up_px6";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_vol_down_px7 {
+ nvidia,pins = "button_vol_down_px7";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_slide_sw_py0 {
+ nvidia,pins = "button_slide_sw_py0";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_home_py1 {
+ nvidia,pins = "button_home_py1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_te_py2 {
+ nvidia,pins = "lcd_te_py2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pwr_i2c_scl_py3 {
+ nvidia,pins = "pwr_i2c_scl_py3";
+ nvidia,function = "i2cpmu";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ pwr_i2c_sda_py4 {
+ nvidia,pins = "pwr_i2c_sda_py4";
+ nvidia,function = "i2cpmu";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ clk_32k_out_py5 {
+ nvidia,pins = "clk_32k_out_py5";
+ nvidia,function = "soc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz0 {
+ nvidia,pins = "pz0";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz1 {
+ nvidia,pins = "pz1";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz2 {
+ nvidia,pins = "pz2";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz3 {
+ nvidia,pins = "pz3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz4 {
+ nvidia,pins = "pz4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz5 {
+ nvidia,pins = "pz5";
+ nvidia,function = "soc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_fs_paa0 {
+ nvidia,pins = "dap2_fs_paa0";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_sclk_paa1 {
+ nvidia,pins = "dap2_sclk_paa1";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_din_paa2 {
+ nvidia,pins = "dap2_din_paa2";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_dout_paa3 {
+ nvidia,pins = "dap2_dout_paa3";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ aud_mclk_pbb0 {
+ nvidia,pins = "aud_mclk_pbb0";
+ nvidia,function = "aud";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dvfs_pwm_pbb1 {
+ nvidia,pins = "dvfs_pwm_pbb1";
+ nvidia,function = "cldvfs";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dvfs_clk_pbb2 {
+ nvidia,pins = "dvfs_clk_pbb2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gpio_x1_aud_pbb3 {
+ nvidia,pins = "gpio_x1_aud_pbb3";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gpio_x3_aud_pbb4 {
+ nvidia,pins = "gpio_x3_aud_pbb4";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ hdmi_cec_pcc0 {
+ nvidia,pins = "hdmi_cec_pcc0";
+ nvidia,function = "cec";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ hdmi_int_dp_hpd_pcc1 {
+ nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ spdif_out_pcc2 {
+ nvidia,pins = "spdif_out_pcc2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spdif_in_pcc3 {
+ nvidia,pins = "spdif_in_pcc3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ usb_vbus_en0_pcc4 {
+ nvidia,pins = "usb_vbus_en0_pcc4";
+ nvidia,function = "usb";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ usb_vbus_en1_pcc5 {
+ nvidia,pins = "usb_vbus_en1_pcc5";
+ nvidia,function = "usb";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ dp_hpd0_pcc6 {
+ nvidia,pins = "dp_hpd0_pcc6";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pcc7 {
+ nvidia,pins = "pcc7";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_cs1_pdd0 {
+ nvidia,pins = "spi2_cs1_pdd0";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_sck_pee0 {
+ nvidia,pins = "qspi_sck_pee0";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_cs_n_pee1 {
+ nvidia,pins = "qspi_cs_n_pee1";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io0_pee2 {
+ nvidia,pins = "qspi_io0_pee2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io1_pee3 {
+ nvidia,pins = "qspi_io1_pee3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io2_pee4 {
+ nvidia,pins = "qspi_io2_pee4";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io3_pee5 {
+ nvidia,pins = "qspi_io3_pee5";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ core_pwr_req {
+ nvidia,pins = "core_pwr_req";
+ nvidia,function = "core";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cpu_pwr_req {
+ nvidia,pins = "cpu_pwr_req";
+ nvidia,function = "cpu";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pwr_int_n {
+ nvidia,pins = "pwr_int_n";
+ nvidia,function = "pmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ clk_32k_in {
+ nvidia,pins = "clk_32k_in";
+ nvidia,function = "clk";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ jtag_rtck {
+ nvidia,pins = "jtag_rtck";
+ nvidia,function = "jtag";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ clk_req {
+ nvidia,pins = "clk_req";
+ nvidia,function = "sys";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ shutdown {
+ nvidia,pins = "shutdown";
+ nvidia,function = "shutdown";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ };
+ };
+};
--- /dev/null
+/ {
+ model = "NVIDIA Tegra210 P2595 I/O board";
+ compatible = "nvidia,p2595", "nvidia,tegra210";
+
+ pinmux: pinmux@0,700008d4 {
+ pinctrl-names = "boot";
+ pinctrl-0 = <&state_boot>;
+
+ state_boot: pinmux {
+ pex_l0_rst_n_pa0 {
+ nvidia,pins = "pex_l0_rst_n_pa0";
+ nvidia,function = "pe0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ pex_l0_clkreq_n_pa1 {
+ nvidia,pins = "pex_l0_clkreq_n_pa1";
+ nvidia,function = "pe0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ pex_wake_n_pa2 {
+ nvidia,pins = "pex_wake_n_pa2";
+ nvidia,function = "pe";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ pex_l1_rst_n_pa3 {
+ nvidia,pins = "pex_l1_rst_n_pa3";
+ nvidia,function = "pe1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ pex_l1_clkreq_n_pa4 {
+ nvidia,pins = "pex_l1_clkreq_n_pa4";
+ nvidia,function = "pe1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ sata_led_active_pa5 {
+ nvidia,pins = "sata_led_active_pa5";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pa6 {
+ nvidia,pins = "pa6";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_fs_pb0 {
+ nvidia,pins = "dap1_fs_pb0";
+ nvidia,function = "i2s1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_din_pb1 {
+ nvidia,pins = "dap1_din_pb1";
+ nvidia,function = "i2s1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_dout_pb2 {
+ nvidia,pins = "dap1_dout_pb2";
+ nvidia,function = "i2s1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_sclk_pb3 {
+ nvidia,pins = "dap1_sclk_pb3";
+ nvidia,function = "i2s1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_mosi_pb4 {
+ nvidia,pins = "spi2_mosi_pb4";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_miso_pb5 {
+ nvidia,pins = "spi2_miso_pb5";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_sck_pb6 {
+ nvidia,pins = "spi2_sck_pb6";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_cs0_pb7 {
+ nvidia,pins = "spi2_cs0_pb7";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_mosi_pc0 {
+ nvidia,pins = "spi1_mosi_pc0";
+ nvidia,function = "spi1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_miso_pc1 {
+ nvidia,pins = "spi1_miso_pc1";
+ nvidia,function = "spi1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_sck_pc2 {
+ nvidia,pins = "spi1_sck_pc2";
+ nvidia,function = "spi1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_cs0_pc3 {
+ nvidia,pins = "spi1_cs0_pc3";
+ nvidia,function = "spi1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_cs1_pc4 {
+ nvidia,pins = "spi1_cs1_pc4";
+ nvidia,function = "spi1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_sck_pc5 {
+ nvidia,pins = "spi4_sck_pc5";
+ nvidia,function = "spi4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_cs0_pc6 {
+ nvidia,pins = "spi4_cs0_pc6";
+ nvidia,function = "spi4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_mosi_pc7 {
+ nvidia,pins = "spi4_mosi_pc7";
+ nvidia,function = "spi4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_miso_pd0 {
+ nvidia,pins = "spi4_miso_pd0";
+ nvidia,function = "spi4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_tx_pd1 {
+ nvidia,pins = "uart3_tx_pd1";
+ nvidia,function = "uartc";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_rx_pd2 {
+ nvidia,pins = "uart3_rx_pd2";
+ nvidia,function = "uartc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_rts_pd3 {
+ nvidia,pins = "uart3_rts_pd3";
+ nvidia,function = "uartc";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_cts_pd4 {
+ nvidia,pins = "uart3_cts_pd4";
+ nvidia,function = "uartc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic1_clk_pe0 {
+ nvidia,pins = "dmic1_clk_pe0";
+ nvidia,function = "dmic1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic1_dat_pe1 {
+ nvidia,pins = "dmic1_dat_pe1";
+ nvidia,function = "dmic1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic2_clk_pe2 {
+ nvidia,pins = "dmic2_clk_pe2";
+ nvidia,function = "dmic2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic2_dat_pe3 {
+ nvidia,pins = "dmic2_dat_pe3";
+ nvidia,function = "dmic2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic3_clk_pe4 {
+ nvidia,pins = "dmic3_clk_pe4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic3_dat_pe5 {
+ nvidia,pins = "dmic3_dat_pe5";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pe6 {
+ nvidia,pins = "pe6";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pe7 {
+ nvidia,pins = "pe7";
+ nvidia,function = "pwm3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gen3_i2c_scl_pf0 {
+ nvidia,pins = "gen3_i2c_scl_pf0";
+ nvidia,function = "i2c3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ gen3_i2c_sda_pf1 {
+ nvidia,pins = "gen3_i2c_sda_pf1";
+ nvidia,function = "i2c3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ uart2_tx_pg0 {
+ nvidia,pins = "uart2_tx_pg0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart2_rx_pg1 {
+ nvidia,pins = "uart2_rx_pg1";
+ nvidia,function = "uartb";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart2_rts_pg2 {
+ nvidia,pins = "uart2_rts_pg2";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart2_cts_pg3 {
+ nvidia,pins = "uart2_cts_pg3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ wifi_en_ph0 {
+ nvidia,pins = "wifi_en_ph0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ wifi_rst_ph1 {
+ nvidia,pins = "wifi_rst_ph1";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ wifi_wake_ap_ph2 {
+ nvidia,pins = "wifi_wake_ap_ph2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ap_wake_bt_ph3 {
+ nvidia,pins = "ap_wake_bt_ph3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ bt_rst_ph4 {
+ nvidia,pins = "bt_rst_ph4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ bt_wake_ap_ph5 {
+ nvidia,pins = "bt_wake_ap_ph5";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ph6 {
+ nvidia,pins = "ph6";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ap_wake_nfc_ph7 {
+ nvidia,pins = "ap_wake_nfc_ph7";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ nfc_en_pi0 {
+ nvidia,pins = "nfc_en_pi0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ nfc_int_pi1 {
+ nvidia,pins = "nfc_int_pi1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gps_en_pi2 {
+ nvidia,pins = "gps_en_pi2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gps_rst_pi3 {
+ nvidia,pins = "gps_rst_pi3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_tx_pi4 {
+ nvidia,pins = "uart4_tx_pi4";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_rx_pi5 {
+ nvidia,pins = "uart4_rx_pi5";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_rts_pi6 {
+ nvidia,pins = "uart4_rts_pi6";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_cts_pi7 {
+ nvidia,pins = "uart4_cts_pi7";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gen1_i2c_sda_pj0 {
+ nvidia,pins = "gen1_i2c_sda_pj0";
+ nvidia,function = "i2c1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ gen1_i2c_scl_pj1 {
+ nvidia,pins = "gen1_i2c_scl_pj1";
+ nvidia,function = "i2c1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ gen2_i2c_scl_pj2 {
+ nvidia,pins = "gen2_i2c_scl_pj2";
+ nvidia,function = "i2c2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ gen2_i2c_sda_pj3 {
+ nvidia,pins = "gen2_i2c_sda_pj3";
+ nvidia,function = "i2c2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ dap4_fs_pj4 {
+ nvidia,pins = "dap4_fs_pj4";
+ nvidia,function = "i2s4b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap4_din_pj5 {
+ nvidia,pins = "dap4_din_pj5";
+ nvidia,function = "i2s4b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap4_dout_pj6 {
+ nvidia,pins = "dap4_dout_pj6";
+ nvidia,function = "i2s4b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap4_sclk_pj7 {
+ nvidia,pins = "dap4_sclk_pj7";
+ nvidia,function = "i2s4b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk0 {
+ nvidia,pins = "pk0";
+ nvidia,function = "i2s5b";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk1 {
+ nvidia,pins = "pk1";
+ nvidia,function = "i2s5b";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk2 {
+ nvidia,pins = "pk2";
+ nvidia,function = "i2s5b";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk3 {
+ nvidia,pins = "pk3";
+ nvidia,function = "i2s5b";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk4 {
+ nvidia,pins = "pk4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk5 {
+ nvidia,pins = "pk5";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk6 {
+ nvidia,pins = "pk6";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk7 {
+ nvidia,pins = "pk7";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pl0 {
+ nvidia,pins = "pl0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pl1 {
+ nvidia,pins = "pl1";
+ nvidia,function = "soc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_clk_pm0 {
+ nvidia,pins = "sdmmc1_clk_pm0";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_cmd_pm1 {
+ nvidia,pins = "sdmmc1_cmd_pm1";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat3_pm2 {
+ nvidia,pins = "sdmmc1_dat3_pm2";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat2_pm3 {
+ nvidia,pins = "sdmmc1_dat2_pm3";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat1_pm4 {
+ nvidia,pins = "sdmmc1_dat1_pm4";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat0_pm5 {
+ nvidia,pins = "sdmmc1_dat0_pm5";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_clk_pp0 {
+ nvidia,pins = "sdmmc3_clk_pp0";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_cmd_pp1 {
+ nvidia,pins = "sdmmc3_cmd_pp1";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat3_pp2 {
+ nvidia,pins = "sdmmc3_dat3_pp2";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat2_pp3 {
+ nvidia,pins = "sdmmc3_dat2_pp3";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat1_pp4 {
+ nvidia,pins = "sdmmc3_dat1_pp4";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat0_pp5 {
+ nvidia,pins = "sdmmc3_dat0_pp5";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam1_mclk_ps0 {
+ nvidia,pins = "cam1_mclk_ps0";
+ nvidia,function = "extperiph3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam2_mclk_ps1 {
+ nvidia,pins = "cam2_mclk_ps1";
+ nvidia,function = "extperiph3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam_i2c_scl_ps2 {
+ nvidia,pins = "cam_i2c_scl_ps2";
+ nvidia,function = "i2cvi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ cam_i2c_sda_ps3 {
+ nvidia,pins = "cam_i2c_sda_ps3";
+ nvidia,function = "i2cvi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ cam_rst_ps4 {
+ nvidia,pins = "cam_rst_ps4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam_af_en_ps5 {
+ nvidia,pins = "cam_af_en_ps5";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam_flash_en_ps6 {
+ nvidia,pins = "cam_flash_en_ps6";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam1_pwdn_ps7 {
+ nvidia,pins = "cam1_pwdn_ps7";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam2_pwdn_pt0 {
+ nvidia,pins = "cam2_pwdn_pt0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam1_strobe_pt1 {
+ nvidia,pins = "cam1_strobe_pt1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart1_tx_pu0 {
+ nvidia,pins = "uart1_tx_pu0";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart1_rx_pu1 {
+ nvidia,pins = "uart1_rx_pu1";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart1_rts_pu2 {
+ nvidia,pins = "uart1_rts_pu2";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart1_cts_pu3 {
+ nvidia,pins = "uart1_cts_pu3";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_bl_pwm_pv0 {
+ nvidia,pins = "lcd_bl_pwm_pv0";
+ nvidia,function = "pwm0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_bl_en_pv1 {
+ nvidia,pins = "lcd_bl_en_pv1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_rst_pv2 {
+ nvidia,pins = "lcd_rst_pv2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_gpio1_pv3 {
+ nvidia,pins = "lcd_gpio1_pv3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_gpio2_pv4 {
+ nvidia,pins = "lcd_gpio2_pv4";
+ nvidia,function = "pwm1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ap_ready_pv5 {
+ nvidia,pins = "ap_ready_pv5";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ touch_rst_pv6 {
+ nvidia,pins = "touch_rst_pv6";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ touch_clk_pv7 {
+ nvidia,pins = "touch_clk_pv7";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ modem_wake_ap_px0 {
+ nvidia,pins = "modem_wake_ap_px0";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ touch_int_px1 {
+ nvidia,pins = "touch_int_px1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ motion_int_px2 {
+ nvidia,pins = "motion_int_px2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ als_prox_int_px3 {
+ nvidia,pins = "als_prox_int_px3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ temp_alert_px4 {
+ nvidia,pins = "temp_alert_px4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_power_on_px5 {
+ nvidia,pins = "button_power_on_px5";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_vol_up_px6 {
+ nvidia,pins = "button_vol_up_px6";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_vol_down_px7 {
+ nvidia,pins = "button_vol_down_px7";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_slide_sw_py0 {
+ nvidia,pins = "button_slide_sw_py0";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_home_py1 {
+ nvidia,pins = "button_home_py1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_te_py2 {
+ nvidia,pins = "lcd_te_py2";
+ nvidia,function = "displaya";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pwr_i2c_scl_py3 {
+ nvidia,pins = "pwr_i2c_scl_py3";
+ nvidia,function = "i2cpmu";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ pwr_i2c_sda_py4 {
+ nvidia,pins = "pwr_i2c_sda_py4";
+ nvidia,function = "i2cpmu";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ clk_32k_out_py5 {
+ nvidia,pins = "clk_32k_out_py5";
+ nvidia,function = "soc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz0 {
+ nvidia,pins = "pz0";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz1 {
+ nvidia,pins = "pz1";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz2 {
+ nvidia,pins = "pz2";
+ nvidia,function = "rsvd2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz3 {
+ nvidia,pins = "pz3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz4 {
+ nvidia,pins = "pz4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz5 {
+ nvidia,pins = "pz5";
+ nvidia,function = "soc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_fs_paa0 {
+ nvidia,pins = "dap2_fs_paa0";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_sclk_paa1 {
+ nvidia,pins = "dap2_sclk_paa1";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_din_paa2 {
+ nvidia,pins = "dap2_din_paa2";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_dout_paa3 {
+ nvidia,pins = "dap2_dout_paa3";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ aud_mclk_pbb0 {
+ nvidia,pins = "aud_mclk_pbb0";
+ nvidia,function = "aud";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dvfs_pwm_pbb1 {
+ nvidia,pins = "dvfs_pwm_pbb1";
+ nvidia,function = "cldvfs";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dvfs_clk_pbb2 {
+ nvidia,pins = "dvfs_clk_pbb2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gpio_x1_aud_pbb3 {
+ nvidia,pins = "gpio_x1_aud_pbb3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gpio_x3_aud_pbb4 {
+ nvidia,pins = "gpio_x3_aud_pbb4";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ hdmi_cec_pcc0 {
+ nvidia,pins = "hdmi_cec_pcc0";
+ nvidia,function = "cec";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ hdmi_int_dp_hpd_pcc1 {
+ nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ spdif_out_pcc2 {
+ nvidia,pins = "spdif_out_pcc2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spdif_in_pcc3 {
+ nvidia,pins = "spdif_in_pcc3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ usb_vbus_en0_pcc4 {
+ nvidia,pins = "usb_vbus_en0_pcc4";
+ nvidia,function = "usb";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ usb_vbus_en1_pcc5 {
+ nvidia,pins = "usb_vbus_en1_pcc5";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ dp_hpd0_pcc6 {
+ nvidia,pins = "dp_hpd0_pcc6";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pcc7 {
+ nvidia,pins = "pcc7";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_cs1_pdd0 {
+ nvidia,pins = "spi2_cs1_pdd0";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_sck_pee0 {
+ nvidia,pins = "qspi_sck_pee0";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_cs_n_pee1 {
+ nvidia,pins = "qspi_cs_n_pee1";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io0_pee2 {
+ nvidia,pins = "qspi_io0_pee2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io1_pee3 {
+ nvidia,pins = "qspi_io1_pee3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io2_pee4 {
+ nvidia,pins = "qspi_io2_pee4";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io3_pee5 {
+ nvidia,pins = "qspi_io3_pee5";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ core_pwr_req {
+ nvidia,pins = "core_pwr_req";
+ nvidia,function = "core";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cpu_pwr_req {
+ nvidia,pins = "cpu_pwr_req";
+ nvidia,function = "cpu";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pwr_int_n {
+ nvidia,pins = "pwr_int_n";
+ nvidia,function = "pmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ clk_32k_in {
+ nvidia,pins = "clk_32k_in";
+ nvidia,function = "clk";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ jtag_rtck {
+ nvidia,pins = "jtag_rtck";
+ nvidia,function = "jtag";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ clk_req {
+ nvidia,pins = "clk_req";
+ nvidia,function = "sys";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ shutdown {
+ nvidia,pins = "shutdown";
+ nvidia,function = "shutdown";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ };
+ };
+};
--- /dev/null
+/ {
+ model = "NVIDIA Tegra210 P2597 I/O board";
+ compatible = "nvidia,p2597", "nvidia,tegra210";
+
+ pinmux: pinmux@0,700008d4 {
+ pinctrl-names = "boot";
+ pinctrl-0 = <&state_boot>;
+
+ state_boot: pinmux {
+ pex_l0_rst_n_pa0 {
+ nvidia,pins = "pex_l0_rst_n_pa0";
+ nvidia,function = "pe0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ pex_l0_clkreq_n_pa1 {
+ nvidia,pins = "pex_l0_clkreq_n_pa1";
+ nvidia,function = "pe0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ pex_wake_n_pa2 {
+ nvidia,pins = "pex_wake_n_pa2";
+ nvidia,function = "pe";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ pex_l1_rst_n_pa3 {
+ nvidia,pins = "pex_l1_rst_n_pa3";
+ nvidia,function = "pe1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ pex_l1_clkreq_n_pa4 {
+ nvidia,pins = "pex_l1_clkreq_n_pa4";
+ nvidia,function = "pe1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ sata_led_active_pa5 {
+ nvidia,pins = "sata_led_active_pa5";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pa6 {
+ nvidia,pins = "pa6";
+ nvidia,function = "sata";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_fs_pb0 {
+ nvidia,pins = "dap1_fs_pb0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_din_pb1 {
+ nvidia,pins = "dap1_din_pb1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_dout_pb2 {
+ nvidia,pins = "dap1_dout_pb2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap1_sclk_pb3 {
+ nvidia,pins = "dap1_sclk_pb3";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_mosi_pb4 {
+ nvidia,pins = "spi2_mosi_pb4";
+ nvidia,function = "spi2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_miso_pb5 {
+ nvidia,pins = "spi2_miso_pb5";
+ nvidia,function = "spi2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_sck_pb6 {
+ nvidia,pins = "spi2_sck_pb6";
+ nvidia,function = "spi2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi2_cs0_pb7 {
+ nvidia,pins = "spi2_cs0_pb7";
+ nvidia,function = "spi2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_mosi_pc0 {
+ nvidia,pins = "spi1_mosi_pc0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_miso_pc1 {
+ nvidia,pins = "spi1_miso_pc1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_sck_pc2 {
+ nvidia,pins = "spi1_sck_pc2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_cs0_pc3 {
+ nvidia,pins = "spi1_cs0_pc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi1_cs1_pc4 {
+ nvidia,pins = "spi1_cs1_pc4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_sck_pc5 {
+ nvidia,pins = "spi4_sck_pc5";
+ nvidia,function = "spi4";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_cs0_pc6 {
+ nvidia,pins = "spi4_cs0_pc6";
+ nvidia,function = "spi4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_mosi_pc7 {
+ nvidia,pins = "spi4_mosi_pc7";
+ nvidia,function = "spi4";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spi4_miso_pd0 {
+ nvidia,pins = "spi4_miso_pd0";
+ nvidia,function = "spi4";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_tx_pd1 {
+ nvidia,pins = "uart3_tx_pd1";
+ nvidia,function = "uartc";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_rx_pd2 {
+ nvidia,pins = "uart3_rx_pd2";
+ nvidia,function = "uartc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_rts_pd3 {
+ nvidia,pins = "uart3_rts_pd3";
+ nvidia,function = "uartc";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart3_cts_pd4 {
+ nvidia,pins = "uart3_cts_pd4";
+ nvidia,function = "uartc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port E. The DMIC1/DMIC2 clock and data pads are muxed to "i2s3"
+  * (no pull, input enabled). dmic3_clk, dmic3_dat and pe6 carry no
+  * nvidia,function and are pulled down with input enabled.
+  * NOTE(review): pads without nvidia,function are presumably used as
+  * GPIOs - confirm against the board's GPIO consumers.
+  * pe7 is muxed to "pwm3" with input disabled.
+  */
+ dmic1_clk_pe0 {
+ nvidia,pins = "dmic1_clk_pe0";
+ nvidia,function = "i2s3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic1_dat_pe1 {
+ nvidia,pins = "dmic1_dat_pe1";
+ nvidia,function = "i2s3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic2_clk_pe2 {
+ nvidia,pins = "dmic2_clk_pe2";
+ nvidia,function = "i2s3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic2_dat_pe3 {
+ nvidia,pins = "dmic2_dat_pe3";
+ nvidia,function = "i2s3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic3_clk_pe4 {
+ nvidia,pins = "dmic3_clk_pe4";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dmic3_dat_pe5 {
+ nvidia,pins = "dmic3_dat_pe5";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pe6 {
+ nvidia,pins = "pe6";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pe7 {
+ nvidia,pins = "pe7";
+ nvidia,function = "pwm3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port F pins 0-1: GEN3 I2C clock and data, muxed to "i2c3". Both have
+  * no pull, input enabled, and nvidia,io-hv disabled.
+  */
+ gen3_i2c_scl_pf0 {
+ nvidia,pins = "gen3_i2c_scl_pf0";
+ nvidia,function = "i2c3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ gen3_i2c_sda_pf1 {
+ nvidia,pins = "gen3_i2c_sda_pf1";
+ nvidia,function = "i2c3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port G pins 0-3: the four UART2 signals, all muxed to "uartb".
+  * TX and RTS have no pull and input disabled; RX and CTS are pulled
+  * up with input enabled.
+  */
+ uart2_tx_pg0 {
+ nvidia,pins = "uart2_tx_pg0";
+ nvidia,function = "uartb";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart2_rx_pg1 {
+ nvidia,pins = "uart2_rx_pg1";
+ nvidia,function = "uartb";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart2_rts_pg2 {
+ nvidia,pins = "uart2_rts_pg2";
+ nvidia,function = "uartb";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart2_cts_pg3 {
+ nvidia,pins = "uart2_cts_pg3";
+ nvidia,function = "uartb";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port H pins 0-7: none of these nodes sets nvidia,function.
+  * NOTE(review): presumably all eight pads are used as GPIOs - confirm.
+  * Pads with input disabled and no pull: wifi_en, wifi_rst, ap_wake_bt,
+  * bt_rst. Pads with input enabled: wifi_wake_ap, bt_wake_ap and ph6
+  * (pull-up), ap_wake_nfc (pull-down).
+  */
+ wifi_en_ph0 {
+ nvidia,pins = "wifi_en_ph0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ wifi_rst_ph1 {
+ nvidia,pins = "wifi_rst_ph1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ wifi_wake_ap_ph2 {
+ nvidia,pins = "wifi_wake_ap_ph2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ap_wake_bt_ph3 {
+ nvidia,pins = "ap_wake_bt_ph3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ bt_rst_ph4 {
+ nvidia,pins = "bt_rst_ph4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ bt_wake_ap_ph5 {
+ nvidia,pins = "bt_wake_ap_ph5";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ph6 {
+ nvidia,pins = "ph6";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ap_wake_nfc_ph7 {
+ nvidia,pins = "ap_wake_nfc_ph7";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port I pins 0-7. nfc_en, nfc_int and gps_en set no nvidia,function
+  * (NOTE(review): presumably GPIOs - confirm). gps_rst is assigned
+  * "rsvd0" with pull-down and tristate enabled. The four UART4 signals
+  * are muxed to "uartd"; unlike the other UARTs in this table, RX and
+  * CTS here have no pull-up.
+  */
+ nfc_en_pi0 {
+ nvidia,pins = "nfc_en_pi0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ nfc_int_pi1 {
+ nvidia,pins = "nfc_int_pi1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gps_en_pi2 {
+ nvidia,pins = "gps_en_pi2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gps_rst_pi3 {
+ nvidia,pins = "gps_rst_pi3";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_tx_pi4 {
+ nvidia,pins = "uart4_tx_pi4";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_rx_pi5 {
+ nvidia,pins = "uart4_rx_pi5";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_rts_pi6 {
+ nvidia,pins = "uart4_rts_pi6";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart4_cts_pi7 {
+ nvidia,pins = "uart4_cts_pi7";
+ nvidia,function = "uartd";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port J pins 0-7. GEN1 I2C is muxed to "i2c1" with nvidia,io-hv
+  * disabled; GEN2 I2C is muxed to "i2c2" with nvidia,io-hv enabled.
+  * The four DAP4 signals are muxed to "i2s4b". Every pad in this group
+  * has no pull and input enabled.
+  */
+ gen1_i2c_sda_pj0 {
+ nvidia,pins = "gen1_i2c_sda_pj0";
+ nvidia,function = "i2c1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ gen1_i2c_scl_pj1 {
+ nvidia,pins = "gen1_i2c_scl_pj1";
+ nvidia,function = "i2c1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ gen2_i2c_scl_pj2 {
+ nvidia,pins = "gen2_i2c_scl_pj2";
+ nvidia,function = "i2c2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ gen2_i2c_sda_pj3 {
+ nvidia,pins = "gen2_i2c_sda_pj3";
+ nvidia,function = "i2c2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ dap4_fs_pj4 {
+ nvidia,pins = "dap4_fs_pj4";
+ nvidia,function = "i2s4b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap4_din_pj5 {
+ nvidia,pins = "dap4_din_pj5";
+ nvidia,function = "i2s4b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap4_dout_pj6 {
+ nvidia,pins = "dap4_dout_pj6";
+ nvidia,function = "i2s4b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap4_sclk_pj7 {
+ nvidia,pins = "dap4_sclk_pj7";
+ nvidia,function = "i2s4b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port K pins 0-7. pk0-pk3 are muxed to "i2s5b" (no pull, input
+  * enabled). pk4-pk7 set no nvidia,function (NOTE(review): presumably
+  * GPIOs - confirm): pk4, pk6 and pk7 are pulled up with input enabled,
+  * pk5 has no pull and input disabled.
+  */
+ pk0 {
+ nvidia,pins = "pk0";
+ nvidia,function = "i2s5b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk1 {
+ nvidia,pins = "pk1";
+ nvidia,function = "i2s5b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk2 {
+ nvidia,pins = "pk2";
+ nvidia,function = "i2s5b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk3 {
+ nvidia,pins = "pk3";
+ nvidia,function = "i2s5b";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk4 {
+ nvidia,pins = "pk4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk5 {
+ nvidia,pins = "pk5";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk6 {
+ nvidia,pins = "pk6";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pk7 {
+ nvidia,pins = "pk7";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port L pins 0-1. pl0 is assigned "rsvd0" with pull-down, tristate
+  * enabled and input disabled. pl1 sets no nvidia,function and is
+  * pulled up with input enabled.
+  */
+ pl0 {
+ nvidia,pins = "pl0";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pl1 {
+ nvidia,pins = "pl1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port M pins 0-5: SDMMC1 clock, command and four data lines, all
+  * muxed to "sdmmc1" with input enabled. The clock has no pull; CMD and
+  * DAT0-DAT3 are pulled up.
+  */
+ sdmmc1_clk_pm0 {
+ nvidia,pins = "sdmmc1_clk_pm0";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_cmd_pm1 {
+ nvidia,pins = "sdmmc1_cmd_pm1";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat3_pm2 {
+ nvidia,pins = "sdmmc1_dat3_pm2";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat2_pm3 {
+ nvidia,pins = "sdmmc1_dat2_pm3";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat1_pm4 {
+ nvidia,pins = "sdmmc1_dat1_pm4";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc1_dat0_pm5 {
+ nvidia,pins = "sdmmc1_dat0_pm5";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port P pins 0-5: SDMMC3 clock, command and four data lines, all
+  * muxed to "sdmmc3" with input enabled. The clock has no pull; CMD and
+  * DAT0-DAT3 are pulled up.
+  */
+ sdmmc3_clk_pp0 {
+ nvidia,pins = "sdmmc3_clk_pp0";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_cmd_pp1 {
+ nvidia,pins = "sdmmc3_cmd_pp1";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat3_pp2 {
+ nvidia,pins = "sdmmc3_dat3_pp2";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat2_pp3 {
+ nvidia,pins = "sdmmc3_dat2_pp3";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat1_pp4 {
+ nvidia,pins = "sdmmc3_dat1_pp4";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ sdmmc3_dat0_pp5 {
+ nvidia,pins = "sdmmc3_dat0_pp5";
+ nvidia,function = "sdmmc3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port S pins 0-7 (camera pads). Both MCLK pads are muxed to
+  * "extperiph3" with input disabled. The camera I2C pair is muxed to
+  * "i2cvi" with input enabled and nvidia,io-hv disabled. cam_rst,
+  * cam_af_en, cam_flash_en and cam1_pwdn set no nvidia,function
+  * (NOTE(review): presumably GPIO outputs - confirm) and have no pull
+  * and input disabled.
+  */
+ cam1_mclk_ps0 {
+ nvidia,pins = "cam1_mclk_ps0";
+ nvidia,function = "extperiph3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam2_mclk_ps1 {
+ nvidia,pins = "cam2_mclk_ps1";
+ nvidia,function = "extperiph3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam_i2c_scl_ps2 {
+ nvidia,pins = "cam_i2c_scl_ps2";
+ nvidia,function = "i2cvi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ cam_i2c_sda_ps3 {
+ nvidia,pins = "cam_i2c_sda_ps3";
+ nvidia,function = "i2cvi";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ cam_rst_ps4 {
+ nvidia,pins = "cam_rst_ps4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam_af_en_ps5 {
+ nvidia,pins = "cam_af_en_ps5";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam_flash_en_ps6 {
+ nvidia,pins = "cam_flash_en_ps6";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam1_pwdn_ps7 {
+ nvidia,pins = "cam1_pwdn_ps7";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port T pins 0-1: cam2_pwdn and cam1_strobe. Neither sets
+  * nvidia,function (NOTE(review): presumably GPIO outputs - confirm);
+  * both have no pull and input disabled.
+  */
+ cam2_pwdn_pt0 {
+ nvidia,pins = "cam2_pwdn_pt0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cam1_strobe_pt1 {
+ nvidia,pins = "cam1_strobe_pt1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port U pins 0-3 (UART1 pads). Only TX and RX are muxed to "uarta":
+  * TX has no pull and input disabled, RX is pulled up with input
+  * enabled. RTS and CTS set no nvidia,function and are pulled down with
+  * input enabled.
+  * NOTE(review): this suggests UART1 runs without hardware flow control
+  * on this board - confirm.
+  */
+ uart1_tx_pu0 {
+ nvidia,pins = "uart1_tx_pu0";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart1_rx_pu1 {
+ nvidia,pins = "uart1_rx_pu1";
+ nvidia,function = "uarta";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart1_rts_pu2 {
+ nvidia,pins = "uart1_rts_pu2";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ uart1_cts_pu3 {
+ nvidia,pins = "uart1_cts_pu3";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port V pins 0-7 (LCD and touch pads); every pad has no pull. Pads
+  * with a mux function: lcd_bl_pwm ("pwm0"), lcd_gpio2 ("pwm1") and
+  * touch_clk ("touch"), all with input disabled. The remaining pads set
+  * no nvidia,function (NOTE(review): presumably GPIOs - confirm); of
+  * those only lcd_gpio1 has input enabled.
+  */
+ lcd_bl_pwm_pv0 {
+ nvidia,pins = "lcd_bl_pwm_pv0";
+ nvidia,function = "pwm0";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_bl_en_pv1 {
+ nvidia,pins = "lcd_bl_en_pv1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_rst_pv2 {
+ nvidia,pins = "lcd_rst_pv2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_gpio1_pv3 {
+ nvidia,pins = "lcd_gpio1_pv3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_gpio2_pv4 {
+ nvidia,pins = "lcd_gpio2_pv4";
+ nvidia,function = "pwm1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ ap_ready_pv5 {
+ nvidia,pins = "ap_ready_pv5";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ touch_rst_pv6 {
+ nvidia,pins = "touch_rst_pv6";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ touch_clk_pv7 {
+ nvidia,pins = "touch_clk_pv7";
+ nvidia,function = "touch";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port X pins 0-7: wake/interrupt lines and buttons. None sets
+  * nvidia,function (NOTE(review): presumably GPIO inputs - confirm);
+  * all have input enabled. modem_wake_ap and als_prox_int are pulled
+  * down; touch_int, motion_int, temp_alert and the three buttons are
+  * pulled up.
+  */
+ modem_wake_ap_px0 {
+ nvidia,pins = "modem_wake_ap_px0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ touch_int_px1 {
+ nvidia,pins = "touch_int_px1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ motion_int_px2 {
+ nvidia,pins = "motion_int_px2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ als_prox_int_px3 {
+ nvidia,pins = "als_prox_int_px3";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ temp_alert_px4 {
+ nvidia,pins = "temp_alert_px4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_power_on_px5 {
+ nvidia,pins = "button_power_on_px5";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_vol_up_px6 {
+ nvidia,pins = "button_vol_up_px6";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_vol_down_px7 {
+ nvidia,pins = "button_vol_down_px7";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port Y pins 0-5; every pad has input enabled. button_slide_sw and
+  * button_home set no nvidia,function and are pulled up. lcd_te is
+  * muxed to "displaya" with pull-down. The PMU I2C pair is muxed to
+  * "i2cpmu" with no pull and nvidia,io-hv disabled. clk_32k_out is
+  * muxed to "soc" with pull-up.
+  */
+ button_slide_sw_py0 {
+ nvidia,pins = "button_slide_sw_py0";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ button_home_py1 {
+ nvidia,pins = "button_home_py1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ lcd_te_py2 {
+ nvidia,pins = "lcd_te_py2";
+ nvidia,function = "displaya";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pwr_i2c_scl_py3 {
+ nvidia,pins = "pwr_i2c_scl_py3";
+ nvidia,function = "i2cpmu";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ pwr_i2c_sda_py4 {
+ nvidia,pins = "pwr_i2c_sda_py4";
+ nvidia,function = "i2cpmu";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ clk_32k_out_py5 {
+ nvidia,pins = "clk_32k_out_py5";
+ nvidia,function = "soc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port Z pins 0-5. pz1 and pz4 are muxed to "sdmmc1" and pz5 to "soc",
+  * all pulled up with input enabled. pz0 and pz2 set no nvidia,function
+  * and are pulled up with input enabled; pz3 sets no function, has no
+  * pull and input disabled.
+  * NOTE(review): the board's sdhci node uses GPIO Z1 as card detect
+  * while pz1 is also given the "sdmmc1" function here - confirm the two
+  * are meant to coexist.
+  */
+ pz0 {
+ nvidia,pins = "pz0";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz1 {
+ nvidia,pins = "pz1";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz2 {
+ nvidia,pins = "pz2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz3 {
+ nvidia,pins = "pz3";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz4 {
+ nvidia,pins = "pz4";
+ nvidia,function = "sdmmc1";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pz5 {
+ nvidia,pins = "pz5";
+ nvidia,function = "soc";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port AA pins 0-3: the four DAP2 signals (FS, SCLK, DIN, DOUT), all
+  * muxed to "i2s2" with no pull and input enabled.
+  */
+ dap2_fs_paa0 {
+ nvidia,pins = "dap2_fs_paa0";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_sclk_paa1 {
+ nvidia,pins = "dap2_sclk_paa1";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_din_paa2 {
+ nvidia,pins = "dap2_din_paa2";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dap2_dout_paa3 {
+ nvidia,pins = "dap2_dout_paa3";
+ nvidia,function = "i2s2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port BB pins 0-4. aud_mclk and gpio_x1_aud set no nvidia,function
+  * and are pulled up with input enabled. dvfs_pwm is muxed to "cldvfs"
+  * but has tristate enabled. dvfs_clk sets no function, has no pull and
+  * input disabled. gpio_x3_aud is assigned "rsvd0" with pull-down and
+  * tristate enabled.
+  */
+ aud_mclk_pbb0 {
+ nvidia,pins = "aud_mclk_pbb0";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dvfs_pwm_pbb1 {
+ nvidia,pins = "dvfs_pwm_pbb1";
+ nvidia,function = "cldvfs";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ dvfs_clk_pbb2 {
+ nvidia,pins = "dvfs_clk_pbb2";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gpio_x1_aud_pbb3 {
+ nvidia,pins = "gpio_x1_aud_pbb3";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ gpio_x3_aud_pbb4 {
+ nvidia,pins = "gpio_x3_aud_pbb4";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port CC pins 0-7. hdmi_cec ("cec") and both usb_vbus_en pads ("usb")
+  * have no pull, input enabled and nvidia,io-hv enabled.
+  * hdmi_int_dp_hpd sets no nvidia,function and is pulled down with
+  * input enabled. dp_hpd0 is muxed to "dp" with pull-down and input
+  * enabled. spdif_out and spdif_in ("rsvd1") and pcc7 ("rsvd0") are
+  * pulled down with tristate enabled and input disabled.
+  */
+ hdmi_cec_pcc0 {
+ nvidia,pins = "hdmi_cec_pcc0";
+ nvidia,function = "cec";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ hdmi_int_dp_hpd_pcc1 {
+ nvidia,pins = "hdmi_int_dp_hpd_pcc1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ spdif_out_pcc2 {
+ nvidia,pins = "spdif_out_pcc2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ spdif_in_pcc3 {
+ nvidia,pins = "spdif_in_pcc3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ usb_vbus_en0_pcc4 {
+ nvidia,pins = "usb_vbus_en0_pcc4";
+ nvidia,function = "usb";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ usb_vbus_en1_pcc5 {
+ nvidia,pins = "usb_vbus_en1_pcc5";
+ nvidia,function = "usb";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ dp_hpd0_pcc6 {
+ nvidia,pins = "dp_hpd0_pcc6";
+ nvidia,function = "dp";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pcc7 {
+ nvidia,pins = "pcc7";
+ nvidia,function = "rsvd0";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ nvidia,io-hv = <TEGRA_PIN_DISABLE>;
+ };
+ /* Port DD pin 0: SPI2 chip-select 1, muxed to "spi2", pulled up, input enabled. */
+ spi2_cs1_pdd0 {
+ nvidia,pins = "spi2_cs1_pdd0";
+ nvidia,function = "spi2";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Port EE pins 0-5: the six QSPI pads. All are assigned "rsvd1" with
+  * pull-down, tristate enabled and input disabled.
+  * NOTE(review): this looks like the configuration for unused pads,
+  * i.e. QSPI is not wired up on this board - confirm.
+  */
+ qspi_sck_pee0 {
+ nvidia,pins = "qspi_sck_pee0";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_cs_n_pee1 {
+ nvidia,pins = "qspi_cs_n_pee1";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io0_pee2 {
+ nvidia,pins = "qspi_io0_pee2";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io1_pee3 {
+ nvidia,pins = "qspi_io1_pee3";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io2_pee4 {
+ nvidia,pins = "qspi_io2_pee4";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ qspi_io3_pee5 {
+ nvidia,pins = "qspi_io3_pee5";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ /*
+  * Pads named by function rather than by GPIO port. core_pwr_req
+  * ("core"), cpu_pwr_req ("cpu"), jtag_rtck ("jtag") and shutdown
+  * ("shutdown") have no pull and input disabled. pwr_int_n ("pmi") is
+  * pulled up with input enabled; clk_32k_in ("clk") has no pull with
+  * input enabled. clk_req is assigned "rsvd1" with pull-down and
+  * tristate enabled.
+  */
+ core_pwr_req {
+ nvidia,pins = "core_pwr_req";
+ nvidia,function = "core";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ cpu_pwr_req {
+ nvidia,pins = "cpu_pwr_req";
+ nvidia,function = "cpu";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ pwr_int_n {
+ nvidia,pins = "pwr_int_n";
+ nvidia,function = "pmi";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ clk_32k_in {
+ nvidia,pins = "clk_32k_in";
+ nvidia,function = "clk";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ jtag_rtck {
+ nvidia,pins = "jtag_rtck";
+ nvidia,function = "jtag";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ clk_req {
+ nvidia,pins = "clk_req";
+ nvidia,function = "rsvd1";
+ nvidia,pull = <TEGRA_PIN_PULL_DOWN>;
+ nvidia,tristate = <TEGRA_PIN_ENABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ shutdown {
+ nvidia,pins = "shutdown";
+ nvidia,function = "shutdown";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_DISABLE>;
+ nvidia,open-drain = <TEGRA_PIN_DISABLE>;
+ };
+ };
+ };
+
+ /*
+  * MMC/SD: enable the sdhci@0,700b0000 controller for this board with a
+  * 4-bit data bus and the no-1-8-v flag set. Card detect is read from
+  * GPIO Z1, active low.
+  */
+ sdhci@0,700b0000 {
+ status = "okay";
+ bus-width = <4>;
+ no-1-8-v;
+
+ cd-gpios = <&gpio TEGRA_GPIO(Z, 1) GPIO_ACTIVE_LOW>;
+ };
+};
--- /dev/null
+#include <dt-bindings/clock/tegra210-car.h>
+#include <dt-bindings/gpio/tegra-gpio.h>
+#include <dt-bindings/memory/tegra210-mc.h>
+#include <dt-bindings/pinctrl/pinctrl-tegra.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/ {
+ compatible = "nvidia,tegra210";
+ interrupt-parent = <&lic>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ host1x@0,50000000 {
+ compatible = "nvidia,tegra210-host1x", "simple-bus";
+ reg = <0x0 0x50000000 0x0 0x00034000>;
+ interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>, /* syncpt */
+ <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>; /* general */
+ clocks = <&tegra_car TEGRA210_CLK_HOST1X>;
+ clock-names = "host1x";
+ resets = <&tegra_car 28>;
+ reset-names = "host1x";
+
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ ranges = <0x0 0x54000000 0x0 0x54000000 0x0 0x01000000>;
+
+ dpaux1: dpaux@0,54040000 {
+ compatible = "nvidia,tegra210-dpaux";
+ reg = <0x0 0x54040000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_DPAUX1>,
+ <&tegra_car TEGRA210_CLK_PLL_DP>;
+ clock-names = "dpaux", "parent";
+ resets = <&tegra_car 207>;
+ reset-names = "dpaux";
+ status = "disabled";
+ };
+
+ vi@0,54080000 {
+ compatible = "nvidia,tegra210-vi";
+ reg = <0x0 0x54080000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 69 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ tsec@0,54100000 {
+ compatible = "nvidia,tegra210-tsec";
+ reg = <0x0 0x54100000 0x0 0x00040000>;
+ };
+
+ dc@0,54200000 {
+ compatible = "nvidia,tegra210-dc";
+ reg = <0x0 0x54200000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_DISP1>,
+ <&tegra_car TEGRA210_CLK_PLL_P>;
+ clock-names = "dc", "parent";
+ resets = <&tegra_car 27>;
+ reset-names = "dc";
+
+ iommus = <&mc TEGRA_SWGROUP_DC>;
+
+ nvidia,head = <0>;
+ };
+
+ dc@0,54240000 {
+ compatible = "nvidia,tegra210-dc";
+ reg = <0x0 0x54240000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_DISP2>,
+ <&tegra_car TEGRA210_CLK_PLL_P>;
+ clock-names = "dc", "parent";
+ resets = <&tegra_car 26>;
+ reset-names = "dc";
+
+ iommus = <&mc TEGRA_SWGROUP_DCB>;
+
+ nvidia,head = <1>;
+ };
+
+ dsi@0,54300000 {
+ compatible = "nvidia,tegra210-dsi";
+ reg = <0x0 0x54300000 0x0 0x00040000>;
+ clocks = <&tegra_car TEGRA210_CLK_DSIA>,
+ <&tegra_car TEGRA210_CLK_DSIALP>,
+ <&tegra_car TEGRA210_CLK_PLL_D_OUT0>;
+ clock-names = "dsi", "lp", "parent";
+ resets = <&tegra_car 48>;
+ reset-names = "dsi";
+ nvidia,mipi-calibrate = <&mipi 0x0c0>; /* DSIA & DSIB pads */
+
+ status = "disabled";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ vic@0,54340000 {
+ compatible = "nvidia,tegra210-vic";
+ reg = <0x0 0x54340000 0x0 0x00040000>;
+ status = "disabled";
+ };
+
+ nvjpg@0,54380000 {
+ compatible = "nvidia,tegra210-nvjpg";
+ reg = <0x0 0x54380000 0x0 0x00040000>;
+ status = "disabled";
+ };
+
+ dsi@0,54400000 {
+ compatible = "nvidia,tegra210-dsi";
+ reg = <0x0 0x54400000 0x0 0x00040000>;
+ clocks = <&tegra_car TEGRA210_CLK_DSIB>,
+ <&tegra_car TEGRA210_CLK_DSIBLP>,
+ <&tegra_car TEGRA210_CLK_PLL_D_OUT0>;
+ clock-names = "dsi", "lp", "parent";
+ resets = <&tegra_car 82>;
+ reset-names = "dsi";
+ nvidia,mipi-calibrate = <&mipi 0x300>; /* DSIC & DSID pads */
+
+ status = "disabled";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ nvdec@0,54480000 {
+ compatible = "nvidia,tegra210-nvdec";
+ reg = <0x0 0x54480000 0x0 0x00040000>;
+ status = "disabled";
+ };
+
+ nvenc@0,544c0000 {
+ compatible = "nvidia,tegra210-nvenc";
+ reg = <0x0 0x544c0000 0x0 0x00040000>;
+ status = "disabled";
+ };
+
+ tsec@0,54500000 {
+ compatible = "nvidia,tegra210-tsec";
+ reg = <0x0 0x54500000 0x0 0x00040000>;
+ status = "disabled";
+ };
+
+ sor@0,54540000 {
+ compatible = "nvidia,tegra210-sor";
+ reg = <0x0 0x54540000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_SOR0>,
+ <&tegra_car TEGRA210_CLK_PLL_D_OUT0>,
+ <&tegra_car TEGRA210_CLK_PLL_DP>,
+ <&tegra_car TEGRA210_CLK_SOR_SAFE>;
+ clock-names = "sor", "parent", "dp", "safe";
+ resets = <&tegra_car 182>;
+ reset-names = "sor";
+ status = "disabled";
+ };
+
+ sor@0,54580000 {
+ compatible = "nvidia,tegra210-sor1";
+ reg = <0x0 0x54580000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_SOR1>,
+ <&tegra_car TEGRA210_CLK_PLL_D2_OUT0>,
+ <&tegra_car TEGRA210_CLK_PLL_DP>,
+ <&tegra_car TEGRA210_CLK_SOR_SAFE>;
+ clock-names = "sor", "parent", "dp", "safe";
+ resets = <&tegra_car 183>;
+ reset-names = "sor";
+ status = "disabled";
+ };
+
+ dpaux: dpaux@0,545c0000 {
+ compatible = "nvidia,tegra124-dpaux";
+ reg = <0x0 0x545c0000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_DPAUX>,
+ <&tegra_car TEGRA210_CLK_PLL_DP>;
+ clock-names = "dpaux", "parent";
+ resets = <&tegra_car 181>;
+ reset-names = "dpaux";
+ status = "disabled";
+ };
+
+ isp@0,54600000 {
+ compatible = "nvidia,tegra210-isp";
+ reg = <0x0 0x54600000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 71 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ isp@0,54680000 {
+ compatible = "nvidia,tegra210-isp";
+ reg = <0x0 0x54680000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 70 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+ i2c@0,546c0000 {
+ compatible = "nvidia,tegra210-i2c-vi";
+ reg = <0x0 0x546c0000 0x0 0x00040000>;
+ interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+ };
+
+ gic: interrupt-controller@0,50041000 {
+ compatible = "arm,gic-400";
+ #interrupt-cells = <3>;
+ interrupt-controller;
+ reg = <0x0 0x50041000 0x0 0x1000>,
+ <0x0 0x50042000 0x0 0x2000>,
+ <0x0 0x50044000 0x0 0x2000>,
+ <0x0 0x50046000 0x0 0x2000>;
+ interrupts = <GIC_PPI 9
+ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+ interrupt-parent = <&gic>;
+ };
+
+ gpu@0,57000000 {
+ compatible = "nvidia,gm20b";
+ reg = <0x0 0x57000000 0x0 0x01000000>,
+ <0x0 0x58000000 0x0 0x01000000>;
+ interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "stall", "nonstall";
+ clocks = <&tegra_car TEGRA210_CLK_GPU>,
+ <&tegra_car TEGRA210_CLK_PLL_P_OUT5>;
+ clock-names = "gpu", "pwr";
+ resets = <&tegra_car 184>;
+ reset-names = "gpu";
+ status = "disabled";
+ };
+
+ lic: interrupt-controller@0,60004000 {
+ compatible = "nvidia,tegra210-ictlr";
+ reg = <0x0 0x60004000 0x0 0x40>, /* primary controller */
+ <0x0 0x60004100 0x0 0x40>, /* secondary controller */
+ <0x0 0x60004200 0x0 0x40>, /* tertiary controller */
+ <0x0 0x60004300 0x0 0x40>, /* quaternary controller */
+ <0x0 0x60004400 0x0 0x40>, /* quinary controller */
+ <0x0 0x60004500 0x0 0x40>; /* senary controller */
+ interrupt-controller;
+ #interrupt-cells = <3>;
+ interrupt-parent = <&gic>;
+ };
+
+ timer@0,60005000 {
+ compatible = "nvidia,tegra210-timer", "nvidia,tegra20-timer";
+ reg = <0x0 0x60005000 0x0 0x400>;
+ interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_TIMER>;
+ clock-names = "timer";
+ };
+
+ tegra_car: clock@0,60006000 {
+ compatible = "nvidia,tegra210-car";
+ reg = <0x0 0x60006000 0x0 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ flow-controller@0,60007000 {
+ compatible = "nvidia,tegra210-flowctrl";
+ reg = <0x0 0x60007000 0x0 0x1000>;
+ };
+
+ gpio: gpio@0,6000d000 {
+ compatible = "nvidia,tegra210-gpio", "nvidia,tegra124-gpio", "nvidia,tegra30-gpio";
+ reg = <0x0 0x6000d000 0x0 0x1000>;
+ interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 125 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ };
+
+ apbdma: dma@0,60020000 {
+ compatible = "nvidia,tegra210-apbdma", "nvidia,tegra148-apbdma";
+ reg = <0x0 0x60020000 0x0 0x1400>;
+ interrupts = <GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 132 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 136 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 141 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_APBDMA>;
+ clock-names = "dma";
+ resets = <&tegra_car 34>;
+ reset-names = "dma";
+ #dma-cells = <1>;
+ };
+
+ apbmisc@0,70000800 {
+ compatible = "nvidia,tegra210-apbmisc", "nvidia,tegra20-apbmisc";
+ reg = <0x0 0x70000800 0x0 0x64>, /* Chip revision */
+ <0x0 0x7000e864 0x0 0x04>; /* Strapping options */
+ };
+
+ pinmux: pinmux@0,700008d4 {
+ compatible = "nvidia,tegra210-pinmux";
+ reg = <0x0 0x700008d4 0x0 0x29c>, /* Pad control registers */
+ <0x0 0x70003000 0x0 0x294>; /* Mux registers */
+ };
+
+ /*
+ * There are two serial drivers, i.e. the 8250 based simple serial
+ * driver and the APB DMA based serial driver for higher baudrate
+ * and performance. To enable the 8250 based driver, the compatible
+ * is "nvidia,tegra124-uart", "nvidia,tegra20-uart" and to enable
+ * the APB DMA based serial driver, the compatible is
+ * "nvidia,tegra124-hsuart", "nvidia,tegra30-hsuart".
+ */
+ uarta: serial@0,70006000 {
+ compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+ reg = <0x0 0x70006000 0x0 0x40>;
+ reg-shift = <2>;
+ interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_UARTA>;
+ clock-names = "serial";
+ resets = <&tegra_car 6>;
+ reset-names = "serial";
+ dmas = <&apbdma 8>, <&apbdma 8>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ uartb: serial@0,70006040 {
+ compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+ reg = <0x0 0x70006040 0x0 0x40>;
+ reg-shift = <2>;
+ interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_UARTB>;
+ clock-names = "serial";
+ resets = <&tegra_car 7>;
+ reset-names = "serial";
+ dmas = <&apbdma 9>, <&apbdma 9>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ uartc: serial@0,70006200 {
+ compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+ reg = <0x0 0x70006200 0x0 0x40>;
+ reg-shift = <2>;
+ interrupts = <GIC_SPI 46 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_UARTC>;
+ clock-names = "serial";
+ resets = <&tegra_car 55>;
+ reset-names = "serial";
+ dmas = <&apbdma 10>, <&apbdma 10>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ uartd: serial@0,70006300 {
+ compatible = "nvidia,tegra210-uart", "nvidia,tegra20-uart";
+ reg = <0x0 0x70006300 0x0 0x40>;
+ reg-shift = <2>;
+ interrupts = <GIC_SPI 90 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_UARTD>;
+ clock-names = "serial";
+ resets = <&tegra_car 65>;
+ reset-names = "serial";
+ dmas = <&apbdma 19>, <&apbdma 19>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ pwm: pwm@0,7000a000 {
+ compatible = "nvidia,tegra210-pwm", "nvidia,tegra20-pwm";
+ reg = <0x0 0x7000a000 0x0 0x100>;
+ #pwm-cells = <2>;
+ clocks = <&tegra_car TEGRA210_CLK_PWM>;
+ clock-names = "pwm";
+ resets = <&tegra_car 17>;
+ reset-names = "pwm";
+ status = "disabled";
+ };
+
+ i2c@0,7000c000 {
+ compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000c000 0x0 0x100>;
+ interrupts = <GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA210_CLK_I2C1>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 12>;
+ reset-names = "i2c";
+ dmas = <&apbdma 21>, <&apbdma 21>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ i2c@0,7000c400 {
+ compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000c400 0x0 0x100>;
+ interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA210_CLK_I2C2>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 54>;
+ reset-names = "i2c";
+ dmas = <&apbdma 22>, <&apbdma 22>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ i2c@0,7000c500 {
+ compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000c500 0x0 0x100>;
+ interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA210_CLK_I2C3>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 67>;
+ reset-names = "i2c";
+ dmas = <&apbdma 23>, <&apbdma 23>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ i2c@0,7000c700 {
+ compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000c700 0x0 0x100>;
+ interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA210_CLK_I2C4>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 103>;
+ reset-names = "i2c";
+ dmas = <&apbdma 26>, <&apbdma 26>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ i2c@0,7000d000 {
+ compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000d000 0x0 0x100>;
+ interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA210_CLK_I2C5>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 47>;
+ reset-names = "i2c";
+ dmas = <&apbdma 24>, <&apbdma 24>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ i2c@0,7000d100 {
+ compatible = "nvidia,tegra210-i2c", "nvidia,tegra114-i2c";
+ reg = <0x0 0x7000d100 0x0 0x100>;
+ interrupts = <GIC_SPI 63 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA210_CLK_I2C6>;
+ clock-names = "div-clk";
+ resets = <&tegra_car 166>;
+ reset-names = "i2c";
+ dmas = <&apbdma 30>, <&apbdma 30>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ spi@0,7000d400 {
+ compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+ reg = <0x0 0x7000d400 0x0 0x200>;
+ interrupts = <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA210_CLK_SBC1>;
+ clock-names = "spi";
+ resets = <&tegra_car 41>;
+ reset-names = "spi";
+ dmas = <&apbdma 15>, <&apbdma 15>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ spi@0,7000d600 {
+ compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+ reg = <0x0 0x7000d600 0x0 0x200>;
+ interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA210_CLK_SBC2>;
+ clock-names = "spi";
+ resets = <&tegra_car 44>;
+ reset-names = "spi";
+ dmas = <&apbdma 16>, <&apbdma 16>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ spi@0,7000d800 {
+ compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+ reg = <0x0 0x7000d800 0x0 0x200>;
+ interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA210_CLK_SBC3>;
+ clock-names = "spi";
+ resets = <&tegra_car 46>;
+ reset-names = "spi";
+ dmas = <&apbdma 17>, <&apbdma 17>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ spi@0,7000da00 {
+ compatible = "nvidia,tegra210-spi", "nvidia,tegra114-spi";
+ reg = <0x0 0x7000da00 0x0 0x200>;
+ interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA210_CLK_SBC4>;
+ clock-names = "spi";
+ resets = <&tegra_car 68>;
+ reset-names = "spi";
+ dmas = <&apbdma 18>, <&apbdma 18>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ rtc@0,7000e000 {
+ compatible = "nvidia,tegra210-rtc", "nvidia,tegra20-rtc";
+ reg = <0x0 0x7000e000 0x0 0x100>;
+ interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_RTC>;
+ clock-names = "rtc";
+ };
+
+ pmc: pmc@0,7000e400 {
+ compatible = "nvidia,tegra210-pmc";
+ reg = <0x0 0x7000e400 0x0 0x400>;
+ clocks = <&tegra_car TEGRA210_CLK_PCLK>, <&clk32k_in>;
+ clock-names = "pclk", "clk32k_in";
+
+ #power-domain-cells = <1>;
+ };
+
+ fuse@0,7000f800 {
+ compatible = "nvidia,tegra210-efuse";
+ reg = <0x0 0x7000f800 0x0 0x400>;
+ clocks = <&tegra_car TEGRA210_CLK_FUSE>;
+ clock-names = "fuse";
+ resets = <&tegra_car 39>;
+ reset-names = "fuse";
+ };
+
+ mc: memory-controller@0,70019000 {
+ compatible = "nvidia,tegra210-mc";
+ reg = <0x0 0x70019000 0x0 0x1000>;
+ clocks = <&tegra_car TEGRA210_CLK_MC>;
+ clock-names = "mc";
+
+ interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+ #iommu-cells = <1>;
+ };
+
+ hda@0,70030000 {
+ compatible = "nvidia,tegra210-hda", "nvidia,tegra30-hda";
+ reg = <0x0 0x70030000 0x0 0x10000>;
+ interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_HDA>,
+ <&tegra_car TEGRA210_CLK_HDA2HDMI>,
+ <&tegra_car TEGRA210_CLK_HDA2CODEC_2X>;
+ clock-names = "hda", "hda2hdmi", "hda2codec_2x";
+ resets = <&tegra_car 125>, /* hda */
+ <&tegra_car 128>, /* hda2hdmi */
+ <&tegra_car 111>; /* hda2codec_2x */
+ reset-names = "hda", "hda2hdmi", "hda2codec_2x";
+ status = "disabled";
+ };
+
+ sdhci@0,700b0000 {
+ compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+ reg = <0x0 0x700b0000 0x0 0x200>;
+ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_SDMMC1>;
+ clock-names = "sdhci";
+ resets = <&tegra_car 14>;
+ reset-names = "sdhci";
+ status = "disabled";
+ };
+
+ sdhci@0,700b0200 {
+ compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+ reg = <0x0 0x700b0200 0x0 0x200>;
+ interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_SDMMC2>;
+ clock-names = "sdhci";
+ resets = <&tegra_car 9>;
+ reset-names = "sdhci";
+ status = "disabled";
+ };
+
+ sdhci@0,700b0400 {
+ compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+ reg = <0x0 0x700b0400 0x0 0x200>;
+ interrupts = <GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_SDMMC3>;
+ clock-names = "sdhci";
+ resets = <&tegra_car 69>;
+ reset-names = "sdhci";
+ status = "disabled";
+ };
+
+ sdhci@0,700b0600 {
+ compatible = "nvidia,tegra210-sdhci", "nvidia,tegra124-sdhci";
+ reg = <0x0 0x700b0600 0x0 0x200>;
+ interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA210_CLK_SDMMC4>;
+ clock-names = "sdhci";
+ resets = <&tegra_car 15>;
+ reset-names = "sdhci";
+ status = "disabled";
+ };
+
+ mipi: mipi@0,700e3000 {
+ compatible = "nvidia,tegra210-mipi";
+ reg = <0x0 0x700e3000 0x0 0x100>;
+ clocks = <&tegra_car TEGRA210_CLK_MIPI_CAL>;
+ clock-names = "mipi-cal";
+ #nvidia,mipi-calibrate-cells = <1>;
+ };
+
+ spi@0,70410000 {
+ compatible = "nvidia,tegra210-qspi";
+ reg = <0x0 0x70410000 0x0 0x1000>;
+ interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car TEGRA210_CLK_QSPI>;
+ clock-names = "qspi";
+ resets = <&tegra_car 211>;
+ reset-names = "qspi";
+ dmas = <&apbdma 5>, <&apbdma 5>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
+
+ usb@0,7d000000 {
+ compatible = "nvidia,tegra210-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+ reg = <0x0 0x7d000000 0x0 0x4000>;
+ interrupts = <GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>;
+ phy_type = "utmi";
+ clocks = <&tegra_car TEGRA210_CLK_USBD>;
+ clock-names = "usb";
+ resets = <&tegra_car 22>;
+ reset-names = "usb";
+ nvidia,phy = <&phy1>;
+ status = "disabled";
+ };
+
+ phy1: usb-phy@0,7d000000 {
+ compatible = "nvidia,tegra210-usb-phy", "nvidia,tegra30-usb-phy";
+ reg = <0x0 0x7d000000 0x0 0x4000>,
+ <0x0 0x7d000000 0x0 0x4000>;
+ phy_type = "utmi";
+ clocks = <&tegra_car TEGRA210_CLK_USBD>,
+ <&tegra_car TEGRA210_CLK_PLL_U>,
+ <&tegra_car TEGRA210_CLK_USBD>;
+ clock-names = "reg", "pll_u", "utmi-pads";
+ resets = <&tegra_car 22>, <&tegra_car 22>;
+ reset-names = "usb", "utmi-pads";
+ nvidia,hssync-start-delay = <0>;
+ nvidia,idle-wait-delay = <17>;
+ nvidia,elastic-limit = <16>;
+ nvidia,term-range-adj = <6>;
+ nvidia,xcvr-setup = <9>;
+ nvidia,xcvr-lsfslew = <0>;
+ nvidia,xcvr-lsrslew = <3>;
+ nvidia,hssquelch-level = <2>;
+ nvidia,hsdiscon-level = <5>;
+ nvidia,xcvr-hsslew = <12>;
+ nvidia,has-utmi-pad-registers;
+ status = "disabled";
+ };
+
+ usb@0,7d004000 {
+ compatible = "nvidia,tegra210-ehci", "nvidia,tegra30-ehci", "usb-ehci";
+ reg = <0x0 0x7d004000 0x0 0x4000>;
+ interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+ phy_type = "utmi";
+ clocks = <&tegra_car TEGRA210_CLK_USB2>;
+ clock-names = "usb";
+ resets = <&tegra_car 58>;
+ reset-names = "usb";
+ nvidia,phy = <&phy2>;
+ status = "disabled";
+ };
+
+ phy2: usb-phy@0,7d004000 {
+ compatible = "nvidia,tegra210-usb-phy", "nvidia,tegra30-usb-phy";
+ reg = <0x0 0x7d004000 0x0 0x4000>,
+ <0x0 0x7d000000 0x0 0x4000>;
+ phy_type = "utmi";
+ clocks = <&tegra_car TEGRA210_CLK_USB2>,
+ <&tegra_car TEGRA210_CLK_PLL_U>,
+ <&tegra_car TEGRA210_CLK_USBD>;
+ clock-names = "reg", "pll_u", "utmi-pads";
+ resets = <&tegra_car 58>, <&tegra_car 22>;
+ reset-names = "usb", "utmi-pads";
+ nvidia,hssync-start-delay = <0>;
+ nvidia,idle-wait-delay = <17>;
+ nvidia,elastic-limit = <16>;
+ nvidia,term-range-adj = <6>;
+ nvidia,xcvr-setup = <9>;
+ nvidia,xcvr-lsfslew = <0>;
+ nvidia,xcvr-lsrslew = <3>;
+ nvidia,hssquelch-level = <2>;
+ nvidia,hsdiscon-level = <5>;
+ nvidia,xcvr-hsslew = <12>;
+ status = "disabled";
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <1>;
+ };
+
+ cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <2>;
+ };
+
+ cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <3>;
+ };
+ };
+
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <GIC_PPI 13
+ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 14
+ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 11
+ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 10
+ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
+ interrupt-parent = <&gic>;
+ };
+};
-#define NR_syscalls 323 /* length of syscall table */
+#define NR_syscalls 324 /* length of syscall table */
/*
* The following defines stop scripts/checksyscalls.sh from complaining about
#define __NR_membarrier 1344
#define __NR_kcmp 1345
#define __NR_mlock2 1346
+#define __NR_copy_file_range 1347
#endif /* _UAPI_ASM_IA64_UNISTD_H */
data8 sys_membarrier
data8 sys_kcmp // 1345
data8 sys_mlock2
+ data8 sys_copy_file_range
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
struct inode *inode = file_inode(file);
int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (!err) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = spufs_mfc_flush(file, NULL);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return err;
}
{
struct dentry *dentry, *tmp;
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
spin_lock(&dentry->d_lock);
if (simple_positive(dentry)) {
}
}
shrink_dcache_parent(dir);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
}
/* Caller must hold parent->i_mutex */
parent = d_inode(dir->d_parent);
ctx = SPUFS_I(d_inode(dir))->i_ctx;
- mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(parent, I_MUTEX_PARENT);
ret = spufs_rmdir(parent, dir);
- mutex_unlock(&parent->i_mutex);
+ inode_unlock(parent);
WARN_ON(ret);
return dcache_dir_close(inode, file);
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
dget(dentry);
inc_nlink(dir);
if (ret)
spufs_rmdir(dir, dentry);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
struct dentry *parent;
parent = dentry->d_parent;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
if (simple_positive(dentry)) {
if (d_is_dir(dentry))
simple_rmdir(d_inode(parent), dentry);
}
d_delete(dentry);
dput(dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
}
static void hypfs_delete_tree(struct dentry *root)
struct dentry *dentry;
struct inode *inode;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
dentry = lookup_one_len(name, parent, strlen(name));
if (IS_ERR(dentry)) {
dentry = ERR_PTR(-ENOMEM);
d_instantiate(dentry, inode);
dget(dentry);
fail:
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return dentry;
}
}
/**
- * __arch_wb_cache_pmem - write back a cache range with CLWB
+ * arch_wb_cache_pmem - write back a cache range with CLWB
* @vaddr: virtual start address
* @size: number of bytes to write back
*
* Write back a cache range using the CLWB (cache line write back)
* instruction. This function requires explicit ordering with an
- * arch_wmb_pmem() call. This API is internal to the x86 PMEM implementation.
+ * arch_wmb_pmem() call.
*/
-static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
+static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
{
u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
unsigned long clflush_mask = x86_clflush_size - 1;
+ void *vaddr = (void __force *)addr;
void *vend = vaddr + size;
void *p;
len = copy_from_iter_nocache(vaddr, bytes, i);
if (__iter_needs_pmem_wb(i))
- __arch_wb_cache_pmem(vaddr, bytes);
+ arch_wb_cache_pmem(addr, bytes);
return len;
}
void *vaddr = (void __force *)addr;
memset(vaddr, 0, size);
- __arch_wb_cache_pmem(vaddr, size);
+ arch_wb_cache_pmem(addr, size);
}
static inline bool __arch_has_wmb_pmem(void)
obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
- blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
+ blk-lib.o blk-mq.o blk-mq-tag.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
}
if (unlikely(!bip))
- return NULL;
+ return ERR_PTR(-ENOMEM);
memset(bip, 0, sizeof(*bip));
return bip;
err:
mempool_free(bip, bs->bio_integrity_pool);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(bio_integrity_alloc);
/* Allocate bio integrity payload and integrity vectors */
bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
- if (unlikely(bip == NULL)) {
+ if (IS_ERR(bip)) {
printk(KERN_ERR "could not allocate data integrity bioset\n");
kfree(buf);
- return -EIO;
+ return PTR_ERR(bip);
}
bip->bip_flags |= BIP_BLOCK_INTEGRITY;
BUG_ON(bip_src == NULL);
bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
-
- if (bip == NULL)
- return -EIO;
+ if (IS_ERR(bip))
+ return PTR_ERR(bip);
memcpy(bip->bip_vec, bip_src->bip_vec,
bip_src->bip_vcnt * sizeof(struct bio_vec));
wake_up_all(&q->mq_freeze_wq);
}
+static void blk_rq_timed_out_timer(unsigned long data)
+{
+ struct request_queue *q = (struct request_queue *)data;
+
+ kblockd_schedule_work(&q->timeout_work);
+}
+
struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
{
struct request_queue *q;
if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
goto fail;
+ INIT_WORK(&q->timeout_work, blk_timeout_work);
q->request_fn = rfn;
q->prep_rq_fn = NULL;
q->unprep_rq_fn = NULL;
+++ /dev/null
-/*
- * Functions related to interrupt-poll handling in the block layer. This
- * is similar to NAPI for network devices.
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
-#include <linux/blk-iopoll.h>
-#include <linux/delay.h>
-
-#include "blk.h"
-
-static unsigned int blk_iopoll_budget __read_mostly = 256;
-
-static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
-
-/**
- * blk_iopoll_sched - Schedule a run of the iopoll handler
- * @iop: The parent iopoll structure
- *
- * Description:
- * Add this blk_iopoll structure to the pending poll list and trigger the
- * raise of the blk iopoll softirq. The driver must already have gotten a
- * successful return from blk_iopoll_sched_prep() before calling this.
- **/
-void blk_iopoll_sched(struct blk_iopoll *iop)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
- __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_iopoll_sched);
-
-/**
- * __blk_iopoll_complete - Mark this @iop as un-polled again
- * @iop: The parent iopoll structure
- *
- * Description:
- * See blk_iopoll_complete(). This function must be called with interrupts
- * disabled.
- **/
-void __blk_iopoll_complete(struct blk_iopoll *iop)
-{
- list_del(&iop->list);
- smp_mb__before_atomic();
- clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(__blk_iopoll_complete);
-
-/**
- * blk_iopoll_complete - Mark this @iop as un-polled again
- * @iop: The parent iopoll structure
- *
- * Description:
- * If a driver consumes less than the assigned budget in its run of the
- * iopoll handler, it'll end the polled mode by calling this function. The
- * iopoll handler will not be invoked again before blk_iopoll_sched_prep()
- * is called.
- **/
-void blk_iopoll_complete(struct blk_iopoll *iop)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __blk_iopoll_complete(iop);
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_iopoll_complete);
-
-static void blk_iopoll_softirq(struct softirq_action *h)
-{
- struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
- int rearm = 0, budget = blk_iopoll_budget;
- unsigned long start_time = jiffies;
-
- local_irq_disable();
-
- while (!list_empty(list)) {
- struct blk_iopoll *iop;
- int work, weight;
-
- /*
- * If softirq window is exhausted then punt.
- */
- if (budget <= 0 || time_after(jiffies, start_time)) {
- rearm = 1;
- break;
- }
-
- local_irq_enable();
-
- /* Even though interrupts have been re-enabled, this
- * access is safe because interrupts can only add new
- * entries to the tail of this list, and only ->poll()
- * calls can remove this head entry from the list.
- */
- iop = list_entry(list->next, struct blk_iopoll, list);
-
- weight = iop->weight;
- work = 0;
- if (test_bit(IOPOLL_F_SCHED, &iop->state))
- work = iop->poll(iop, weight);
-
- budget -= work;
-
- local_irq_disable();
-
- /*
- * Drivers must not modify the iopoll state, if they
- * consume their assigned weight (or more, some drivers can't
- * easily just stop processing, they have to complete an
- * entire mask of commands).In such cases this code
- * still "owns" the iopoll instance and therefore can
- * move the instance around on the list at-will.
- */
- if (work >= weight) {
- if (blk_iopoll_disable_pending(iop))
- __blk_iopoll_complete(iop);
- else
- list_move_tail(&iop->list, list);
- }
- }
-
- if (rearm)
- __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
-
- local_irq_enable();
-}
-
-/**
- * blk_iopoll_disable - Disable iopoll on this @iop
- * @iop: The parent iopoll structure
- *
- * Description:
- * Disable io polling and wait for any pending callbacks to have completed.
- **/
-void blk_iopoll_disable(struct blk_iopoll *iop)
-{
- set_bit(IOPOLL_F_DISABLE, &iop->state);
- while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
- msleep(1);
- clear_bit(IOPOLL_F_DISABLE, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_disable);
-
-/**
- * blk_iopoll_enable - Enable iopoll on this @iop
- * @iop: The parent iopoll structure
- *
- * Description:
- * Enable iopoll on this @iop. Note that the handler run will not be
- * scheduled, it will only mark it as active.
- **/
-void blk_iopoll_enable(struct blk_iopoll *iop)
-{
- BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
- smp_mb__before_atomic();
- clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_enable);
-
-/**
- * blk_iopoll_init - Initialize this @iop
- * @iop: The parent iopoll structure
- * @weight: The default weight (or command completion budget)
- * @poll_fn: The handler to invoke
- *
- * Description:
- * Initialize this blk_iopoll structure. Before being actively used, the
- * driver must call blk_iopoll_enable().
- **/
-void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
-{
- memset(iop, 0, sizeof(*iop));
- INIT_LIST_HEAD(&iop->list);
- iop->weight = weight;
- iop->poll = poll_fn;
- set_bit(IOPOLL_F_SCHED, &iop->state);
-}
-EXPORT_SYMBOL(blk_iopoll_init);
-
-static int blk_iopoll_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- /*
- * If a CPU goes away, splice its entries to the current CPU
- * and trigger a run of the softirq
- */
- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
- int cpu = (unsigned long) hcpu;
-
- local_irq_disable();
- list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
- this_cpu_ptr(&blk_cpu_iopoll));
- __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
- local_irq_enable();
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block blk_iopoll_cpu_notifier = {
- .notifier_call = blk_iopoll_cpu_notify,
-};
-
-static __init int blk_iopoll_setup(void)
-{
- int i;
-
- for_each_possible_cpu(i)
- INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
-
- open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
- register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
- return 0;
-}
-subsys_initcall(blk_iopoll_setup);
blk_mq_complete_request(rq, -EIO);
return;
}
- if (rq->cmd_flags & REQ_NO_TIMEOUT)
- return;
if (time_after_eq(jiffies, rq->deadline)) {
if (!blk_mark_rq_complete(rq))
}
}
-static void blk_mq_rq_timer(unsigned long priv)
+static void blk_mq_timeout_work(struct work_struct *work)
{
- struct request_queue *q = (struct request_queue *)priv;
+ struct request_queue *q =
+ container_of(work, struct request_queue, timeout_work);
struct blk_mq_timeout_data data = {
.next = 0,
.next_set = 0,
};
int i;
+ if (blk_queue_enter(q, true))
+ return;
+
blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
if (data.next_set) {
blk_mq_tag_idle(hctx);
}
}
+ blk_queue_exit(q);
}
/*
hctxs[i]->queue_num = i;
}
- setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
+ INIT_WORK(&q->timeout_work, blk_mq_timeout_work);
blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ);
q->nr_queues = nr_cpu_ids;
}
}
-void blk_rq_timed_out_timer(unsigned long data)
+void blk_timeout_work(struct work_struct *work)
{
- struct request_queue *q = (struct request_queue *) data;
+ struct request_queue *q =
+ container_of(work, struct request_queue, timeout_work);
unsigned long flags, next = 0;
struct request *rq, *tmp;
int next_set = 0;
+ if (blk_queue_enter(q, true))
+ return;
spin_lock_irqsave(q->queue_lock, flags);
list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list)
mod_timer(&q->timeout, round_jiffies_up(next));
spin_unlock_irqrestore(q->queue_lock, flags);
+ blk_queue_exit(q);
}
/**
struct request_queue *q = req->q;
unsigned long expiry;
- if (req->cmd_flags & REQ_NO_TIMEOUT)
- return;
-
/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
if (!q->mq_ops && !q->rq_timed_out_fn)
return;
}
#endif
-void blk_rq_timed_out_timer(unsigned long data);
+void blk_timeout_work(struct work_struct *work);
unsigned long blk_rq_timeout(unsigned long timeout);
void blk_add_timer(struct request *req);
void blk_delete_timer(struct request *);
if (arg && !blkdev_dax_capable(bdev))
return -ENOTTY;
- mutex_lock(&bdev->bd_inode->i_mutex);
+ inode_lock(bdev->bd_inode);
if (bdev->bd_map_count == 0)
inode_set_flags(bdev->bd_inode, arg, S_DAX);
else
rc = -EBUSY;
- mutex_unlock(&bdev->bd_inode->i_mutex);
+ inode_unlock(bdev->bd_inode);
return rc;
}
#else
goto unlock;
type->ops->owner = THIS_MODULE;
+ if (type->ops_nokey)
+ type->ops_nokey->owner = THIS_MODULE;
node->type = type;
list_add(&node->list, &alg_types);
err = 0;
}
EXPORT_SYMBOL_GPL(af_alg_release);
+void af_alg_release_parent(struct sock *sk)
+{ /*
+ * Called with a child socket @sk: gives back the references this child
+ * accounts for on its parent socket and puts the parent when the last
+ * one is gone.
+ */
+ struct alg_sock *ask = alg_sk(sk);
+ unsigned int nokey = ask->nokey_refcnt; /* child's value, read before ask is repointed */
+ bool last = nokey && !ask->refcnt; /* child has a nokey count but no regular refcnt */
+
+ sk = ask->parent; /* from here on sk/ask refer to the parent */
+ ask = alg_sk(sk);
+
+ lock_sock(sk);
+ ask->nokey_refcnt -= nokey;
+ if (!last)
+ last = !--ask->refcnt; /* true once the parent's refcnt drops to zero */
+ release_sock(sk);
+
+ if (last)
+ sock_put(sk);
+}
+EXPORT_SYMBOL_GPL(af_alg_release_parent);
+
static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
const u32 forbidden = CRYPTO_ALG_INTERNAL;
struct sockaddr_alg *sa = (void *)uaddr;
const struct af_alg_type *type;
void *private;
+ int err;
if (sock->state == SS_CONNECTED)
return -EINVAL;
return PTR_ERR(private);
}
+ err = -EBUSY;
lock_sock(sk);
+ if (ask->refcnt | ask->nokey_refcnt)
+ goto unlock;
swap(ask->type, type);
swap(ask->private, private);
+ err = 0;
+
+unlock:
release_sock(sk);
alg_do_release(type, private);
- return 0;
+ return err;
}
static int alg_setkey(struct sock *sk, char __user *ukey,
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
const struct af_alg_type *type;
- int err = -ENOPROTOOPT;
+ int err = -EBUSY;
lock_sock(sk);
+ if (ask->refcnt)
+ goto unlock;
+
type = ask->type;
+ err = -ENOPROTOOPT;
if (level != SOL_ALG || !type)
goto unlock;
struct alg_sock *ask = alg_sk(sk);
const struct af_alg_type *type;
struct sock *sk2;
+ unsigned int nokey;
int err;
lock_sock(sk);
security_sk_clone(sk, sk2);
err = type->accept(ask->private, sk2);
- if (err) {
- sk_free(sk2);
+
+ nokey = err == -ENOKEY;
+ if (nokey && type->accept_nokey)
+ err = type->accept_nokey(ask->private, sk2);
+
+ if (err)
goto unlock;
- }
sk2->sk_family = PF_ALG;
- sock_hold(sk);
+ if (nokey || !ask->refcnt++)
+ sock_hold(sk);
+ ask->nokey_refcnt += nokey;
alg_sk(sk2)->parent = sk;
alg_sk(sk2)->type = type;
+ alg_sk(sk2)->nokey_refcnt = nokey;
newsock->ops = type->ops;
newsock->state = SS_CONNECTED;
+ if (nokey)
+ newsock->ops = type->ops_nokey;
+
err = 0;
unlock:
struct ahash_alg *alg = crypto_ahash_alg(hash);
hash->setkey = ahash_nosetkey;
+ hash->has_setkey = false;
hash->export = ahash_no_export;
hash->import = ahash_no_import;
hash->finup = alg->finup ?: ahash_def_finup;
hash->digest = alg->digest;
- if (alg->setkey)
+ if (alg->setkey) {
hash->setkey = alg->setkey;
+ hash->has_setkey = true;
+ }
if (alg->export)
hash->export = alg->export;
if (alg->import)
struct ahash_request req;
};
+struct algif_hash_tfm { /* private data allocated and returned by hash_bind() */
+ struct crypto_ahash *hash; /* transform allocated in hash_bind() */
+ bool has_key; /* result of the most recent hash_setkey(): true if it succeeded */
+};
+
static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
size_t ignored)
{
.accept = hash_accept,
};
+static int hash_check_key(struct socket *sock)
+{ /*
+ * Gate used by the hash *_nokey socket operations. Returns 0 when the
+ * socket may proceed, or -ENOKEY when the parent's transform has no key.
+ */
+ int err = 0;
+ struct sock *psk;
+ struct alg_sock *pask;
+ struct algif_hash_tfm *tfm;
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+
+ lock_sock(sk);
+ if (ask->refcnt) /* nonzero refcnt (set below on success): return 0 right away */
+ goto unlock_child;
+
+ psk = ask->parent;
+ pask = alg_sk(ask->parent);
+ tfm = pask->private;
+
+ err = -ENOKEY;
+ lock_sock_nested(psk, SINGLE_DEPTH_NESTING); /* parent lock taken while the child lock is held */
+ if (!tfm->has_key)
+ goto unlock;
+
+ if (!pask->refcnt++) /* parent refcnt was zero: take a hold on the parent */
+ sock_hold(psk);
+
+ ask->refcnt = 1; /* later calls take the early exit above */
+ sock_put(psk); /* NOTE(review): presumably drops the hold taken when this socket was accepted without a key -- confirm */
+
+ err = 0;
+
+unlock:
+ release_sock(psk);
+unlock_child:
+ release_sock(sk);
+
+ return err;
+}
+
+static int hash_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
+ size_t size)
+{ /* sendmsg entry of the nokey ops table: key check first, then hash_sendmsg() */
+ int err;
+
+ err = hash_check_key(sock);
+ if (err)
+ return err; /* propagate the hash_check_key() failure unchanged */
+
+ return hash_sendmsg(sock, msg, size);
+}
+
+static ssize_t hash_sendpage_nokey(struct socket *sock, struct page *page,
+ int offset, size_t size, int flags)
+{ /* sendpage entry of the nokey ops table: key check first, then hash_sendpage() */
+ int err;
+
+ err = hash_check_key(sock);
+ if (err)
+ return err; /* propagate the hash_check_key() failure unchanged */
+
+ return hash_sendpage(sock, page, offset, size, flags);
+}
+
+static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
+ size_t ignored, int flags)
+{ /* recvmsg entry of the nokey ops table: key check first, then hash_recvmsg() */
+ int err;
+
+ err = hash_check_key(sock);
+ if (err)
+ return err; /* propagate the hash_check_key() failure unchanged */
+
+ return hash_recvmsg(sock, msg, ignored, flags);
+}
+
+static int hash_accept_nokey(struct socket *sock, struct socket *newsock,
+ int flags)
+{ /* accept entry of the nokey ops table: key check first, then hash_accept() */
+ int err;
+
+ err = hash_check_key(sock);
+ if (err)
+ return err; /* propagate the hash_check_key() failure unchanged */
+
+ return hash_accept(sock, newsock, flags);
+}
+
+static struct proto_ops algif_hash_ops_nokey = { /* registered as .ops_nokey of algif_type_hash */
+ .family = PF_ALG,
+
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .getname = sock_no_getname,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .getsockopt = sock_no_getsockopt,
+ .mmap = sock_no_mmap,
+ .bind = sock_no_bind,
+ .setsockopt = sock_no_setsockopt,
+ .poll = sock_no_poll,
+
+ .release = af_alg_release,
+ .sendmsg = hash_sendmsg_nokey, /* this and the next three entries call hash_check_key() first */
+ .sendpage = hash_sendpage_nokey,
+ .recvmsg = hash_recvmsg_nokey,
+ .accept = hash_accept_nokey,
+};
+
static void *hash_bind(const char *name, u32 type, u32 mask)
{
- return crypto_alloc_ahash(name, type, mask);
+ struct algif_hash_tfm *tfm; /* wrapper handed back instead of the bare transform */
+ struct crypto_ahash *hash;
+
+ tfm = kzalloc(sizeof(*tfm), GFP_KERNEL); /* zeroed allocation, so has_key starts out false */
+ if (!tfm)
+ return ERR_PTR(-ENOMEM);
+
+ hash = crypto_alloc_ahash(name, type, mask);
+ if (IS_ERR(hash)) {
+ kfree(tfm); /* do not leak the wrapper when the transform cannot be allocated */
+ return ERR_CAST(hash); /* pass the allocation error through as the return pointer */
+ }
+
+ tfm->hash = hash;
+
+ return tfm;
}
static void hash_release(void *private)
{
- crypto_free_ahash(private);
+ struct algif_hash_tfm *tfm = private; /* private is the wrapper built by hash_bind() */
+
+ crypto_free_ahash(tfm->hash); /* free the transform first, then the wrapper itself */
+ kfree(tfm);
}
static int hash_setkey(void *private, const u8 *key, unsigned int keylen)
{
- return crypto_ahash_setkey(private, key, keylen);
+ struct algif_hash_tfm *tfm = private;
+ int err;
+
+ err = crypto_ahash_setkey(tfm->hash, key, keylen);
+ tfm->has_key = !err; /* a failed setkey also clears a previously recorded key state */
+
+ return err;
}
static void hash_sock_destruct(struct sock *sk)
af_alg_release_parent(sk);
}
-static int hash_accept_parent(void *private, struct sock *sk)
+static int hash_accept_parent_nokey(void *private, struct sock *sk)
{
struct hash_ctx *ctx;
struct alg_sock *ask = alg_sk(sk);
- unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(private);
- unsigned ds = crypto_ahash_digestsize(private);
+ struct algif_hash_tfm *tfm = private;
+ struct crypto_ahash *hash = tfm->hash;
+ unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(hash);
+ unsigned ds = crypto_ahash_digestsize(hash);
ctx = sock_kmalloc(sk, len, GFP_KERNEL);
if (!ctx)
ask->private = ctx;
- ahash_request_set_tfm(&ctx->req, private);
+ ahash_request_set_tfm(&ctx->req, hash);
ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
af_alg_complete, &ctx->completion);
return 0;
}
+static int hash_accept_parent(void *private, struct sock *sk)
+{ /*
+ * Accept hook for the hash type: returns -ENOKEY while the transform has a
+ * setkey operation but no key recorded, else hash_accept_parent_nokey().
+ */
+ struct algif_hash_tfm *tfm = private;
+
+ if (!tfm->has_key && crypto_ahash_has_setkey(tfm->hash))
+ return -ENOKEY;
+
+ return hash_accept_parent_nokey(private, sk);
+}
+
static const struct af_alg_type algif_type_hash = {
.bind = hash_bind,
.release = hash_release,
.setkey = hash_setkey,
.accept = hash_accept_parent,
+ .accept_nokey = hash_accept_parent_nokey,
.ops = &algif_hash_ops,
+ .ops_nokey = &algif_hash_ops_nokey,
.name = "hash",
.owner = THIS_MODULE
};
struct scatterlist sg[0];
};
+struct skcipher_tfm { /* private data allocated and returned by skcipher_bind() */
+ struct crypto_skcipher *skcipher; /* transform allocated in skcipher_bind() */
+ bool has_key; /* result of the most recent skcipher_setkey(): true if it succeeded */
+};
+
struct skcipher_ctx {
struct list_head tsgl;
struct af_alg_sgl rsgl;
sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list);
sg = sgl->sg;
- sg_unmark_end(sg + sgl->cur);
+ if (sgl->cur)
+ sg_unmark_end(sg + sgl->cur - 1);
do {
i = sgl->cur;
plen = min_t(size_t, len, PAGE_SIZE);
lock_sock(sk);
while (msg_data_left(msg)) {
- sgl = list_first_entry(&ctx->tsgl,
- struct skcipher_sg_list, list);
- sg = sgl->sg;
-
- while (!sg->length)
- sg++;
-
if (!ctx->used) {
err = skcipher_wait_for_data(sk, flags);
if (err)
if (!used)
goto free;
+ sgl = list_first_entry(&ctx->tsgl,
+ struct skcipher_sg_list, list);
+ sg = sgl->sg;
+
+ while (!sg->length)
+ sg++;
+
skcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used,
ctx->iv);
.poll = skcipher_poll,
};
+static int skcipher_check_key(struct socket *sock)
+{ /*
+ * Gate used by the skcipher *_nokey socket operations. Returns 0 when the
+ * socket may proceed, or -ENOKEY when the parent's transform has no key.
+ */
+ int err = 0;
+ struct sock *psk;
+ struct alg_sock *pask;
+ struct skcipher_tfm *tfm;
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+
+ lock_sock(sk);
+ if (ask->refcnt) /* nonzero refcnt (set below on success): return 0 right away */
+ goto unlock_child;
+
+ psk = ask->parent;
+ pask = alg_sk(ask->parent);
+ tfm = pask->private;
+
+ err = -ENOKEY;
+ lock_sock_nested(psk, SINGLE_DEPTH_NESTING); /* parent lock taken while the child lock is held */
+ if (!tfm->has_key)
+ goto unlock;
+
+ if (!pask->refcnt++) /* parent refcnt was zero: take a hold on the parent */
+ sock_hold(psk);
+
+ ask->refcnt = 1; /* later calls take the early exit above */
+ sock_put(psk); /* NOTE(review): presumably drops the hold taken when this socket was accepted without a key -- confirm */
+
+ err = 0;
+
+unlock:
+ release_sock(psk);
+unlock_child:
+ release_sock(sk);
+
+ return err;
+}
+
+static int skcipher_sendmsg_nokey(struct socket *sock, struct msghdr *msg,
+ size_t size)
+{ /* sendmsg entry of the nokey ops table: key check first, then skcipher_sendmsg() */
+ int err;
+
+ err = skcipher_check_key(sock);
+ if (err)
+ return err; /* propagate the skcipher_check_key() failure unchanged */
+
+ return skcipher_sendmsg(sock, msg, size);
+}
+
+static ssize_t skcipher_sendpage_nokey(struct socket *sock, struct page *page,
+ int offset, size_t size, int flags)
+{ /* sendpage entry of the nokey ops table: key check first, then skcipher_sendpage() */
+ int err;
+
+ err = skcipher_check_key(sock);
+ if (err)
+ return err; /* propagate the skcipher_check_key() failure unchanged */
+
+ return skcipher_sendpage(sock, page, offset, size, flags);
+}
+
+static int skcipher_recvmsg_nokey(struct socket *sock, struct msghdr *msg,
+ size_t ignored, int flags)
+{ /* recvmsg entry of the nokey ops table: key check first, then skcipher_recvmsg() */
+ int err;
+
+ err = skcipher_check_key(sock);
+ if (err)
+ return err; /* propagate the skcipher_check_key() failure unchanged */
+
+ return skcipher_recvmsg(sock, msg, ignored, flags);
+}
+
+static struct proto_ops algif_skcipher_ops_nokey = { /* registered as .ops_nokey of algif_type_skcipher */
+ .family = PF_ALG,
+
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .getname = sock_no_getname,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .getsockopt = sock_no_getsockopt,
+ .mmap = sock_no_mmap,
+ .bind = sock_no_bind,
+ .accept = sock_no_accept,
+ .setsockopt = sock_no_setsockopt,
+
+ .release = af_alg_release,
+ .sendmsg = skcipher_sendmsg_nokey, /* this and the next two entries call skcipher_check_key() first */
+ .sendpage = skcipher_sendpage_nokey,
+ .recvmsg = skcipher_recvmsg_nokey,
+ .poll = skcipher_poll, /* poll is not wrapped with a key check */
+};
+
static void *skcipher_bind(const char *name, u32 type, u32 mask)
{
- return crypto_alloc_skcipher(name, type, mask);
+ struct skcipher_tfm *tfm; /* wrapper handed back instead of the bare transform */
+ struct crypto_skcipher *skcipher;
+
+ tfm = kzalloc(sizeof(*tfm), GFP_KERNEL); /* zeroed allocation, so has_key starts out false */
+ if (!tfm)
+ return ERR_PTR(-ENOMEM);
+
+ skcipher = crypto_alloc_skcipher(name, type, mask);
+ if (IS_ERR(skcipher)) {
+ kfree(tfm); /* do not leak the wrapper when the transform cannot be allocated */
+ return ERR_CAST(skcipher); /* pass the allocation error through as the return pointer */
+ }
+
+ tfm->skcipher = skcipher;
+
+ return tfm;
}
static void skcipher_release(void *private)
{
- crypto_free_skcipher(private);
+ struct skcipher_tfm *tfm = private; /* private is the wrapper built by skcipher_bind() */
+
+ crypto_free_skcipher(tfm->skcipher); /* free the transform first, then the wrapper itself */
+ kfree(tfm);
}
static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
{
- return crypto_skcipher_setkey(private, key, keylen);
+ struct skcipher_tfm *tfm = private;
+ int err;
+
+ err = crypto_skcipher_setkey(tfm->skcipher, key, keylen);
+ tfm->has_key = !err; /* a failed setkey also clears a previously recorded key state */
+
+ return err;
}
static void skcipher_wait(struct sock *sk)
af_alg_release_parent(sk);
}
-static int skcipher_accept_parent(void *private, struct sock *sk)
+static int skcipher_accept_parent_nokey(void *private, struct sock *sk)
{
struct skcipher_ctx *ctx;
struct alg_sock *ask = alg_sk(sk);
- unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(private);
+ struct skcipher_tfm *tfm = private;
+ struct crypto_skcipher *skcipher = tfm->skcipher;
+ unsigned int len = sizeof(*ctx) + crypto_skcipher_reqsize(skcipher);
ctx = sock_kmalloc(sk, len, GFP_KERNEL);
if (!ctx)
return -ENOMEM;
- ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(private),
+ ctx->iv = sock_kmalloc(sk, crypto_skcipher_ivsize(skcipher),
GFP_KERNEL);
if (!ctx->iv) {
sock_kfree_s(sk, ctx, len);
return -ENOMEM;
}
- memset(ctx->iv, 0, crypto_skcipher_ivsize(private));
+ memset(ctx->iv, 0, crypto_skcipher_ivsize(skcipher));
INIT_LIST_HEAD(&ctx->tsgl);
ctx->len = len;
ask->private = ctx;
- skcipher_request_set_tfm(&ctx->req, private);
+ skcipher_request_set_tfm(&ctx->req, skcipher);
skcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG,
af_alg_complete, &ctx->completion);
return 0;
}
+static int skcipher_accept_parent(void *private, struct sock *sk)
+{ /*
+ * Accept hook for the skcipher type: returns -ENOKEY while the transform
+ * has a setkey operation but no key recorded, else defers to
+ * skcipher_accept_parent_nokey().
+ */
+ struct skcipher_tfm *tfm = private;
+
+ if (!tfm->has_key && crypto_skcipher_has_setkey(tfm->skcipher))
+ return -ENOKEY;
+
+ return skcipher_accept_parent_nokey(private, sk);
+}
+
static const struct af_alg_type algif_type_skcipher = {
.bind = skcipher_bind,
.release = skcipher_release,
.setkey = skcipher_setkey,
.accept = skcipher_accept_parent,
+ .accept_nokey = skcipher_accept_parent_nokey,
.ops = &algif_skcipher_ops,
+ .ops_nokey = &algif_skcipher_ops_nokey,
.name = "skcipher",
.owner = THIS_MODULE
};
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("crc32c");
MODULE_ALIAS_CRYPTO("crc32c-generic");
-MODULE_SOFTDEP("pre: crc32c");
crt->finup = shash_async_finup;
crt->digest = shash_async_digest;
- if (alg->setkey)
+ if (alg->setkey) {
crt->setkey = shash_async_setkey;
+ crt->has_setkey = true;
+ }
if (alg->export)
crt->export = shash_async_export;
if (alg->import)
skcipher->decrypt = skcipher_decrypt_blkcipher;
skcipher->ivsize = crypto_blkcipher_ivsize(blkcipher);
+ skcipher->has_setkey = calg->cra_blkcipher.max_keysize;
return 0;
}
skcipher->ivsize = crypto_ablkcipher_ivsize(ablkcipher);
skcipher->reqsize = crypto_ablkcipher_reqsize(ablkcipher) +
sizeof(struct ablkcipher_request);
+ skcipher->has_setkey = calg->cra_ablkcipher.max_keysize;
return 0;
}
#include <linux/hardirq.h>
#include <linux/pstore.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h> /* kvfree() */
#include <acpi/apei.h>
#include "apei-internal.h"
return -ENOMEM;
memcpy(new_entries, entries,
erst_record_id_cache.len * sizeof(entries[0]));
- if (erst_record_id_cache.size < PAGE_SIZE)
- kfree(entries);
- else
- vfree(entries);
+ kvfree(entries);
erst_record_id_cache.entries = entries = new_entries;
erst_record_id_cache.size = new_size;
}
if ARM_AMBA
config TEGRA_AHB
- bool "Enable AHB driver for NVIDIA Tegra SoCs"
+ bool
default y if ARCH_TEGRA
help
Adds AHB configuration functionality for NVIDIA Tegra SoCs,
newattrs.ia_uid = uid;
newattrs.ia_gid = gid;
newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
notify_change(dentry, &newattrs, NULL);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
/* mark as kernel-created inode */
d_inode(dentry)->i_private = &thread;
err = -ENOENT;
}
dput(dentry);
- mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+ inode_unlock(d_inode(parent.dentry));
path_put(&parent);
return err;
}
newattrs.ia_mode = stat.mode & ~0777;
newattrs.ia_valid =
ATTR_UID|ATTR_GID|ATTR_MODE;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
notify_change(dentry, &newattrs, NULL);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
err = vfs_unlink(d_inode(parent.dentry), dentry, NULL);
if (!err || err == -ENOENT)
deleted = 1;
err = -ENOENT;
}
dput(dentry);
- mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+ inode_unlock(d_inode(parent.dentry));
path_put(&parent);
if (deleted && strchr(nodename, '/'))
ssize = get_capacity(d->gd);
bd = bdget_disk(d->gd, 0);
if (bd) {
- mutex_lock(&bd->bd_inode->i_mutex);
+ inode_lock(bd->bd_inode);
i_size_write(bd->bd_inode, (loff_t)ssize<<9);
- mutex_unlock(&bd->bd_inode->i_mutex);
+ inode_unlock(bd->bd_inode);
bdput(bd);
}
spin_lock_irq(&d->lock);
return need_transaction;
}
-static int al_write_transaction(struct drbd_device *device);
+#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
+/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
+ * are still coupled, or assume too much about their relation.
+ * Code below will not work if this is violated.
+ * Will be cleaned up with some followup patch.
+ */
+# error FIXME
+#endif
+
+static unsigned int al_extent_to_bm_page(unsigned int al_enr)
+{ /*
+ * Convert an activity-log extent number into a bitmap page index by
+ * shifting out the difference between bits-per-page and bits-per-extent.
+ */
+ return al_enr >>
+ /* bit to page */
+ ((PAGE_SHIFT + 3) -
+ /* al extent number to bit */
+ (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
+}
+
+static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
+{ /*
+ * Compute the on-disk sector for the transaction numbered
+ * device->al_tr_number, using the striping parameters in device->ldev->md.
+ */
+ const unsigned int stripes = device->ldev->md.al_stripes;
+ const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
+
+ /* transaction number, modulo on-disk ring buffer wrap around */
+ unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
+
+ /* ... to aligned 4k on disk block */
+ t = ((t % stripes) * stripe_size_4kB) + t/stripes;
+
+ /* ... to 512 byte sector in activity log */
+ t *= 8;
+
+ /* ... plus offset to the on disk position */
+ return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
+}
+
+static int __al_write_transaction(struct drbd_device *device, struct al_transaction_on_disk *buffer)
+{ /*
+ * Build one activity-log transaction in @buffer and write it out.
+ * Returns 0 on success, or -EIO if the hinted bitmap write or the
+ * meta-data write fails.
+ * NOTE(review): the caller appears to be responsible for holding the ldev
+ * reference and for owning @buffer -- confirm against all callers.
+ */
+ struct lc_element *e;
+ sector_t sector;
+ int i, mx;
+ unsigned extent_nr;
+ unsigned crc = 0;
+ int err = 0;
+
+ memset(buffer, 0, sizeof(*buffer));
+ buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
+ buffer->tr_number = cpu_to_be32(device->al_tr_number);
+
+ i = 0;
+
+ /* Even though no one can start to change this list
+ * once we set the LC_LOCKED -- from drbd_al_begin_io(),
+ * lc_try_lock_for_transaction() --, someone may still
+ * be in the process of changing it. */
+ spin_lock_irq(&device->al_lock);
+ list_for_each_entry(e, &device->act_log->to_be_changed, list) {
+ if (i == AL_UPDATES_PER_TRANSACTION) {
+ i++; /* push i past the limit so the BUG_ON() below fires */
+ break;
+ }
+ buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
+ buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
+ if (e->lc_number != LC_FREE) /* slot currently holds an extent: mark its bitmap page for writeout */
+ drbd_bm_mark_for_writeout(device,
+ al_extent_to_bm_page(e->lc_number));
+ i++;
+ }
+ spin_unlock_irq(&device->al_lock);
+ BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
+
+ buffer->n_updates = cpu_to_be16(i);
+ for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { /* pad the unused update slots */
+ buffer->update_slot_nr[i] = cpu_to_be16(-1);
+ buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
+ }
+
+ buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
+ buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
+
+ mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
+ device->act_log->nr_elements - device->al_tr_cycle);
+ for (i = 0; i < mx; i++) { /* copy the window of elements starting at al_tr_cycle */
+ unsigned idx = device->al_tr_cycle + i;
+ extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
+ buffer->context[i] = cpu_to_be32(extent_nr);
+ }
+ for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
+ buffer->context[i] = cpu_to_be32(LC_FREE);
+
+ device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; /* advance the window, wrapping to 0 at the end */
+ if (device->al_tr_cycle >= device->act_log->nr_elements)
+ device->al_tr_cycle = 0;
+
+ sector = al_tr_number_to_on_disk_sector(device);
+
+ crc = crc32c(0, buffer, 4096); /* crc32c field is still zero from the memset() above */
+ buffer->crc32c = cpu_to_be32(crc);
+
+ if (drbd_bm_write_hinted(device))
+ err = -EIO;
+ else {
+ bool write_al_updates;
+ rcu_read_lock();
+ write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
+ rcu_read_unlock();
+ if (write_al_updates) { /* when false, nothing is written and the counters stay put */
+ if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
+ err = -EIO;
+ drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
+ } else {
+ device->al_tr_number++;
+ device->al_writ_cnt++;
+ }
+ }
+ }
+
+ return err;
+}
+
+static int al_write_transaction(struct drbd_device *device)
+{ /*
+ * Take an ldev reference and the md_io buffer, write one activity-log
+ * transaction through __al_write_transaction(), then release both.
+ * Returns that function's result, or -EIO / -ENODEV when the disk is
+ * not usable.
+ */
+ struct al_transaction_on_disk *buffer;
+ int err;
+
+ if (!get_ldev(device)) {
+ drbd_err(device, "disk is %s, cannot start al transaction\n",
+ drbd_disk_str(device->state.disk));
+ return -EIO;
+ }
+
+ /* The bitmap write may have failed, causing a state change. */
+ if (device->state.disk < D_INCONSISTENT) {
+ drbd_err(device,
+ "disk is %s, cannot write al transaction\n",
+ drbd_disk_str(device->state.disk));
+ put_ldev(device); /* every exit after get_ldev() succeeded drops the reference */
+ return -EIO;
+ }
+
+ /* protects md_io_buffer, al_tr_cycle, ... */
+ buffer = drbd_md_get_buffer(device, __func__);
+ if (!buffer) {
+ drbd_err(device, "disk failed while waiting for md_io buffer\n");
+ put_ldev(device);
+ return -ENODEV;
+ }
+
+ err = __al_write_transaction(device, buffer);
+
+ drbd_md_put_buffer(device);
+ put_ldev(device);
+
+ return err;
+}
+
void drbd_al_begin_io_commit(struct drbd_device *device)
{
wake_up(&device->al_wait);
}
-#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
-/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
- * are still coupled, or assume too much about their relation.
- * Code below will not work if this is violated.
- * Will be cleaned up with some followup patch.
- */
-# error FIXME
-#endif
-
-static unsigned int al_extent_to_bm_page(unsigned int al_enr)
-{
- return al_enr >>
- /* bit to page */
- ((PAGE_SHIFT + 3) -
- /* al extent number to bit */
- (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
-}
-
-static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
-{
- const unsigned int stripes = device->ldev->md.al_stripes;
- const unsigned int stripe_size_4kB = device->ldev->md.al_stripe_size_4k;
-
- /* transaction number, modulo on-disk ring buffer wrap around */
- unsigned int t = device->al_tr_number % (device->ldev->md.al_size_4k);
-
- /* ... to aligned 4k on disk block */
- t = ((t % stripes) * stripe_size_4kB) + t/stripes;
-
- /* ... to 512 byte sector in activity log */
- t *= 8;
-
- /* ... plus offset to the on disk position */
- return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
-}
-
-int al_write_transaction(struct drbd_device *device)
-{
- struct al_transaction_on_disk *buffer;
- struct lc_element *e;
- sector_t sector;
- int i, mx;
- unsigned extent_nr;
- unsigned crc = 0;
- int err = 0;
-
- if (!get_ldev(device)) {
- drbd_err(device, "disk is %s, cannot start al transaction\n",
- drbd_disk_str(device->state.disk));
- return -EIO;
- }
-
- /* The bitmap write may have failed, causing a state change. */
- if (device->state.disk < D_INCONSISTENT) {
- drbd_err(device,
- "disk is %s, cannot write al transaction\n",
- drbd_disk_str(device->state.disk));
- put_ldev(device);
- return -EIO;
- }
-
- /* protects md_io_buffer, al_tr_cycle, ... */
- buffer = drbd_md_get_buffer(device, __func__);
- if (!buffer) {
- drbd_err(device, "disk failed while waiting for md_io buffer\n");
- put_ldev(device);
- return -ENODEV;
- }
-
- memset(buffer, 0, sizeof(*buffer));
- buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
- buffer->tr_number = cpu_to_be32(device->al_tr_number);
-
- i = 0;
-
- /* Even though no one can start to change this list
- * once we set the LC_LOCKED -- from drbd_al_begin_io(),
- * lc_try_lock_for_transaction() --, someone may still
- * be in the process of changing it. */
- spin_lock_irq(&device->al_lock);
- list_for_each_entry(e, &device->act_log->to_be_changed, list) {
- if (i == AL_UPDATES_PER_TRANSACTION) {
- i++;
- break;
- }
- buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
- buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
- if (e->lc_number != LC_FREE)
- drbd_bm_mark_for_writeout(device,
- al_extent_to_bm_page(e->lc_number));
- i++;
- }
- spin_unlock_irq(&device->al_lock);
- BUG_ON(i > AL_UPDATES_PER_TRANSACTION);
-
- buffer->n_updates = cpu_to_be16(i);
- for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
- buffer->update_slot_nr[i] = cpu_to_be16(-1);
- buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
- }
-
- buffer->context_size = cpu_to_be16(device->act_log->nr_elements);
- buffer->context_start_slot_nr = cpu_to_be16(device->al_tr_cycle);
-
- mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
- device->act_log->nr_elements - device->al_tr_cycle);
- for (i = 0; i < mx; i++) {
- unsigned idx = device->al_tr_cycle + i;
- extent_nr = lc_element_by_index(device->act_log, idx)->lc_number;
- buffer->context[i] = cpu_to_be32(extent_nr);
- }
- for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
- buffer->context[i] = cpu_to_be32(LC_FREE);
-
- device->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
- if (device->al_tr_cycle >= device->act_log->nr_elements)
- device->al_tr_cycle = 0;
-
- sector = al_tr_number_to_on_disk_sector(device);
-
- crc = crc32c(0, buffer, 4096);
- buffer->crc32c = cpu_to_be32(crc);
-
- if (drbd_bm_write_hinted(device))
- err = -EIO;
- else {
- bool write_al_updates;
- rcu_read_lock();
- write_al_updates = rcu_dereference(device->ldev->disk_conf)->al_updates;
- rcu_read_unlock();
- if (write_al_updates) {
- if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
- err = -EIO;
- drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
- } else {
- device->al_tr_number++;
- device->al_writ_cnt++;
- }
- }
- }
-
- drbd_md_put_buffer(device);
- put_ldev(device);
-
- return err;
-}
-
static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
{
int rv;
wake_up(&device->al_wait);
}
-int drbd_initialize_al(struct drbd_device *device, void *buffer)
+int drbd_al_initialize(struct drbd_device *device, void *buffer)
{
struct al_transaction_on_disk *al = buffer;
struct drbd_md *md = &device->ldev->md;
- sector_t al_base = md->md_offset + md->al_offset;
int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
int i;
- memset(al, 0, 4096);
- al->magic = cpu_to_be32(DRBD_AL_MAGIC);
- al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
- al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
+ __al_write_transaction(device, al);
+ /* There may or may not have been a pending transaction. */
+ spin_lock_irq(&device->al_lock);
+ lc_committed(device->act_log);
+ spin_unlock_irq(&device->al_lock);
- for (i = 0; i < al_size_4k; i++) {
- int err = drbd_md_sync_page_io(device, device->ldev, al_base + i * 8, WRITE);
+ /* The rest of the transactions will have an empty "updates" list, and
+ * are written out only to provide the context, and to initialize the
+ * on-disk ring buffer. */
+ for (i = 1; i < al_size_4k; i++) {
+ int err = __al_write_transaction(device, al);
if (err)
return err;
}
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/bitops.h>
+#include <linux/bitmap.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/drbd.h>
}
}
-static void bm_vk_free(void *ptr, int v)
+static inline void bm_vk_free(void *ptr)
{
- if (v)
- vfree(ptr);
- else
- kfree(ptr);
+ kvfree(ptr); /* replaces the explicit vfree()/kfree() choice, so no flag is needed */
}
/*
{
struct page **old_pages = b->bm_pages;
struct page **new_pages, *page;
- unsigned int i, bytes, vmalloced = 0;
+ unsigned int i, bytes;
unsigned long have = b->bm_number_of_pages;
BUG_ON(have == 0 && old_pages != NULL);
PAGE_KERNEL);
if (!new_pages)
return NULL;
- vmalloced = 1;
}
if (want >= have) {
page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
if (!page) {
bm_free_pages(new_pages + have, i - have);
- bm_vk_free(new_pages, vmalloced);
+ bm_vk_free(new_pages);
return NULL;
}
/* we want to know which page it is
*/
}
- if (vmalloced)
- b->bm_flags |= BM_P_VMALLOCED;
- else
- b->bm_flags &= ~BM_P_VMALLOCED;
-
return new_pages;
}
if (!expect(device->bitmap))
return;
bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
- bm_vk_free(device->bitmap->bm_pages, (BM_P_VMALLOCED & device->bitmap->bm_flags));
+ bm_vk_free(device->bitmap->bm_pages);
kfree(device->bitmap);
device->bitmap = NULL;
}
* this masks out the remaining bits.
* Returns the number of bits cleared.
*/
+#ifndef BITS_PER_PAGE
#define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3))
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1)
+#else
+# if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3))
+# error "ambiguous BITS_PER_PAGE"
+# endif
+#endif
#define BITS_PER_LONG_MASK (BITS_PER_LONG - 1)
static int bm_clear_surplus(struct drbd_bitmap *b)
{
unsigned long *p_addr;
unsigned long bits = 0;
unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1;
- int idx, i, last_word;
+ int idx, last_word;
/* all but last page */
for (idx = 0; idx < b->bm_number_of_pages - 1; idx++) {
p_addr = __bm_map_pidx(b, idx);
- for (i = 0; i < LWPP; i++)
- bits += hweight_long(p_addr[i]);
+ bits += bitmap_weight(p_addr, BITS_PER_PAGE);
__bm_unmap(p_addr);
cond_resched();
}
/* last (or only) page */
last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
p_addr = __bm_map_pidx(b, idx);
- for (i = 0; i < last_word; i++)
- bits += hweight_long(p_addr[i]);
+ bits += bitmap_weight(p_addr, last_word * BITS_PER_LONG);
p_addr[last_word] &= cpu_to_lel(mask);
bits += hweight_long(p_addr[last_word]);
/* 32bit arch, may have an unused padding long */
unsigned long want, have, onpages; /* number of pages */
struct page **npages, **opages = NULL;
int err = 0, growing;
- int opages_vmalloced;
if (!expect(b))
return -ENOMEM;
if (capacity == b->bm_dev_capacity)
goto out;
- opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
-
if (capacity == 0) {
spin_lock_irq(&b->bm_lock);
opages = b->bm_pages;
b->bm_dev_capacity = 0;
spin_unlock_irq(&b->bm_lock);
bm_free_pages(opages, onpages);
- bm_vk_free(opages, opages_vmalloced);
+ bm_vk_free(opages);
goto out;
}
bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
spin_unlock_irq(&b->bm_lock);
if (opages != npages)
- bm_vk_free(opages, opages_vmalloced);
+ bm_vk_free(opages);
if (!growing)
b->bm_set = bm_count_bits(b);
drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
int bits;
int changed = 0;
unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
+
+ /* I think it is more cache line friendly to hweight_long then set to ~0UL,
+ * than to first bitmap_weight() all words, then bitmap_fill() all words */
for (i = first_word; i < last_word; i++) {
bits = hweight_long(paddr[i]);
paddr[i] = ~0UL;
int n = e-s;
p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
bm = p_addr + MLPP(s);
- while (n--)
- count += hweight_long(*bm++);
+ count += bitmap_weight(bm, n * BITS_PER_LONG);
bm_unmap(p_addr);
} else {
drbd_err(device, "start offset (%d) too large in drbd_bm_e_weight\n", s);
if (!parent || d_really_is_negative(parent))
goto out;
/* serialize with d_delete() */
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
/* Make sure the object is still alive */
if (simple_positive(file->f_path.dentry)
&& kref_get_unless_zero(kref))
ret = 0;
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
if (!ret) {
ret = single_open(file, show, data);
if (ret)
return 0;
}
+static int device_ed_gen_id_show(struct seq_file *m, void *ignored)
+{ /* seq_file show callback: prints device->ed_uuid as "0x" plus 16 upper-case hex digits */
+ struct drbd_device *device = m->private; /* the drbd_device is carried in the seq_file's private pointer */
+ seq_printf(m, "0x%016llX\n", (unsigned long long)device->ed_uuid);
+ return 0;
+}
+
#define drbd_debugfs_device_attr(name) \
static int device_ ## name ## _open(struct inode *inode, struct file *file) \
{ \
drbd_debugfs_device_attr(act_log_extents)
drbd_debugfs_device_attr(resync_extents)
drbd_debugfs_device_attr(data_gen_id)
+drbd_debugfs_device_attr(ed_gen_id)
void drbd_debugfs_device_add(struct drbd_device *device)
{
DCF(act_log_extents);
DCF(resync_extents);
DCF(data_gen_id);
+ DCF(ed_gen_id);
#undef DCF
return;
drbd_debugfs_remove(&device->debugfs_vol_act_log_extents);
drbd_debugfs_remove(&device->debugfs_vol_resync_extents);
drbd_debugfs_remove(&device->debugfs_vol_data_gen_id);
+ drbd_debugfs_remove(&device->debugfs_vol_ed_gen_id);
drbd_debugfs_remove(&device->debugfs_vol);
}
extern char usermode_helper[];
-/* I don't remember why XCPU ...
- * This is used to wake the asender,
- * and to interrupt sending the sending task
- * on disconnect.
- */
-#define DRBD_SIG SIGXCPU
-
/* This is used to stop/restart our threads.
* Cannot use SIGTERM nor SIGKILL, since these
* are sent out by init on runlevel changes
extern int drbd_wait_misc(struct drbd_device *, struct drbd_interval *);
+extern void lock_all_resources(void);
+extern void unlock_all_resources(void);
+
struct drbd_request {
struct drbd_work w;
struct drbd_device *device;
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
- SUSPEND_IO, /* suspend application io */
BITMAP_IO, /* suspend application io;
once no more io in flight, start bitmap io */
BITMAP_IO_QUEUED, /* Started bitmap IO */
/* definition of bits in bm_flags to be used in drbd_bm_lock
* and drbd_bitmap_io and friends. */
enum bm_flag {
- /* do we need to kfree, or vfree bm_pages? */
- BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
-
/* currently locked for bulk operation */
BM_LOCKED_MASK = 0xf,
void (*done)(struct drbd_device *device, int rv);
};
-enum write_ordering_e {
- WO_none,
- WO_drain_io,
- WO_bdev_flush,
-};
-
struct fifo_buffer {
unsigned int head_index;
unsigned int size;
enum {
NET_CONGESTED, /* The data socket is congested */
RESOLVE_CONFLICTS, /* Set on one node, cleared on the peer! */
- SEND_PING, /* whether asender should send a ping asap */
- SIGNAL_ASENDER, /* whether asender wants to be interrupted */
+ SEND_PING,
GOT_PING_ACK, /* set when we receive a ping_ack packet, ping_wait gets woken */
CONN_WD_ST_CHG_REQ, /* A cluster wide state change on the connection is active */
CONN_WD_ST_CHG_OKAY,
DEVICE_WORK_PENDING, /* tell worker that some device has pending work */
};
+enum which_state { NOW, OLD = NOW, NEW }; /* OLD is an alias of NOW (both 0); NEW is 1 */
+
struct drbd_resource {
char *name;
#ifdef CONFIG_DEBUG_FS
unsigned long last_reconnect_jif;
struct drbd_thread receiver;
struct drbd_thread worker;
- struct drbd_thread asender;
+ struct drbd_thread ack_receiver;
+ struct workqueue_struct *ack_sender;
/* cached pointers,
* so we can look up the oldest pending requests more quickly.
struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST];
struct {
+ unsigned long last_sent_barrier_jif;
+
/* whether this sender thread
* has processed a single write yet. */
bool seen_any_write_yet;
} send;
};
+/*
+ * Report whether @connection currently has a net_conf attached.
+ *
+ * The pointer is only sampled (inside an RCU read-side critical section) and
+ * never dereferenced, so the answer is a snapshot: it may already be stale
+ * by the time the caller looks at it.
+ *
+ * Returns true if connection->net_conf was non-NULL when sampled.
+ */
+static inline bool has_net_conf(struct drbd_connection *connection)
+{
+	bool configured;
+
+	rcu_read_lock();
+	configured = rcu_dereference(connection->net_conf) != NULL;
+	rcu_read_unlock();
+
+	return configured;
+}
+
void __update_timing_details(
struct drbd_thread_timing_details *tdp,
unsigned int *cb_nr,
struct list_head peer_devices;
struct drbd_device *device;
struct drbd_connection *connection;
+ struct work_struct send_acks_work;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_peer_dev;
#endif
struct dentry *debugfs_vol_act_log_extents;
struct dentry *debugfs_vol_resync_extents;
struct dentry *debugfs_vol_data_gen_id;
+ struct dentry *debugfs_vol_ed_gen_id;
#endif
unsigned int vnr; /* volume number within the connection */
atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
atomic_t unacked_cnt; /* Need to send replies for */
atomic_t local_cnt; /* Waiting for local completion */
+ atomic_t suspend_cnt;
/* Interval tree of pending local requests */
struct rb_root read_requests;
return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices);
}
+/*
+ * Look up the peer device registered under @volume_number in the
+ * connection's peer_devices idr.
+ *
+ * Returns whatever idr_find() yields for that id (NULL when nothing is
+ * registered there).
+ */
+static inline struct drbd_peer_device *
+conn_peer_device(struct drbd_connection *connection, int volume_number)
+{
+	struct drbd_peer_device *peer_device;
+
+	peer_device = idr_find(&connection->peer_devices, volume_number);
+	return peer_device;
+}
+
#define for_each_resource(resource, _resources) \
list_for_each_entry(resource, _resources, resources)
extern int drbd_send_bitmap(struct drbd_device *device);
extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
-extern void drbd_free_ldev(struct drbd_backing_dev *ldev);
+extern void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev);
extern void drbd_device_cleanup(struct drbd_device *device);
void drbd_print_uuids(struct drbd_device *device, const char *text);
/* to allocate from that set */
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
-extern rwlock_t global_state_lock;
+extern struct mutex resources_mutex;
extern int conn_lowest_minor(struct drbd_connection *connection);
extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor);
/* drbd_nl.c */
+
+extern struct mutex notification_mutex;
+
extern void drbd_suspend_io(struct drbd_device *device);
extern void drbd_resume_io(struct drbd_device *device);
extern char *ppsize(char *buf, unsigned long long size);
/* drbd_receiver.c */
extern int drbd_receiver(struct drbd_thread *thi);
-extern int drbd_asender(struct drbd_thread *thi);
+extern int drbd_ack_receiver(struct drbd_thread *thi);
+extern void drbd_send_ping_wf(struct work_struct *ws);
+extern void drbd_send_acks_wf(struct work_struct *ws);
extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
bool throttle_if_app_is_waiting);
#define drbd_rs_failed_io(device, sector, size) \
__drbd_change_sync(device, sector, size, RECORD_RS_FAILED)
extern void drbd_al_shrink(struct drbd_device *device);
-extern int drbd_initialize_al(struct drbd_device *, void *);
+extern int drbd_al_initialize(struct drbd_device *, void *);
/* drbd_nl.c */
/* state info broadcast */
};
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib);
+extern void notify_resource_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_resource *,
+ struct resource_info *,
+ enum drbd_notification_type);
+extern void notify_device_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_device *,
+ struct device_info *,
+ enum drbd_notification_type);
+extern void notify_connection_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_connection *,
+ struct connection_info *,
+ enum drbd_notification_type);
+extern void notify_peer_device_state(struct sk_buff *,
+ unsigned int,
+ struct drbd_peer_device *,
+ struct peer_device_info *,
+ enum drbd_notification_type);
+extern void notify_helper(enum drbd_notification_type, struct drbd_device *,
+ struct drbd_connection *, const char *, int);
+
/*
* inline helper functions
*************************/
return 0;
}
-static inline enum drbd_state_rv
-_drbd_set_state(struct drbd_device *device, union drbd_state ns,
- enum chg_state_flags flags, struct completion *done)
-{
- enum drbd_state_rv rv;
-
- read_lock(&global_state_lock);
- rv = __drbd_set_state(device, ns, flags, done);
- read_unlock(&global_state_lock);
-
- return rv;
-}
-
static inline union drbd_state drbd_read_state(struct drbd_device *device)
{
struct drbd_resource *resource = device->resource;
extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
-static inline void wake_asender(struct drbd_connection *connection)
+/* To get the ack_receiver out of the blocking network stack,
+ * so it can change its sk_rcvtimeo from idle- to ping-timeout,
+ * and send a ping, we need to send a signal.
+ * Which signal we send is irrelevant. */
+static inline void wake_ack_receiver(struct drbd_connection *connection)
{
- if (test_bit(SIGNAL_ASENDER, &connection->flags))
- force_sig(DRBD_SIG, connection->asender.task);
+ struct task_struct *task = connection->ack_receiver.task;
+ if (task && get_t_state(&connection->ack_receiver) == RUNNING)
+ force_sig(SIGXCPU, task);
}
static inline void request_ping(struct drbd_connection *connection)
{
set_bit(SEND_PING, &connection->flags);
- wake_asender(connection);
+ wake_ack_receiver(connection);
}
extern void *conn_prepare_command(struct drbd_connection *, struct drbd_socket *);
if (drbd_suspended(device))
return false;
- if (test_bit(SUSPEND_IO, &device->flags))
+ if (atomic_read(&device->suspend_cnt))
return false;
/* to avoid potential deadlock or bitmap corruption,
*/
struct idr drbd_devices;
struct list_head drbd_resources;
+struct mutex resources_mutex;
struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache; /* peer requests */
/* long elapsed = (long)(jiffies - device->last_received); */
drop_it = connection->meta.socket == sock
- || !connection->asender.task
- || get_t_state(&connection->asender) != RUNNING
+ || !connection->ack_receiver.task
+ || get_t_state(&connection->ack_receiver) != RUNNING
|| connection->cstate < C_WF_REPORT_PARAMS;
if (drop_it)
drbd_update_congested(connection);
}
do {
- /* STRANGE
- * tcp_sendmsg does _not_ use its size parameter at all ?
- *
- * -EAGAIN on timeout, -EINTR on signal.
- */
-/* THINK
- * do we need to block DRBD_SIG if sock == &meta.socket ??
- * otherwise wake_asender() might interrupt some send_*Ack !
- */
rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
if (rv == -EAGAIN) {
if (we_should_drop_the_connection(connection, sock))
drbd_bm_cleanup(device);
}
- drbd_free_ldev(device->ldev);
+ drbd_backing_dev_free(device, device->ldev);
device->ldev = NULL;
clear_bit(AL_SUSPENDED, &device->flags);
if (device->this_bdev)
bdput(device->this_bdev);
- drbd_free_ldev(device->ldev);
+ drbd_backing_dev_free(device, device->ldev);
device->ldev = NULL;
drbd_release_all_peer_reqs(device);
cpumask_copy(resource->cpu_mask, new_cpu_mask);
for_each_connection_rcu(connection, resource) {
connection->receiver.reset_cpu_mask = 1;
- connection->asender.reset_cpu_mask = 1;
+ connection->ack_receiver.reset_cpu_mask = 1;
connection->worker.reset_cpu_mask = 1;
}
}
kref_init(&resource->kref);
idr_init(&resource->devices);
INIT_LIST_HEAD(&resource->connections);
- resource->write_ordering = WO_bdev_flush;
+ resource->write_ordering = WO_BDEV_FLUSH;
list_add_tail_rcu(&resource->resources, &drbd_resources);
mutex_init(&resource->conf_update);
mutex_init(&resource->adm_mutex);
connection->receiver.connection = connection;
drbd_thread_init(resource, &connection->worker, drbd_worker, "worker");
connection->worker.connection = connection;
- drbd_thread_init(resource, &connection->asender, drbd_asender, "asender");
- connection->asender.connection = connection;
+ drbd_thread_init(resource, &connection->ack_receiver, drbd_ack_receiver, "ack_recv");
+ connection->ack_receiver.connection = connection;
kref_init(&connection->kref);
{
/* opencoded create_singlethread_workqueue(),
* to be able to say "drbd%d", ..., minor */
- device->submit.wq = alloc_workqueue("drbd%u_submit",
- WQ_UNBOUND | WQ_MEM_RECLAIM, 1, device->minor);
+ device->submit.wq =
+ alloc_ordered_workqueue("drbd%u_submit", WQ_MEM_RECLAIM, device->minor);
if (!device->submit.wq)
return -ENOMEM;
goto out_idr_remove_from_resource;
}
kref_get(&connection->kref);
+ INIT_WORK(&peer_device->send_acks_work, drbd_send_acks_wf);
}
if (init_submitter(device)) {
drbd_proc = NULL; /* play safe for drbd_cleanup */
idr_init(&drbd_devices);
- rwlock_init(&global_state_lock);
+ mutex_init(&resources_mutex);
INIT_LIST_HEAD(&drbd_resources);
err = drbd_genl_register();
return err;
}
-void drbd_free_ldev(struct drbd_backing_dev *ldev)
-{
- if (ldev == NULL)
- return;
-
- blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
- blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
-
- kfree(ldev->disk_conf);
- kfree(ldev);
-}
-
static void drbd_free_one_sock(struct drbd_socket *ds)
{
struct socket *s;
* and read it. */
bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
bdev->md.md_offset = drbd_md_ss(bdev);
+ /* Even for (flexible or indexed) external meta data,
+ * initially restrict us to the 4k superblock for now.
+ * Affects the paranoia out-of-range access check in drbd_md_sync_page_io(). */
+ bdev->md.md_size_sect = 8;
if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) {
/* NOTE: can't do normal error processing here as this is
spin_lock_irq(&device->resource->req_lock);
set_bit(BITMAP_IO, &device->flags);
- if (atomic_read(&device->ap_bio_cnt) == 0) {
+ /* don't wait for pending application IO if the caller indicates that
+ * application IO does not conflict anyways. */
+ if (flags == BM_LOCKED_CHANGE_ALLOWED || atomic_read(&device->ap_bio_cnt) == 0) {
if (!test_and_set_bit(BITMAP_IO_QUEUED, &device->flags))
drbd_queue_work(&first_peer_device(device)->connection->sender_work,
&device->bm_io_work.w);
return 0;
}
+/*
+ * Acquire every lock needed to look at all resources at once.
+ *
+ * Order of acquisition: resources_mutex first, then local interrupts are
+ * disabled, then the req_lock of each entry on drbd_resources is taken in
+ * list order.  Each req_lock is taken with its own, increasing nesting
+ * subclass (presumably a lock-validator annotation; the counter may be
+ * unused in some configurations, hence __maybe_unused).
+ *
+ * Must be paired with unlock_all_resources().
+ */
+void lock_all_resources(void)
+{
+	struct drbd_resource *res;
+	int __maybe_unused subclass = 0;
+
+	mutex_lock(&resources_mutex);
+	local_irq_disable();
+	for_each_resource(res, &drbd_resources) {
+		spin_lock_nested(&res->req_lock, subclass++);
+	}
+}
+
+/*
+ * Counterpart of lock_all_resources().
+ *
+ * Drops the req_lock of each entry on drbd_resources, re-enables local
+ * interrupts, and finally releases resources_mutex.
+ */
+void unlock_all_resources(void)
+{
+	struct drbd_resource *res;
+
+	for_each_resource(res, &drbd_resources) {
+		spin_unlock(&res->req_lock);
+	}
+	local_irq_enable();
+	mutex_unlock(&resources_mutex);
+}
+
#ifdef CONFIG_DRBD_FAULT_INJECTION
/* Fault insertion support including random number generator shamelessly
* stolen from kernel/rcutorture.c */
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
+#include "drbd_state_change.h"
#include <asm/unaligned.h>
#include <linux/drbd_limits.h>
#include <linux/kthread.h>
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);
/* .dumpit */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_devices_done(struct netlink_callback *cb);
+int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_connections_done(struct netlink_callback *cb);
+int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb);
+int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb);
+int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb);
#include <linux/drbd_genl_api.h>
#include "drbd_nla.h"
#include <linux/genl_magic_func.h>
+static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
+static atomic_t notify_genl_seq = ATOMIC_INIT(2); /* two. */
+
+DEFINE_MUTEX(notification_mutex);
+
/* used blkdev_get_by_path, to claim our meta data device(s) */
static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
sib.sib_reason = SIB_HELPER_PRE;
sib.helper_name = cmd;
drbd_bcast_event(device, &sib);
+ notify_helper(NOTIFY_CALL, device, connection, cmd, 0);
ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
sib.sib_reason = SIB_HELPER_POST;
sib.helper_exit_code = ret;
drbd_bcast_event(device, &sib);
+ notify_helper(NOTIFY_RESPONSE, device, connection, cmd, ret);
if (current == connection->worker.task)
clear_bit(CALLBACK_PENDING, &connection->flags);
drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
/* TODO: conn_bcast_event() ?? */
+ notify_helper(NOTIFY_CALL, NULL, connection, cmd, 0);
ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
if (ret)
usermode_helper, cmd, resource_name,
(ret >> 8) & 0xff, ret);
/* TODO: conn_bcast_event() ?? */
+ notify_helper(NOTIFY_RESPONSE, NULL, connection, cmd, ret);
if (ret < 0) /* Ignore any ERRNOs we got. */
ret = 0;
* and can be long lived.
* This changes an device->flag, is triggered by drbd internals,
* and should be short-lived. */
+/* It needs to be a counter, since multiple threads might
+ independently suspend and resume IO. */
void drbd_suspend_io(struct drbd_device *device)
{
- set_bit(SUSPEND_IO, &device->flags);
+ atomic_inc(&device->suspend_cnt);
if (drbd_suspended(device))
return;
wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
void drbd_resume_io(struct drbd_device *device)
{
- clear_bit(SUSPEND_IO, &device->flags);
- wake_up(&device->misc_wait);
+ if (atomic_dec_and_test(&device->suspend_cnt))
+ wake_up(&device->misc_wait);
}
/**
enum determine_dev_size
drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
{
- sector_t prev_first_sect, prev_size; /* previous meta location */
- sector_t la_size_sect, u_size;
+ struct md_offsets_and_sizes {
+ u64 last_agreed_sect;
+ u64 md_offset;
+ s32 al_offset;
+ s32 bm_offset;
+ u32 md_size_sect;
+
+ u32 al_stripes;
+ u32 al_stripe_size_4k;
+ } prev;
+ sector_t u_size, size;
struct drbd_md *md = &device->ldev->md;
- u32 prev_al_stripe_size_4k;
- u32 prev_al_stripes;
- sector_t size;
char ppb[10];
void *buffer;
int md_moved, la_size_changed;
enum determine_dev_size rv = DS_UNCHANGED;
- /* race:
- * application request passes inc_ap_bio,
- * but then cannot get an AL-reference.
- * this function later may wait on ap_bio_cnt == 0. -> deadlock.
+ /* We may change the on-disk offsets of our meta data below. Lock out
+ * anything that may cause meta data IO, to avoid acting on incomplete
+ * layout changes or scribbling over meta data that is in the process
+ * of being moved.
*
- * to avoid that:
- * Suspend IO right here.
- * still lock the act_log to not trigger ASSERTs there.
- */
+ * Move is not exactly correct, btw, currently we have all our meta
+ * data in core memory, to "move" it we just write it all out, there
+ * are no reads. */
drbd_suspend_io(device);
buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
if (!buffer) {
return DS_ERROR;
}
- /* no wait necessary anymore, actually we could assert that */
- wait_event(device->al_wait, lc_try_lock(device->act_log));
-
- prev_first_sect = drbd_md_first_sector(device->ldev);
- prev_size = device->ldev->md.md_size_sect;
- la_size_sect = device->ldev->md.la_size_sect;
+ /* remember current offset and sizes */
+ prev.last_agreed_sect = md->la_size_sect;
+ prev.md_offset = md->md_offset;
+ prev.al_offset = md->al_offset;
+ prev.bm_offset = md->bm_offset;
+ prev.md_size_sect = md->md_size_sect;
+ prev.al_stripes = md->al_stripes;
+ prev.al_stripe_size_4k = md->al_stripe_size_4k;
if (rs) {
/* rs is non NULL if we should change the AL layout only */
-
- prev_al_stripes = md->al_stripes;
- prev_al_stripe_size_4k = md->al_stripe_size_4k;
-
md->al_stripes = rs->al_stripes;
md->al_stripe_size_4k = rs->al_stripe_size / 4;
md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
rcu_read_unlock();
size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
- if (size < la_size_sect) {
+ if (size < prev.last_agreed_sect) {
if (rs && u_size == 0) {
/* Remove "rs &&" later. This check should always be active, but
right now the receiver expects the permissive behavior */
err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
if (unlikely(err)) {
/* currently there is only one error: ENOMEM! */
- size = drbd_bm_capacity(device)>>1;
+ size = drbd_bm_capacity(device);
if (size == 0) {
drbd_err(device, "OUT OF MEMORY! "
"Could not allocate bitmap!\n");
} else {
drbd_err(device, "BM resizing failed. "
- "Leaving size unchanged at size = %lu KB\n",
- (unsigned long)size);
+ "Leaving size unchanged\n");
}
rv = DS_ERROR;
}
/* racy, see comments above. */
drbd_set_my_capacity(device, size);
- device->ldev->md.la_size_sect = size;
+ md->la_size_sect = size;
drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
(unsigned long long)size>>1);
}
if (rv <= DS_ERROR)
goto err_out;
- la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
+ la_size_changed = (prev.last_agreed_sect != md->la_size_sect);
- md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
- || prev_size != device->ldev->md.md_size_sect;
+ md_moved = prev.md_offset != md->md_offset
+ || prev.md_size_sect != md->md_size_sect;
if (la_size_changed || md_moved || rs) {
u32 prev_flags;
* Clear the timer, to avoid scary "timer expired!" messages,
* "Superblock" is written out at least twice below, anyways. */
del_timer(&device->md_sync_timer);
- drbd_al_shrink(device); /* All extents inactive. */
+ /* We won't change the "al-extents" setting, we just may need
+ * to move the on-disk location of the activity log ringbuffer.
+ * Lock for transaction is good enough, it may well be "dirty"
+ * or even "starving". */
+ wait_event(device->al_wait, lc_try_lock_for_transaction(device->act_log));
+
+ /* mark current on-disk bitmap and activity log as unreliable */
prev_flags = md->flags;
- md->flags &= ~MDF_PRIMARY_IND;
+ md->flags |= MDF_FULL_SYNC | MDF_AL_DISABLED;
drbd_md_write(device, buffer);
+ drbd_al_initialize(device, buffer);
+
drbd_info(device, "Writing the whole bitmap, %s\n",
la_size_changed && md_moved ? "size changed and md moved" :
la_size_changed ? "size changed" : "md moved");
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
"size changed", BM_LOCKED_MASK);
- drbd_initialize_al(device, buffer);
+ /* on-disk bitmap and activity log is authoritative again
+ * (unless there was an IO error meanwhile...) */
md->flags = prev_flags;
drbd_md_write(device, buffer);
md->al_stripes, md->al_stripe_size_4k * 4);
}
- if (size > la_size_sect)
- rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
- if (size < la_size_sect)
+ if (size > prev.last_agreed_sect)
+ rv = prev.last_agreed_sect ? DS_GREW : DS_GREW_FROM_ZERO;
+ if (size < prev.last_agreed_sect)
rv = DS_SHRUNK;
if (0) {
err_out:
- if (rs) {
- md->al_stripes = prev_al_stripes;
- md->al_stripe_size_4k = prev_al_stripe_size_4k;
- md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
-
- drbd_md_set_sector_offsets(device, device->ldev);
- }
+ /* restore previous offset and sizes */
+ md->la_size_sect = prev.last_agreed_sect;
+ md->md_offset = prev.md_offset;
+ md->al_offset = prev.al_offset;
+ md->bm_offset = prev.bm_offset;
+ md->md_size_sect = prev.md_size_sect;
+ md->al_stripes = prev.al_stripes;
+ md->al_stripe_size_4k = prev.al_stripe_size_4k;
+ md->al_size_4k = (u64)prev.al_stripes * prev.al_stripe_size_4k;
}
lc_unlock(device->act_log);
wake_up(&device->al_wait);
lc_destroy(n);
return -EBUSY;
} else {
- if (t)
- lc_destroy(t);
+ lc_destroy(t);
}
drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elemens */
return 0;
if (b) {
struct drbd_connection *connection = first_peer_device(device)->connection;
+ blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS);
+
if (blk_queue_discard(b) &&
(connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
- /* For now, don't allow more than one activity log extent worth of data
- * to be discarded in one go. We may need to rework drbd_al_begin_io()
- * to allow for even larger discard ranges */
- blk_queue_max_discard_sectors(q, DRBD_MAX_DISCARD_SECTORS);
-
+ /* We don't care, stacking below should fix it for the local device.
+ * Whether or not it is a suitable granularity on the remote device
+ * is not our problem, really. If you care, you need to
+ * use devices with similar topology on all peers. */
+ q->limits.discard_granularity = 512;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
- /* REALLY? Is stacking secdiscard "legal"? */
- if (blk_queue_secdiscard(b))
- queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
} else {
blk_queue_max_discard_sectors(q, 0);
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
- queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
+ q->limits.discard_granularity = 0;
}
blk_queue_stack_limits(q, b);
q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
}
}
+ /* To avoid confusion, if this queue does not support discard, clear
+ * max_discard_sectors, which is what lsblk -D reports to the user. */
+ if (!blk_queue_discard(q)) {
+ blk_queue_max_discard_sectors(q, 0);
+ q->limits.discard_granularity = 0;
+ }
}
void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
connection->cstate == C_STANDALONE;
spin_unlock_irq(&connection->resource->req_lock);
if (stop_threads) {
- /* asender is implicitly stopped by receiver
- * in conn_disconnect() */
+ /* ack_receiver thread and ack_sender workqueue are implicitly
+ * stopped by receiver in conn_disconnect() */
drbd_thread_stop(&connection->receiver);
drbd_thread_stop(&connection->worker);
}
goto fail_unlock;
}
- write_lock_irq(&global_state_lock);
+ lock_all_resources();
retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
if (retcode == NO_ERROR) {
rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
drbd_resync_after_changed(device);
}
- write_unlock_irq(&global_state_lock);
+ unlock_all_resources();
if (retcode != NO_ERROR)
goto fail_unlock;
set_bit(MD_NO_FUA, &device->flags);
if (write_ordering_changed(old_disk_conf, new_disk_conf))
- drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
+ drbd_bump_write_ordering(device->resource, NULL, WO_BDEV_FLUSH);
drbd_md_sync(device);
return 0;
}
+/*
+ * Open one backing block device for @device.
+ *
+ * @bdev_path:  path handed to blkdev_get_by_path()
+ * @claim_ptr:  holder cookie used for the exclusive open
+ * @do_bd_link: if true, additionally register device->vdisk as holder of
+ *              the opened block device via bd_link_disk_holder()
+ *
+ * The device is opened read/write/exclusive.  If linking the holder fails,
+ * the block device is put again before returning.
+ *
+ * Returns the opened block device, or an ERR_PTR() on failure (either the
+ * one from blkdev_get_by_path() or the error of bd_link_disk_holder()).
+ * Failures are logged with drbd_err().
+ */
+static struct block_device *open_backing_dev(struct drbd_device *device,
+		const char *bdev_path, void *claim_ptr, bool do_bd_link)
+{
+	struct block_device *bdev;
+	int link_err;
+
+	bdev = blkdev_get_by_path(bdev_path,
+				  FMODE_READ | FMODE_WRITE | FMODE_EXCL, claim_ptr);
+	if (IS_ERR(bdev)) {
+		drbd_err(device, "open(\"%s\") failed with %ld\n",
+			 bdev_path, PTR_ERR(bdev));
+		return bdev;
+	}
+
+	if (do_bd_link) {
+		link_err = bd_link_disk_holder(bdev, device->vdisk);
+		if (link_err) {
+			/* undo the open before reporting the failure */
+			blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+			drbd_err(device, "bd_link_disk_holder(\"%s\", ...) failed with %d\n",
+				 bdev_path, link_err);
+			return ERR_PTR(link_err);
+		}
+	}
+
+	return bdev;
+}
+
+/*
+ * Open both the data device and the meta data device named in
+ * @new_disk_conf and store them in @nbc.
+ *
+ * Returns NO_ERROR on success, ERR_OPEN_DISK if the data device could not
+ * be opened, ERR_OPEN_MD_DISK if the meta data device could not be opened.
+ * On ERR_OPEN_MD_DISK, nbc->backing_bdev has already been set and is left
+ * for the caller to clean up.
+ */
+static int open_backing_devices(struct drbd_device *device,
+		struct disk_conf *new_disk_conf,
+		struct drbd_backing_dev *nbc)
+{
+	struct block_device *bdev;
+	void *md_claim_ptr;
+	bool md_needs_link;
+
+	bdev = open_backing_dev(device, new_disk_conf->backing_dev, device, true);
+	if (IS_ERR(bdev))
+		return ERR_OPEN_DISK;
+	nbc->backing_bdev = bdev;
+
+	/*
+	 * meta_dev_idx >= 0 means external, fixed size meta data, where
+	 * several drbd minors may share one meta device.  TODO: in that
+	 * case, be paranoid and check that [md_bdev, meta_dev_idx] is not
+	 * already in use by another drbd minor.  (drbd.conf + drbdadm should
+	 * have checked that already, but if they were bypassed or fooled we
+	 * need to double check here.)
+	 *
+	 * Claim pointer: the device itself when claimed exclusively, the
+	 * shared drbd_m_holder when potentially shared with other minors.
+	 */
+	if (new_disk_conf->meta_dev_idx < 0)
+		md_claim_ptr = (void *)device;
+	else
+		md_claim_ptr = (void *)drbd_m_holder;
+
+	/*
+	 * With internal meta data the (source, target) tuple is the same as
+	 * for the data device; linking it again would be a double
+	 * bd_claim_by_disk(), so only link external meta data.
+	 */
+	md_needs_link = new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_FLEX_INT &&
+			new_disk_conf->meta_dev_idx != DRBD_MD_INDEX_INTERNAL;
+
+	bdev = open_backing_dev(device, new_disk_conf->meta_dev,
+				md_claim_ptr, md_needs_link);
+	if (IS_ERR(bdev))
+		return ERR_OPEN_MD_DISK;
+	nbc->md_bdev = bdev;
+
+	return NO_ERROR;
+}
+
+/*
+ * Release one backing block device previously opened read/write/exclusive.
+ *
+ * @bdev:         block device to release; NULL is accepted and ignored
+ * @do_bd_unlink: if true, first drop the holder link between @bdev and
+ *                device->vdisk
+ */
+static void close_backing_dev(struct drbd_device *device, struct block_device *bdev,
+		bool do_bd_unlink)
+{
+	if (bdev) {
+		if (do_bd_unlink)
+			bd_unlink_disk_holder(bdev, device->vdisk);
+		blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+	}
+}
+
+/*
+ * Close both block devices of @ldev and free it together with its
+ * disk_conf.  A NULL @ldev is a no-op.
+ *
+ * The meta data device is closed first; its holder link is only removed
+ * when it is a different block device than the data device.  The data
+ * device is always unlinked.
+ */
+void drbd_backing_dev_free(struct drbd_device *device, struct drbd_backing_dev *ldev)
+{
+	bool md_is_separate;
+
+	if (!ldev)
+		return;
+
+	md_is_separate = ldev->md_bdev != ldev->backing_bdev;
+	close_backing_dev(device, ldev->md_bdev, md_is_separate);
+	close_backing_dev(device, ldev->backing_bdev, true);
+
+	kfree(ldev->disk_conf);
+	kfree(ldev);
+}
+
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
{
struct drbd_config_context adm_ctx;
sector_t min_md_device_sectors;
struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
struct disk_conf *new_disk_conf = NULL;
- struct block_device *bdev;
struct lru_cache *resync_lru = NULL;
struct fifo_buffer *new_plan = NULL;
union drbd_state ns, os;
device = adm_ctx.device;
mutex_lock(&adm_ctx.resource->adm_mutex);
peer_device = first_peer_device(device);
- connection = peer_device ? peer_device->connection : NULL;
+ connection = peer_device->connection;
conn_reconfig_start(connection);
/* if you want to reconfigure, please tear down first */
goto fail;
}
- write_lock_irq(&global_state_lock);
- retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
- write_unlock_irq(&global_state_lock);
- if (retcode != NO_ERROR)
- goto fail;
-
rcu_read_lock();
nc = rcu_dereference(connection->net_conf);
if (nc) {
}
rcu_read_unlock();
- bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
- FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
- if (IS_ERR(bdev)) {
- drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
- PTR_ERR(bdev));
- retcode = ERR_OPEN_DISK;
- goto fail;
- }
- nbc->backing_bdev = bdev;
-
- /*
- * meta_dev_idx >= 0: external fixed size, possibly multiple
- * drbd sharing one meta device. TODO in that case, paranoia
- * check that [md_bdev, meta_dev_idx] is not yet used by some
- * other drbd minor! (if you use drbd.conf + drbdadm, that
- * should check it for you already; but if you don't, or
- * someone fooled it, we need to double check here)
- */
- bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
- FMODE_READ | FMODE_WRITE | FMODE_EXCL,
- (new_disk_conf->meta_dev_idx < 0) ?
- (void *)device : (void *)drbd_m_holder);
- if (IS_ERR(bdev)) {
- drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
- PTR_ERR(bdev));
- retcode = ERR_OPEN_MD_DISK;
+ retcode = open_backing_devices(device, new_disk_conf, nbc);
+ if (retcode != NO_ERROR)
goto fail;
- }
- nbc->md_bdev = bdev;
if ((nbc->backing_bdev == nbc->md_bdev) !=
(new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
goto force_diskless_dec;
}
+ lock_all_resources();
+ retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
+ if (retcode != NO_ERROR) {
+ unlock_all_resources();
+ goto force_diskless_dec;
+ }
+
/* Reset the "barriers don't work" bits here, then force meta data to
* be written, to ensure we determine if barriers are supported. */
if (new_disk_conf->md_flushes)
new_disk_conf = NULL;
new_plan = NULL;
- drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
+ drbd_resync_after_changed(device);
+ drbd_bump_write_ordering(device->resource, device->ldev, WO_BDEV_FLUSH);
+ unlock_all_resources();
if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
set_bit(CRASHED_PRIMARY, &device->flags);
fail:
conn_reconfig_done(connection);
if (nbc) {
- if (nbc->backing_bdev)
- blkdev_put(nbc->backing_bdev,
- FMODE_READ | FMODE_WRITE | FMODE_EXCL);
- if (nbc->md_bdev)
- blkdev_put(nbc->md_bdev,
- FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ close_backing_dev(device, nbc->md_bdev, nbc->md_bdev != nbc->backing_bdev);
+ close_backing_dev(device, nbc->backing_bdev, true);
kfree(nbc);
}
kfree(new_disk_conf);
static int adm_detach(struct drbd_device *device, int force)
{
enum drbd_state_rv retcode;
+ void *buffer;
int ret;
if (force) {
}
drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
- drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
- retcode = drbd_request_state(device, NS(disk, D_FAILED));
- drbd_md_put_buffer(device);
+ buffer = drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
+ if (buffer) {
+ retcode = drbd_request_state(device, NS(disk, D_FAILED));
+ drbd_md_put_buffer(device);
+ } else /* already <= D_FAILED */
+ retcode = SS_NOTHING_TO_DO;
/* D_FAILED will transition to DISKLESS. */
+ drbd_resume_io(device);
ret = wait_event_interruptible(device->misc_wait,
device->state.disk != D_FAILED);
- drbd_resume_io(device);
if ((int)retcode == (int)SS_IS_DISKLESS)
retcode = SS_NOTHING_TO_DO;
if (ret)
return 0;
}
+/*
+ * Fill @info with the connection-level values reported to user space:
+ * the connection state and the result of conn_highest_peer().
+ */
+static void connection_to_info(struct connection_info *info,
+			       struct drbd_connection *connection)
+{
+	info->conn_role = conn_highest_peer(connection);
+	info->conn_connection_state = connection->cstate;
+}
+
+/*
+ * Fill @info from the state of the device behind @peer_device: peer disk
+ * state, the three resync-suspend bits, and the replication state.
+ */
+static void peer_device_to_info(struct peer_device_info *info,
+				struct drbd_peer_device *peer_device)
+{
+	struct drbd_device *dev = peer_device->device;
+
+	info->peer_disk_state = dev->state.pdsk;
+	info->peer_resync_susp_user = dev->state.user_isp;
+	info->peer_resync_susp_peer = dev->state.peer_isp;
+	info->peer_resync_susp_dependency = dev->state.aftr_isp;
+	/* The reported replication state is never below C_WF_REPORT_PARAMS. */
+	info->peer_repl_state =
+		max_t(enum drbd_conns, C_WF_REPORT_PARAMS, dev->state.conn);
+}
+
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
{
+ struct connection_info connection_info;
+ enum drbd_notification_type flags;
+ unsigned int peer_devices = 0;
struct drbd_config_context adm_ctx;
struct drbd_peer_device *peer_device;
struct net_conf *old_net_conf, *new_net_conf = NULL;
connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
+ idr_for_each_entry(&connection->peer_devices, peer_device, i) {
+ peer_devices++;
+ }
+
+ connection_to_info(&connection_info, connection);
+ flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+ mutex_lock(&notification_mutex);
+ notify_connection_state(NULL, 0, connection, &connection_info, NOTIFY_CREATE | flags);
+ idr_for_each_entry(&connection->peer_devices, peer_device, i) {
+ struct peer_device_info peer_device_info;
+
+ peer_device_to_info(&peer_device_info, peer_device);
+ flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+ notify_peer_device_state(NULL, 0, peer_device, &peer_device_info, NOTIFY_CREATE | flags);
+ }
+ mutex_unlock(&notification_mutex);
mutex_unlock(&adm_ctx.resource->conf_update);
rcu_read_lock();
drbd_err(connection,
"unexpected rv2=%d in conn_try_disconnect()\n",
rv2);
+ /* Unlike in DRBD 9, the state engine has generated
+ * NOTIFY_DESTROY events before clearing connection->net_conf. */
}
return rv;
}
mutex_unlock(&device->resource->conf_update);
synchronize_rcu();
kfree(old_disk_conf);
+ new_disk_conf = NULL;
}
ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
fail_ldev:
put_ldev(device);
+ kfree(new_disk_conf);
goto fail;
}
mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
if (test_bit(NEW_CUR_UUID, &device->flags)) {
- drbd_uuid_new_current(device);
+ if (get_ldev_if_state(device, D_ATTACHING)) {
+ drbd_uuid_new_current(device);
+ put_ldev(device);
+ } else {
+ /* This is effectively a multi-stage "forced down".
+ * The NEW_CUR_UUID bit is supposedly only set, if we
+ * lost the replication connection, and are configured
+ * to freeze IO and wait for some fence-peer handler.
+ * So we still don't have a replication connection.
+ * And now we don't have a local disk either. After
+ * resume, we will fail all pending and new IO, because
+ * we don't have any data anymore. Which means we will
+ * eventually be able to terminate all users of this
+ * device, and then take it down. By bumping the
+ * "effective" data uuid, we make sure that you really
+ * need to tear down before you reconfigure, we will
+ * then refuse to re-connect or re-attach (because no
+ * matching real data uuid exists).
+ */
+ u64 val;
+ get_random_bytes(&val, sizeof(u64));
+ drbd_set_ed_uuid(device, val);
+ drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n");
+ }
clear_bit(NEW_CUR_UUID, &device->flags);
}
drbd_suspend_io(device);
return -EMSGSIZE;
}
+/*
+ * Locate attribute @attr inside the DRBD_NLA_CFG_CONTEXT nest of the request
+ * @nlh.  Returns NULL when the request carries no such nest.
+ *
+ * The generic netlink dump callbacks are called outside the genl_lock(), so
+ * they cannot use the simple attribute parsing code which uses global
+ * attribute tables.
+ */
+static struct nlattr *find_cfg_context_attr(const struct nlmsghdr *nlh, int attr)
+{
+	const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
+	const int maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
+	struct nlattr *ctx = nla_find(nlmsg_attrdata(nlh, hdrlen),
+				      nlmsg_attrlen(nlh, hdrlen),
+				      DRBD_NLA_CFG_CONTEXT);
+
+	return ctx ? drbd_nla_find_nested(maxtype, ctx, __nla_type(attr)) : NULL;
+}
+
+static void resource_to_info(struct resource_info *, struct drbd_resource *);
+
+int drbd_adm_dump_resources(struct sk_buff *skb, struct netlink_callback *cb)
+{ /* Emits at most one resource per call; cb->args[0] remembers the resource emitted last. */
+ struct drbd_genlmsghdr *dh;
+ struct drbd_resource *resource;
+ struct resource_info resource_info;
+ struct resource_statistics resource_statistics;
+ int err;
+
+ rcu_read_lock();
+ if (cb->args[0]) { /* continuation: re-find the previously emitted resource in the list */
+ for_each_resource_rcu(resource, &drbd_resources)
+ if (resource == (struct drbd_resource *)cb->args[0])
+ goto found_resource;
+ err = 0; /* resource was probably deleted */
+ goto out;
+ }
+ resource = list_entry(&drbd_resources,
+ struct drbd_resource, resources); /* first call: point at the list head so the loop below starts with the first entry */
+
+found_resource:
+ list_for_each_entry_continue_rcu(resource, &drbd_resources, resources) {
+ goto put_result; /* take only the entry following the cursor */
+ }
+ err = 0; /* nothing left to emit */
+ goto out;
+
+put_result:
+ dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &drbd_genl_family,
+ NLM_F_MULTI, DRBD_ADM_GET_RESOURCES);
+ err = -ENOMEM; /* reported if genlmsg_put() returned NULL */
+ if (!dh)
+ goto out;
+ dh->minor = -1U;
+ dh->ret_code = NO_ERROR;
+ err = nla_put_drbd_cfg_context(skb, resource, NULL, NULL);
+ if (err)
+ goto out;
+ err = res_opts_to_skb(skb, &resource->res_opts, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ resource_to_info(&resource_info, resource);
+ err = resource_info_to_skb(skb, &resource_info, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ resource_statistics.res_stat_write_ordering = resource->write_ordering;
+ err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ cb->args[0] = (long)resource; /* cursor for the next call; only advanced after all attributes were added */
+ genlmsg_end(skb, dh);
+ err = 0;
+
+out:
+ rcu_read_unlock();
+ if (err)
+ return err;
+ return skb->len; /* no error: report how much data skb now holds */
+}
+
+/*
+ * Fill @s with a snapshot of @device's counters.  The UUIDs, the meta-data
+ * flags and the lower-level congestion state are only filled in when
+ * get_ldev() succeeds; otherwise they keep the zero from the initial memset.
+ */
+static void device_to_statistics(struct device_statistics *s,
+				 struct drbd_device *device)
+{
+	memset(s, 0, sizeof(*s));
+	s->dev_upper_blocked = !may_inc_ap_bio(device);
+
+	if (get_ldev(device)) {
+		struct drbd_md *meta = &device->ldev->md;
+		u64 *hist = (u64 *)s->history_uuids;
+		struct request_queue *queue;
+		int i = 0;
+
+		/* The UUIDs are copied under the meta-data uuid_lock. */
+		spin_lock_irq(&meta->uuid_lock);
+		s->dev_current_uuid = meta->uuid[UI_CURRENT];
+		BUILD_BUG_ON(sizeof(s->history_uuids) < UI_HISTORY_END - UI_HISTORY_START + 1);
+		while (i < UI_HISTORY_END - UI_HISTORY_START + 1) {
+			hist[i] = meta->uuid[UI_HISTORY_START + i];
+			i++;
+		}
+		/* Zero whatever remains of the exported history array. */
+		while (i < HISTORY_UUIDS)
+			hist[i++] = 0;
+		s->history_uuids_len = HISTORY_UUIDS;
+		spin_unlock_irq(&meta->uuid_lock);
+
+		s->dev_disk_flags = meta->flags;
+		queue = bdev_get_queue(device->ldev->backing_bdev);
+		s->dev_lower_blocked =
+			bdi_congested(&queue->backing_dev_info,
+				      (1 << WB_async_congested) |
+				      (1 << WB_sync_congested));
+		put_ldev(device);
+	}
+
+	s->dev_size = drbd_get_capacity(device->this_bdev);
+	s->dev_read = device->read_cnt;
+	s->dev_write = device->writ_cnt;
+	s->dev_al_writes = device->al_writ_cnt;
+	s->dev_bm_writes = device->bm_writ_cnt;
+	s->dev_upper_pending = atomic_read(&device->ap_bio_cnt);
+	s->dev_lower_pending = atomic_read(&device->local_cnt);
+	s->dev_al_suspended = test_bit(AL_SUSPENDED, &device->flags);
+	s->dev_exposed_data_uuid = device->ed_uuid;
+}
+
+/*
+ * Drop the reference on the resource stored in cb->args[0], if there is one.
+ * @holder_nr is not used by this function.  Always returns 0.
+ */
+static int put_resource_in_arg0(struct netlink_callback *cb, int holder_nr)
+{
+	struct drbd_resource *resource = (struct drbd_resource *)cb->args[0];
+
+	if (resource)
+		kref_put(&resource->kref, drbd_destroy_resource);
+	return 0;
+}
+
+/* Release the resource reference kept in cb->args[0], if any. */
+int drbd_adm_dump_devices_done(struct netlink_callback *cb)
+{
+	return put_resource_in_arg0(cb, 7);
+}
+
+static void device_to_info(struct device_info *, struct drbd_device *);
+
+/*
+ * Dump callback producing DRBD_ADM_GET_DEVICES replies, at most one device
+ * per call.
+ *
+ * Cursor kept in @cb:
+ *   args[0]  resource to restrict the dump to (set on the first call when the
+ *            request names a resource), or 0 to walk all of drbd_devices
+ *   args[1]  minor to continue the search from
+ *
+ * If the named resource does not exist, a single reply carrying
+ * ERR_RES_NOT_KNOWN and no device data is generated.
+ *
+ * Returns skb->len, or a negative error if the reply could not be built.
+ */
+int drbd_adm_dump_devices(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlattr *resource_filter;
+	struct drbd_resource *resource;
+	struct drbd_device *uninitialized_var(device);
+	int minor, err, retcode;
+	struct drbd_genlmsghdr *dh;
+	struct device_info device_info;
+	struct device_statistics device_statistics;
+	struct idr *idr_to_search;
+
+	/*
+	 * Enter the RCU read section before the filter lookup: the
+	 * "resource not known" path jumps to put_result and leaves through
+	 * "out", which unconditionally calls rcu_read_unlock().
+	 */
+	rcu_read_lock();
+	resource = (struct drbd_resource *)cb->args[0];
+	if (!cb->args[0] && !cb->args[1]) {
+		resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
+		if (resource_filter) {
+			retcode = ERR_RES_NOT_KNOWN;
+			resource = drbd_find_resource(nla_data(resource_filter));
+			if (!resource)
+				goto put_result;
+			cb->args[0] = (long)resource;
+		}
+	}
+
+	minor = cb->args[1];
+	idr_to_search = resource ? &resource->devices : &drbd_devices;
+	device = idr_get_next(idr_to_search, &minor);
+	if (!device) {
+		err = 0;
+		goto out;
+	}
+	idr_for_each_entry_continue(idr_to_search, device, minor) {
+		retcode = NO_ERROR;
+		goto put_result;  /* only one iteration */
+	}
+	err = 0;
+	goto out;  /* no more devices */
+
+put_result:
+	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+			cb->nlh->nlmsg_seq, &drbd_genl_family,
+			NLM_F_MULTI, DRBD_ADM_GET_DEVICES);
+	err = -ENOMEM;
+	if (!dh)
+		goto out;
+	dh->ret_code = retcode;
+	dh->minor = -1U;
+	if (retcode == NO_ERROR) {
+		dh->minor = device->minor;
+		err = nla_put_drbd_cfg_context(skb, device->resource, NULL, device);
+		if (err)
+			goto out;
+		/* The disk configuration is only added while a local disk is attached. */
+		if (get_ldev(device)) {
+			struct disk_conf *disk_conf =
+				rcu_dereference(device->ldev->disk_conf);
+
+			err = disk_conf_to_skb(skb, disk_conf, !capable(CAP_SYS_ADMIN));
+			put_ldev(device);
+			if (err)
+				goto out;
+		}
+		device_to_info(&device_info, device);
+		err = device_info_to_skb(skb, &device_info, !capable(CAP_SYS_ADMIN));
+		if (err)
+			goto out;
+
+		device_to_statistics(&device_statistics, device);
+		err = device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
+		if (err)
+			goto out;
+		/* Advance the cursor only once the reply is complete. */
+		cb->args[1] = minor + 1;
+	}
+	genlmsg_end(skb, dh);
+	err = 0;
+
+out:
+	rcu_read_unlock();
+	if (err)
+		return err;
+	return skb->len;
+}
+
+int drbd_adm_dump_connections_done(struct netlink_callback *cb)
+{ /* Releases the resource reference kept in cb->args[0], if any. */
+ return put_resource_in_arg0(cb, 6);
+}
+
+enum { SINGLE_RESOURCE, ITERATE_RESOURCES }; /* dump modes stored in cb->args[1] by drbd_adm_dump_connections() */
+
+int drbd_adm_dump_connections(struct sk_buff *skb, struct netlink_callback *cb)
+{ /* Emits at most one connection per call; cursor: cb->args[0] = resource, cb->args[1] = mode, cb->args[2] = connection emitted last. */
+ struct nlattr *resource_filter;
+ struct drbd_resource *resource = NULL, *next_resource;
+ struct drbd_connection *uninitialized_var(connection);
+ int err = 0, retcode;
+ struct drbd_genlmsghdr *dh;
+ struct connection_info connection_info;
+ struct connection_statistics connection_statistics;
+
+ rcu_read_lock();
+ resource = (struct drbd_resource *)cb->args[0];
+ if (!cb->args[0]) { /* first call: check whether the request names a resource */
+ resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
+ if (resource_filter) {
+ retcode = ERR_RES_NOT_KNOWN;
+ resource = drbd_find_resource(nla_data(resource_filter));
+ if (!resource)
+ goto put_result; /* reply with ERR_RES_NOT_KNOWN only */
+ cb->args[0] = (long)resource;
+ cb->args[1] = SINGLE_RESOURCE;
+ }
+ }
+ if (!resource) { /* no filter given: walk all resources, starting with the first */
+ if (list_empty(&drbd_resources))
+ goto out;
+ resource = list_first_entry(&drbd_resources, struct drbd_resource, resources);
+ kref_get(&resource->kref); /* dropped again when the cursor moves to the next resource */
+ cb->args[0] = (long)resource;
+ cb->args[1] = ITERATE_RESOURCES;
+ }
+
+ next_resource:
+ rcu_read_unlock(); /* leave the RCU read section while acquiring the mutex */
+ mutex_lock(&resource->conf_update);
+ rcu_read_lock();
+ if (cb->args[2]) { /* continuation: re-find the connection emitted last */
+ for_each_connection_rcu(connection, resource)
+ if (connection == (struct drbd_connection *)cb->args[2])
+ goto found_connection;
+ /* connection was probably deleted */
+ goto no_more_connections;
+ }
+ connection = list_entry(&resource->connections, struct drbd_connection, connections); /* point at the list head so the loop below starts with the first entry */
+
+found_connection:
+ list_for_each_entry_continue_rcu(connection, &resource->connections, connections) {
+ if (!has_net_conf(connection))
+ continue;
+ retcode = NO_ERROR;
+ goto put_result; /* only one iteration */
+ }
+
+no_more_connections:
+ if (cb->args[1] == ITERATE_RESOURCES) {
+ for_each_resource_rcu(next_resource, &drbd_resources) {
+ if (next_resource == resource)
+ goto found_resource;
+ }
+ /* resource was probably deleted */
+ }
+ goto out;
+
+found_resource:
+ list_for_each_entry_continue_rcu(next_resource, &drbd_resources, resources) {
+ mutex_unlock(&resource->conf_update);
+ kref_put(&resource->kref, drbd_destroy_resource);
+ resource = next_resource;
+ kref_get(&resource->kref);
+ cb->args[0] = (long)resource;
+ cb->args[2] = 0; /* restart the connection cursor for the new resource */
+ goto next_resource;
+ }
+ goto out; /* no more resources */
+
+put_result:
+ dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, &drbd_genl_family,
+ NLM_F_MULTI, DRBD_ADM_GET_CONNECTIONS);
+ err = -ENOMEM; /* reported if genlmsg_put() returned NULL */
+ if (!dh)
+ goto out;
+ dh->ret_code = retcode;
+ dh->minor = -1U;
+ if (retcode == NO_ERROR) {
+ struct net_conf *net_conf;
+
+ err = nla_put_drbd_cfg_context(skb, resource, connection, NULL);
+ if (err)
+ goto out;
+ net_conf = rcu_dereference(connection->net_conf);
+ if (net_conf) {
+ err = net_conf_to_skb(skb, net_conf, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ }
+ connection_to_info(&connection_info, connection);
+ err = connection_info_to_skb(skb, &connection_info, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
+ err = connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
+ if (err)
+ goto out;
+ cb->args[2] = (long)connection; /* cursor for the next call */
+ }
+ genlmsg_end(skb, dh);
+ err = 0;
+
+out:
+ rcu_read_unlock();
+ if (resource) /* a non-NULL resource here means conf_update was taken at next_resource */
+ mutex_unlock(&resource->conf_update);
+ if (err)
+ return err;
+ return skb->len;
+}
+
+/* Bits reported in peer_device_statistics.peer_dev_flags. */
+enum mdf_peer_flag {
+	MDF_PEER_CONNECTED =	1 << 0,
+	MDF_PEER_OUTDATED =	1 << 1,
+	MDF_PEER_FENCING =	1 << 2,
+	MDF_PEER_FULL_SYNC =	1 << 3,
+};
+
+/*
+ * Fill @s with the counters of the device behind @peer_device.  The bitmap
+ * UUID and the peer flags are only filled in when get_ldev() succeeds;
+ * otherwise they keep the zero from the initial memset.
+ */
+static void peer_device_to_statistics(struct peer_device_statistics *s,
+				      struct drbd_peer_device *peer_device)
+{
+	struct drbd_device *dev = peer_device->device;
+
+	memset(s, 0, sizeof(*s));
+	s->peer_dev_received = dev->recv_cnt;
+	s->peer_dev_sent = dev->send_cnt;
+	s->peer_dev_pending = atomic_read(&dev->ap_pending_cnt) +
+			      atomic_read(&dev->rs_pending_cnt);
+	s->peer_dev_unacked = atomic_read(&dev->unacked_cnt);
+	s->peer_dev_out_of_sync = drbd_bm_total_weight(dev) << (BM_BLOCK_SHIFT - 9);
+	s->peer_dev_resync_failed = dev->rs_failed << (BM_BLOCK_SHIFT - 9);
+
+	if (get_ldev(dev)) {
+		struct drbd_md *meta = &dev->ldev->md;
+		int peer_flags = 0;
+
+		spin_lock_irq(&meta->uuid_lock);
+		s->peer_dev_bitmap_uuid = meta->uuid[UI_BITMAP];
+		spin_unlock_irq(&meta->uuid_lock);
+
+		if (drbd_md_test_flag(dev->ldev, MDF_CONNECTED_IND))
+			peer_flags |= MDF_PEER_CONNECTED;
+		if (drbd_md_test_flag(dev->ldev, MDF_CONSISTENT) &&
+		    !drbd_md_test_flag(dev->ldev, MDF_WAS_UP_TO_DATE))
+			peer_flags |= MDF_PEER_OUTDATED;
+		/* FIXME: MDF_PEER_FENCING? */
+		if (drbd_md_test_flag(dev->ldev, MDF_FULL_SYNC))
+			peer_flags |= MDF_PEER_FULL_SYNC;
+		s->peer_dev_flags = peer_flags;
+		put_ldev(dev);
+	}
+}
+
+int drbd_adm_dump_peer_devices_done(struct netlink_callback *cb)
+{ /* Releases the resource reference kept in cb->args[0], if any. */
+ return put_resource_in_arg0(cb, 9);
+}
+
+/*
+ * Dump callback producing DRBD_ADM_GET_PEER_DEVICES replies, at most one
+ * peer device per call.
+ *
+ * Cursor kept in @cb:
+ *   args[0]  resource to restrict the dump to (set on the first call when the
+ *            request names a resource), or 0 to walk all of drbd_devices
+ *   args[1]  minor of the device currently being walked
+ *   args[2]  peer device emitted last for that device, or 0
+ *
+ * If the named resource does not exist, a single reply carrying
+ * ERR_RES_NOT_KNOWN and no peer device data is generated.
+ *
+ * Returns skb->len, or a negative error if the reply could not be built.
+ */
+int drbd_adm_dump_peer_devices(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlattr *resource_filter;
+	struct drbd_resource *resource;
+	struct drbd_device *uninitialized_var(device);
+	struct drbd_peer_device *peer_device = NULL;
+	int minor, err, retcode;
+	struct drbd_genlmsghdr *dh;
+	struct idr *idr_to_search;
+
+	/*
+	 * Enter the RCU read section before the filter lookup: the
+	 * "resource not known" path jumps to put_result and leaves through
+	 * "out", which unconditionally calls rcu_read_unlock().
+	 */
+	rcu_read_lock();
+	resource = (struct drbd_resource *)cb->args[0];
+	if (!cb->args[0] && !cb->args[1]) {
+		resource_filter = find_cfg_context_attr(cb->nlh, T_ctx_resource_name);
+		if (resource_filter) {
+			retcode = ERR_RES_NOT_KNOWN;
+			resource = drbd_find_resource(nla_data(resource_filter));
+			if (!resource)
+				goto put_result;
+		}
+		cb->args[0] = (long)resource;
+	}
+
+	minor = cb->args[1];
+	idr_to_search = resource ? &resource->devices : &drbd_devices;
+	device = idr_find(idr_to_search, minor);
+	if (!device) {
+next_device:
+		minor++;
+		cb->args[2] = 0;
+		device = idr_get_next(idr_to_search, &minor);
+		if (!device) {
+			err = 0;
+			goto out;
+		}
+	}
+	if (cb->args[2]) {
+		for_each_peer_device(peer_device, device)
+			if (peer_device == (struct drbd_peer_device *)cb->args[2])
+				goto found_peer_device;
+		/* peer device was probably deleted */
+		goto next_device;
+	}
+	/* Make peer_device point to the list head (not the first entry). */
+	peer_device = list_entry(&device->peer_devices, struct drbd_peer_device, peer_devices);
+
+found_peer_device:
+	list_for_each_entry_continue_rcu(peer_device, &device->peer_devices, peer_devices) {
+		if (!has_net_conf(peer_device->connection))
+			continue;
+		retcode = NO_ERROR;
+		goto put_result;  /* only one iteration */
+	}
+	goto next_device;
+
+put_result:
+	dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+			cb->nlh->nlmsg_seq, &drbd_genl_family,
+			NLM_F_MULTI, DRBD_ADM_GET_PEER_DEVICES);
+	err = -ENOMEM;
+	if (!dh)
+		goto out;
+	dh->ret_code = retcode;
+	dh->minor = -1U;
+	if (retcode == NO_ERROR) {
+		struct peer_device_info peer_device_info;
+		struct peer_device_statistics peer_device_statistics;
+
+		dh->minor = minor;
+		err = nla_put_drbd_cfg_context(skb, device->resource, peer_device->connection, device);
+		if (err)
+			goto out;
+		peer_device_to_info(&peer_device_info, peer_device);
+		err = peer_device_info_to_skb(skb, &peer_device_info, !capable(CAP_SYS_ADMIN));
+		if (err)
+			goto out;
+		peer_device_to_statistics(&peer_device_statistics, peer_device);
+		err = peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
+		if (err)
+			goto out;
+		/* Advance the cursor only once the reply is complete. */
+		cb->args[1] = minor;
+		cb->args[2] = (long)peer_device;
+	}
+	genlmsg_end(skb, dh);
+	err = 0;
+
+out:
+	rcu_read_unlock();
+	if (err)
+		return err;
+	return skb->len;
+}
/*
* Return the connection of @resource if @resource has exactly one connection.
*/
return NO_ERROR;
}
+/*
+ * Fill @info with the resource-level values reported to user space: the
+ * three suspend bits and the role derived from the resource's first
+ * connection via conn_highest_role().
+ */
+static void resource_to_info(struct resource_info *info,
+			     struct drbd_resource *resource)
+{
+	struct drbd_connection *first = first_connection(resource);
+
+	info->res_role = conn_highest_role(first);
+	info->res_susp_fen = resource->susp_fen;
+	info->res_susp_nod = resource->susp_nod;
+	info->res_susp = resource->susp;
+}
+
int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
{
+ struct drbd_connection *connection;
struct drbd_config_context adm_ctx;
enum drbd_ret_code retcode;
struct res_opts res_opts;
}
/* not yet safe for genl_family.parallel_ops */
- if (!conn_create(adm_ctx.resource_name, &res_opts))
+ mutex_lock(&resources_mutex);
+ connection = conn_create(adm_ctx.resource_name, &res_opts);
+ mutex_unlock(&resources_mutex);
+
+ if (connection) {
+ struct resource_info resource_info;
+
+ mutex_lock(&notification_mutex);
+ resource_to_info(&resource_info, connection->resource);
+ notify_resource_state(NULL, 0, connection->resource,
+ &resource_info, NOTIFY_CREATE);
+ mutex_unlock(&notification_mutex);
+ } else
retcode = ERR_NOMEM;
+
out:
drbd_adm_finish(&adm_ctx, info, retcode);
return 0;
}
+static void device_to_info(struct device_info *info,
+ struct drbd_device *device)
+{ /* Copies the device's disk state into @info. */
+ info->dev_disk_state = device->state.disk;
+}
+
+
int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
{
struct drbd_config_context adm_ctx;
mutex_lock(&adm_ctx.resource->adm_mutex);
retcode = drbd_create_device(&adm_ctx, dh->minor);
+ if (retcode == NO_ERROR) {
+ struct drbd_device *device;
+ struct drbd_peer_device *peer_device;
+ struct device_info info;
+ unsigned int peer_devices = 0;
+ enum drbd_notification_type flags;
+
+ device = minor_to_device(dh->minor);
+ for_each_peer_device(peer_device, device) {
+ if (!has_net_conf(peer_device->connection))
+ continue;
+ peer_devices++;
+ }
+
+ device_to_info(&info, device);
+ mutex_lock(&notification_mutex);
+ flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+ notify_device_state(NULL, 0, device, &info, NOTIFY_CREATE | flags);
+ for_each_peer_device(peer_device, device) {
+ struct peer_device_info peer_device_info;
+
+ if (!has_net_conf(peer_device->connection))
+ continue;
+ peer_device_to_info(&peer_device_info, peer_device);
+ flags = (peer_devices--) ? NOTIFY_CONTINUES : 0;
+ notify_peer_device_state(NULL, 0, peer_device, &peer_device_info,
+ NOTIFY_CREATE | flags);
+ }
+ mutex_unlock(&notification_mutex);
+ }
mutex_unlock(&adm_ctx.resource->adm_mutex);
out:
drbd_adm_finish(&adm_ctx, info, retcode);
static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
{
+ struct drbd_peer_device *peer_device;
+
if (device->state.disk == D_DISKLESS &&
/* no need to be device->state.conn == C_STANDALONE &&
* we may want to delete a minor from a live replication group.
*/
device->state.role == R_SECONDARY) {
+ struct drbd_connection *connection =
+ first_connection(device->resource);
+
_drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
CS_VERBOSE + CS_WAIT_COMPLETE);
+
+ /* If the state engine hasn't stopped the sender thread yet, we
+ * need to flush the sender work queue before generating the
+ * DESTROY events here. */
+ if (get_t_state(&connection->worker) == RUNNING)
+ drbd_flush_workqueue(&connection->sender_work);
+
+ mutex_lock(&notification_mutex);
+ for_each_peer_device(peer_device, device) {
+ if (!has_net_conf(peer_device->connection))
+ continue;
+ notify_peer_device_state(NULL, 0, peer_device, NULL,
+ NOTIFY_DESTROY | NOTIFY_CONTINUES);
+ }
+ notify_device_state(NULL, 0, device, NULL, NOTIFY_DESTROY);
+ mutex_unlock(&notification_mutex);
+
drbd_delete_device(device);
return NO_ERROR;
} else
if (!idr_is_empty(&resource->devices))
return ERR_RES_IN_USE;
+ /* The state engine has stopped the sender thread, so we don't
+ * need to flush the sender work queue before generating the
+ * DESTROY event here. */
+ mutex_lock(&notification_mutex);
+ notify_resource_state(NULL, 0, resource, NULL, NOTIFY_DESTROY);
+ mutex_unlock(&notification_mutex);
+
+ mutex_lock(&resources_mutex);
list_del_rcu(&resource->resources);
+ mutex_unlock(&resources_mutex);
/* Make sure all threads have actually stopped: state handling only
* does drbd_thread_stop_nowait(). */
list_for_each_entry(connection, &resource->connections, connections)
void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
{
- static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
struct sk_buff *msg;
struct drbd_genlmsghdr *d_out;
unsigned seq;
if (nla_put_status_info(msg, device, sib))
goto nla_put_failure;
genlmsg_end(msg, d_out);
- err = drbd_genl_multicast_events(msg, 0);
+ err = drbd_genl_multicast_events(msg, GFP_NOWAIT);
/* msg has been consumed or freed in netlink_broadcast() */
if (err && err != -ESRCH)
goto failed;
"Event seq:%u sib_reason:%u\n",
err, seq, sib->sib_reason);
}
+
+/*
+ * Append a notification header carrying @type to @msg.  Returns whatever
+ * drbd_notification_header_to_skb() returns.
+ */
+static int nla_put_notification_header(struct sk_buff *msg,
+				       enum drbd_notification_type type)
+{
+	struct drbd_notification_header header = { .nh_type = type };
+
+	return drbd_notification_header_to_skb(msg, &header, true);
+}
+
+/*
+ * notify_resource_state() - build a DRBD_RESOURCE_STATE message for @resource
+ * @skb:           buffer to append the message to, or NULL to allocate a new
+ *                 one and multicast it
+ * @seq:           sequence number for the message; when @skb is NULL it is
+ *                 replaced by the next value of notify_genl_seq
+ * @resource:      resource the notification is about
+ * @resource_info: state payload; not added for NOTIFY_DESTROY
+ * @type:          notification type, possibly combined with flag bits
+ *
+ * Failures are only logged.
+ * NOTE(review): on an attribute failure the skb is freed even when it was
+ * supplied by the caller - confirm that callers do not touch it afterwards.
+ */
+void notify_resource_state(struct sk_buff *skb,
+			   unsigned int seq,
+			   struct drbd_resource *resource,
+			   struct resource_info *resource_info,
+			   enum drbd_notification_type type)
+{
+	struct resource_statistics resource_statistics;
+	struct drbd_genlmsghdr *dh;
+	bool multicast = false;
+	int err;
+
+	if (!skb) {
+		seq = atomic_inc_return(&notify_genl_seq);
+		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+		err = -ENOMEM;
+		if (!skb)
+			goto failed;
+		multicast = true;
+	}
+
+	err = -EMSGSIZE;
+	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_RESOURCE_STATE);
+	if (!dh)
+		goto nla_put_failure;
+	dh->minor = -1U;
+	dh->ret_code = NO_ERROR;
+	if (nla_put_drbd_cfg_context(skb, resource, NULL, NULL) ||
+	    nla_put_notification_header(skb, type) ||
+	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+	     resource_info_to_skb(skb, resource_info, true)))
+		goto nla_put_failure;
+	resource_statistics.res_stat_write_ordering = resource->write_ordering;
+	err = resource_statistics_to_skb(skb, &resource_statistics, !capable(CAP_SYS_ADMIN));
+	if (err)
+		goto nla_put_failure;
+	genlmsg_end(skb, dh);
+	if (multicast) {
+		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+		/* skb has been consumed or freed in netlink_broadcast() */
+		if (err && err != -ESRCH)
+			goto failed;
+	}
+	return;
+
+nla_put_failure:
+	nlmsg_free(skb);
+failed:
+	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
+			err, seq);
+}
+
+/*
+ * notify_device_state() - build a DRBD_DEVICE_STATE message for @device
+ * @skb:         buffer to append the message to, or NULL to allocate a new
+ *               one and multicast it
+ * @seq:         sequence number for the message; when @skb is NULL it is
+ *               replaced by the next value of notify_genl_seq
+ * @device:      device the notification is about
+ * @device_info: state payload; not added for NOTIFY_DESTROY
+ * @type:        notification type, possibly combined with flag bits
+ *
+ * Failures are only logged.  The result of adding the statistics is not
+ * checked.
+ * NOTE(review): on an attribute failure the skb is freed even when it was
+ * supplied by the caller - confirm that callers do not touch it afterwards.
+ */
+void notify_device_state(struct sk_buff *skb,
+			 unsigned int seq,
+			 struct drbd_device *device,
+			 struct device_info *device_info,
+			 enum drbd_notification_type type)
+{
+	struct device_statistics device_statistics;
+	struct drbd_genlmsghdr *dh;
+	bool multicast = false;
+	int err;
+
+	if (!skb) {
+		seq = atomic_inc_return(&notify_genl_seq);
+		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+		err = -ENOMEM;
+		if (!skb)
+			goto failed;
+		multicast = true;
+	}
+
+	err = -EMSGSIZE;
+	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_DEVICE_STATE);
+	if (!dh)
+		goto nla_put_failure;
+	dh->minor = device->minor;
+	dh->ret_code = NO_ERROR;
+	if (nla_put_drbd_cfg_context(skb, device->resource, NULL, device) ||
+	    nla_put_notification_header(skb, type) ||
+	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+	     device_info_to_skb(skb, device_info, true)))
+		goto nla_put_failure;
+	device_to_statistics(&device_statistics, device);
+	device_statistics_to_skb(skb, &device_statistics, !capable(CAP_SYS_ADMIN));
+	genlmsg_end(skb, dh);
+	if (multicast) {
+		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+		/* skb has been consumed or freed in netlink_broadcast() */
+		if (err && err != -ESRCH)
+			goto failed;
+	}
+	return;
+
+nla_put_failure:
+	nlmsg_free(skb);
+failed:
+	drbd_err(device, "Error %d while broadcasting event. Event seq:%u\n",
+		 err, seq);
+}
+
+/*
+ * notify_connection_state() - build a DRBD_CONNECTION_STATE message
+ * @skb:             buffer to append the message to, or NULL to allocate a
+ *                   new one and multicast it
+ * @seq:             sequence number for the message; when @skb is NULL it is
+ *                   replaced by the next value of notify_genl_seq
+ * @connection:      connection the notification is about
+ * @connection_info: state payload; not added for NOTIFY_DESTROY
+ * @type:            notification type, possibly combined with flag bits
+ *
+ * Failures are only logged.  The result of adding the statistics is not
+ * checked.
+ * NOTE(review): on an attribute failure the skb is freed even when it was
+ * supplied by the caller - confirm that callers do not touch it afterwards.
+ */
+void notify_connection_state(struct sk_buff *skb,
+			     unsigned int seq,
+			     struct drbd_connection *connection,
+			     struct connection_info *connection_info,
+			     enum drbd_notification_type type)
+{
+	struct connection_statistics connection_statistics;
+	struct drbd_genlmsghdr *dh;
+	bool multicast = false;
+	int err;
+
+	if (!skb) {
+		seq = atomic_inc_return(&notify_genl_seq);
+		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+		err = -ENOMEM;
+		if (!skb)
+			goto failed;
+		multicast = true;
+	}
+
+	err = -EMSGSIZE;
+	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_CONNECTION_STATE);
+	if (!dh)
+		goto nla_put_failure;
+	dh->minor = -1U;
+	dh->ret_code = NO_ERROR;
+	if (nla_put_drbd_cfg_context(skb, connection->resource, connection, NULL) ||
+	    nla_put_notification_header(skb, type) ||
+	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+	     connection_info_to_skb(skb, connection_info, true)))
+		goto nla_put_failure;
+	connection_statistics.conn_congested = test_bit(NET_CONGESTED, &connection->flags);
+	connection_statistics_to_skb(skb, &connection_statistics, !capable(CAP_SYS_ADMIN));
+	genlmsg_end(skb, dh);
+	if (multicast) {
+		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+		/* skb has been consumed or freed in netlink_broadcast() */
+		if (err && err != -ESRCH)
+			goto failed;
+	}
+	return;
+
+nla_put_failure:
+	nlmsg_free(skb);
+failed:
+	drbd_err(connection, "Error %d while broadcasting event. Event seq:%u\n",
+		 err, seq);
+}
+
+/*
+ * notify_peer_device_state() - build a DRBD_PEER_DEVICE_STATE message
+ * @skb:              buffer to append the message to, or NULL to allocate a
+ *                    new one and multicast it
+ * @seq:              sequence number for the message; when @skb is NULL it
+ *                    is replaced by the next value of notify_genl_seq
+ * @peer_device:      peer device the notification is about
+ * @peer_device_info: state payload; not added for NOTIFY_DESTROY
+ * @type:             notification type, possibly combined with flag bits
+ *
+ * Failures are only logged.  The result of adding the statistics is not
+ * checked.
+ * NOTE(review): on an attribute failure the skb is freed even when it was
+ * supplied by the caller - confirm that callers do not touch it afterwards.
+ */
+void notify_peer_device_state(struct sk_buff *skb,
+			      unsigned int seq,
+			      struct drbd_peer_device *peer_device,
+			      struct peer_device_info *peer_device_info,
+			      enum drbd_notification_type type)
+{
+	struct peer_device_statistics peer_device_statistics;
+	struct drbd_resource *resource = peer_device->device->resource;
+	struct drbd_genlmsghdr *dh;
+	bool multicast = false;
+	int err;
+
+	if (!skb) {
+		seq = atomic_inc_return(&notify_genl_seq);
+		skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+		err = -ENOMEM;
+		if (!skb)
+			goto failed;
+		multicast = true;
+	}
+
+	err = -EMSGSIZE;
+	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_PEER_DEVICE_STATE);
+	if (!dh)
+		goto nla_put_failure;
+	dh->minor = -1U;
+	dh->ret_code = NO_ERROR;
+	if (nla_put_drbd_cfg_context(skb, resource, peer_device->connection, peer_device->device) ||
+	    nla_put_notification_header(skb, type) ||
+	    ((type & ~NOTIFY_FLAGS) != NOTIFY_DESTROY &&
+	     peer_device_info_to_skb(skb, peer_device_info, true)))
+		goto nla_put_failure;
+	peer_device_to_statistics(&peer_device_statistics, peer_device);
+	peer_device_statistics_to_skb(skb, &peer_device_statistics, !capable(CAP_SYS_ADMIN));
+	genlmsg_end(skb, dh);
+	if (multicast) {
+		err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+		/* skb has been consumed or freed in netlink_broadcast() */
+		if (err && err != -ESRCH)
+			goto failed;
+	}
+	return;
+
+nla_put_failure:
+	nlmsg_free(skb);
+failed:
+	drbd_err(peer_device, "Error %d while broadcasting event. Event seq:%u\n",
+		 err, seq);
+}
+
+/*
+ * notify_helper() - multicast a DRBD_HELPER event
+ * @type:       notification type placed in the notification header
+ * @device:     device the helper belongs to, or NULL
+ * @connection: connection the helper belongs to; used to find the resource
+ *              when @device is NULL
+ * @name:       helper name, copied (and truncated if needed) into the event
+ * @status:     value reported as helper_status
+ *
+ * The attributes are added and the event is sent while holding
+ * notification_mutex.  Failures are only logged.
+ */
+void notify_helper(enum drbd_notification_type type,
+		   struct drbd_device *device, struct drbd_connection *connection,
+		   const char *name, int status)
+{
+	struct drbd_resource *resource = device ? device->resource : connection->resource;
+	struct drbd_helper_info helper_info;
+	unsigned int seq = atomic_inc_return(&notify_genl_seq);
+	struct sk_buff *skb = NULL;
+	struct drbd_genlmsghdr *dh;
+	int err;
+
+	strlcpy(helper_info.helper_name, name, sizeof(helper_info.helper_name));
+	helper_info.helper_name_len = min(strlen(name), sizeof(helper_info.helper_name));
+	helper_info.helper_status = status;
+
+	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
+	err = -ENOMEM;
+	if (!skb)
+		goto fail;
+
+	err = -EMSGSIZE;
+	dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_HELPER);
+	if (!dh)
+		goto fail;
+	dh->minor = device ? device->minor : -1;
+	dh->ret_code = NO_ERROR;
+	mutex_lock(&notification_mutex);
+	if (nla_put_drbd_cfg_context(skb, resource, connection, device) ||
+	    nla_put_notification_header(skb, type) ||
+	    drbd_helper_info_to_skb(skb, &helper_info, true))
+		goto unlock_fail;
+	genlmsg_end(skb, dh);
+	err = drbd_genl_multicast_events(skb, GFP_NOWAIT);
+	/* Ownership passed on: make sure the error path does not free it again. */
+	skb = NULL;
+	/* skb has been consumed or freed in netlink_broadcast() */
+	if (err && err != -ESRCH)
+		goto unlock_fail;
+	mutex_unlock(&notification_mutex);
+	return;
+
+unlock_fail:
+	mutex_unlock(&notification_mutex);
+fail:
+	nlmsg_free(skb);
+	drbd_err(resource, "Error %d while broadcasting event. Event seq:%u\n",
+		 err, seq);
+}
+
+static void notify_initial_state_done(struct sk_buff *skb, unsigned int seq)
+{ /* Appends a DRBD_INITIAL_STATE_DONE message with a NOTIFY_EXISTS header to @skb. */
+ struct drbd_genlmsghdr *dh;
+ int err;
+
+ err = -EMSGSIZE;
+ dh = genlmsg_put(skb, 0, seq, &drbd_genl_family, 0, DRBD_INITIAL_STATE_DONE);
+ if (!dh)
+ goto nla_put_failure;
+ dh->minor = -1U;
+ dh->ret_code = NO_ERROR;
+ if (nla_put_notification_header(skb, NOTIFY_EXISTS))
+ goto nla_put_failure;
+ genlmsg_end(skb, dh);
+ return;
+
+nla_put_failure:
+ nlmsg_free(skb); /* NOTE(review): get_initial_state() still reads skb->len after this call - confirm who owns skb here */
+ pr_err("Error %d sending event. Event seq:%u\n", err, seq);
+}
+
+/*
+ * Unlink every state change queued on @list and hand each one to
+ * forget_state_change().  @list is empty afterwards.
+ */
+static void free_state_changes(struct list_head *list)
+{
+	struct drbd_state_change *state_change, *next;
+
+	list_for_each_entry_safe(state_change, next, list, list) {
+		list_del(&state_change->list);
+		forget_state_change(state_change);
+	}
+}
+
+/*
+ * Number of notifications generated for @state_change: one for the resource,
+ * one per connection, one per device, and one per (device, connection) pair.
+ */
+static unsigned int notifications_for_state_change(struct drbd_state_change *state_change)
+{
+	unsigned int total = 1;
+
+	total += state_change->n_connections;
+	total += state_change->n_devices;
+	total += state_change->n_devices * state_change->n_connections;
+	return total;
+}
+
+static int get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
+{ /* One dump step: emits a single notification for the state change in cb->args[0], or the final "done" message. */
+ struct drbd_state_change *state_change = (struct drbd_state_change *)cb->args[0];
+ unsigned int seq = cb->args[2];
+ unsigned int n;
+ enum drbd_notification_type flags = 0;
+
+ /* There is no need for taking notification_mutex here: it doesn't
+ matter if the initial state events mix with later state change
+ events; we can always tell the events apart by the NOTIFY_EXISTS
+ flag. */
+
+ cb->args[5]--; /* args[5] counts the remaining dump steps */
+ if (cb->args[5] == 1) {
+ notify_initial_state_done(skb, seq);
+ goto out;
+ }
+ n = cb->args[4]++; /* args[4]: index of the next notification within this state change */
+ if (cb->args[4] < cb->args[3]) /* args[3]: number of notifications for this state change */
+ flags |= NOTIFY_CONTINUES;
+ if (n < 1) { /* index 0 is the resource itself */
+ notify_resource_state_change(skb, seq, state_change->resource,
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+ n--;
+ if (n < state_change->n_connections) { /* then one per connection */
+ notify_connection_state_change(skb, seq, &state_change->connections[n],
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+ n -= state_change->n_connections;
+ if (n < state_change->n_devices) { /* then one per device */
+ notify_device_state_change(skb, seq, &state_change->devices[n],
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+ n -= state_change->n_devices;
+ if (n < state_change->n_devices * state_change->n_connections) { /* finally one per peer device */
+ notify_peer_device_state_change(skb, seq, &state_change->peer_devices[n],
+ NOTIFY_EXISTS | flags);
+ goto next;
+ }
+
+next:
+ if (cb->args[4] == cb->args[3]) { /* this state change is done: advance the cursor to the next list entry */
+ struct drbd_state_change *next_state_change =
+ list_entry(state_change->list.next,
+ struct drbd_state_change, list);
+ cb->args[0] = (long)next_state_change;
+ cb->args[3] = notifications_for_state_change(next_state_change);
+ cb->args[4] = 0;
+ }
+out:
+ return skb->len;
+}
+
+int drbd_adm_get_initial_state(struct sk_buff *skb, struct netlink_callback *cb)
+{ /* Dump entry point: the first call snapshots all resources, later calls emit one notification each via get_initial_state(). */
+ struct drbd_resource *resource;
+ LIST_HEAD(head);
+
+ if (cb->args[5] >= 1) { /* not the first call: the snapshot already exists */
+ if (cb->args[5] > 1)
+ return get_initial_state(skb, cb);
+ if (cb->args[0]) { /* all steps done: free the remaining snapshot entries */
+ struct drbd_state_change *state_change =
+ (struct drbd_state_change *)cb->args[0];
+
+ /* connect list to head */
+ list_add(&head, &state_change->list);
+ free_state_changes(&head);
+ }
+ return 0;
+ }
+
+ cb->args[5] = 2; /* number of iterations */
+ mutex_lock(&resources_mutex);
+ for_each_resource(resource, &drbd_resources) {
+ struct drbd_state_change *state_change;
+
+ state_change = remember_old_state(resource, GFP_KERNEL);
+ if (!state_change) { /* undo the partial snapshot before failing */
+ if (!list_empty(&head))
+ free_state_changes(&head);
+ mutex_unlock(&resources_mutex);
+ return -ENOMEM;
+ }
+ copy_old_to_new_state_change(state_change);
+ list_add_tail(&state_change->list, &head);
+ cb->args[5] += notifications_for_state_change(state_change);
+ }
+ mutex_unlock(&resources_mutex);
+
+ if (!list_empty(&head)) {
+ struct drbd_state_change *state_change =
+ list_entry(head.next, struct drbd_state_change, list);
+ cb->args[0] = (long)state_change; /* cursor: state change currently being reported */
+ cb->args[3] = notifications_for_state_change(state_change);
+ list_del(&head); /* detach list from head */
+ }
+
+ cb->args[2] = cb->nlh->nlmsg_seq; /* sequence number used for every message of this dump */
+ return get_initial_state(skb, cb);
+}
char wp;
static char write_ordering_chars[] = {
- [WO_none] = 'n',
- [WO_drain_io] = 'd',
- [WO_bdev_flush] = 'f',
+ [WO_NONE] = 'n',
+ [WO_DRAIN_IO] = 'd',
+ [WO_BDEV_FLUSH] = 'f',
};
seq_printf(seq, "version: " REL_VERSION " (api:%d/proto:%d-%d)\n%s\n",
P_AUTH_RESPONSE = 0x11,
P_STATE_CHG_REQ = 0x12,
- /* asender (meta socket */
+ /* (meta socket) */
P_PING = 0x13,
P_PING_ACK = 0x14,
P_RECV_ACK = 0x15, /* Used in protocol B */
}
}
-static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
+static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
{
LIST_HEAD(reclaimed);
struct drbd_peer_request *peer_req, *t;
spin_lock_irq(&device->resource->req_lock);
reclaim_finished_net_peer_reqs(device, &reclaimed);
spin_unlock_irq(&device->resource->req_lock);
-
list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
drbd_free_net_peer_req(device, peer_req);
}
+/*
+ * Run drbd_reclaim_net_peer_reqs() on every device of @connection that
+ * currently has pages in use by the network (pp_in_use_by_net != 0).
+ *
+ * The peer device IDR is walked under rcu_read_lock(); the reclaim itself
+ * is done outside of the RCU read-side section, with a device reference
+ * held for the duration of the call.
+ */
+static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
+{
+	struct drbd_peer_device *pd;
+	int id;
+
+	rcu_read_lock();
+	idr_for_each_entry(&connection->peer_devices, pd, id) {
+		struct drbd_device *dev = pd->device;
+
+		if (atomic_read(&dev->pp_in_use_by_net) != 0) {
+			/* pin the device while we are outside of RCU */
+			kref_get(&dev->kref);
+			rcu_read_unlock();
+			drbd_reclaim_net_peer_reqs(dev);
+			kref_put(&dev->kref, drbd_destroy_device);
+			rcu_read_lock();
+		}
+	}
+	rcu_read_unlock();
+}
+
/**
* drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
* @device: DRBD device.
if (atomic_read(&device->pp_in_use) < mxb)
page = __drbd_alloc_pages(device, number);
+ /* Try to keep the fast path fast, but occasionally we need
+ * to reclaim the pages we lent to the network stack. */
+ if (page && atomic_read(&device->pp_in_use_by_net) > 512)
+ drbd_reclaim_net_peer_reqs(device);
+
while (page == NULL) {
prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
- drbd_kick_lo_and_reclaim_net(device);
+ drbd_reclaim_net_peer_reqs(device);
if (atomic_read(&device->pp_in_use) < mxb) {
page = __drbd_alloc_pages(device, number);
return 0;
}
- drbd_thread_start(&connection->asender);
+ drbd_thread_start(&connection->ack_receiver);
+ /* opencoded create_singlethread_workqueue(),
+ * to be able to use format string arguments */
+ connection->ack_sender =
+ alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
+ if (!connection->ack_sender) {
+ drbd_err(connection, "Failed to create workqueue ack_sender\n");
+ return 0;
+ }
mutex_lock(&connection->resource->conf_update);
/* The discard_my_data flag is a single-shot modifier to the next
struct drbd_peer_device *peer_device;
int vnr;
- if (connection->resource->write_ordering >= WO_bdev_flush) {
+ if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
rcu_read_lock();
idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
struct drbd_device *device = peer_device->device;
/* would rather check on EOPNOTSUPP, but that is not reliable.
* don't try again for ANY return value != 0
* if (rv == -EOPNOTSUPP) */
- drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
+ drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
}
put_ldev(device);
kref_put(&device->kref, drbd_destroy_device);
dc = rcu_dereference(bdev->disk_conf);
- if (wo == WO_bdev_flush && !dc->disk_flushes)
- wo = WO_drain_io;
- if (wo == WO_drain_io && !dc->disk_drain)
- wo = WO_none;
+ if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
+ wo = WO_DRAIN_IO;
+ if (wo == WO_DRAIN_IO && !dc->disk_drain)
+ wo = WO_NONE;
return wo;
}
enum write_ordering_e pwo;
int vnr;
static char *write_ordering_str[] = {
- [WO_none] = "none",
- [WO_drain_io] = "drain",
- [WO_bdev_flush] = "flush",
+ [WO_NONE] = "none",
+ [WO_DRAIN_IO] = "drain",
+ [WO_BDEV_FLUSH] = "flush",
};
pwo = resource->write_ordering;
- if (wo != WO_bdev_flush)
+ if (wo != WO_BDEV_FLUSH)
wo = min(pwo, wo);
rcu_read_lock();
idr_for_each_entry(&resource->devices, device, vnr) {
rcu_read_unlock();
resource->write_ordering = wo;
- if (pwo != resource->write_ordering || wo == WO_bdev_flush)
+ if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
/* wait for all pending IO completions, before we start
* zeroing things out. */
- conn_wait_active_ee_empty(first_peer_device(device)->connection);
+ conn_wait_active_ee_empty(peer_req->peer_device->connection);
/* add it to the active list now,
* so we can find it to present it in debugfs */
peer_req->submit_jif = jiffies;
rcu_read_unlock();
}
-static struct drbd_peer_device *
-conn_peer_device(struct drbd_connection *connection, int volume_number)
-{
- return idr_find(&connection->peer_devices, volume_number);
-}
-
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
int rv;
* Therefore we must send the barrier_ack after the barrier request was
* completed. */
switch (connection->resource->write_ordering) {
- case WO_none:
+ case WO_NONE:
if (rv == FE_RECYCLED)
return 0;
drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
/* Fall through */
- case WO_bdev_flush:
- case WO_drain_io:
+ case WO_BDEV_FLUSH:
+ case WO_DRAIN_IO:
conn_wait_active_ee_empty(connection);
drbd_flush(connection);
}
/*
- * e_end_resync_block() is called in asender context via
+ * e_end_resync_block() is called in ack_sender context via
* drbd_finish_peer_reqs().
*/
static int e_end_resync_block(struct drbd_work *w, int unused)
}
/*
- * e_end_block() is called in asender context via drbd_finish_peer_reqs().
+ * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
*/
static int e_end_block(struct drbd_work *w, int cancel)
{
} else
D_ASSERT(device, drbd_interval_empty(&peer_req->i));
- drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
+ drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
return err;
}
}
rcu_read_lock();
- tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
+ tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
rcu_read_unlock();
if (!tp)
peer_req->w.cb = superseded ? e_send_superseded :
e_send_retry_write;
list_add_tail(&peer_req->w.list, &device->done_ee);
- wake_asender(connection);
+ queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
err = -ENOENT;
goto out;
if (dp_flags & DP_SEND_RECEIVE_ACK) {
/* I really don't like it that the receiver thread
* sends on the msock, but anyways */
- drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
+ drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
}
if (tp) {
os = ns = drbd_read_state(device);
spin_unlock_irq(&device->resource->req_lock);
- /* If some other part of the code (asender thread, timeout)
+ /* If some other part of the code (ack_receiver thread, timeout)
* already decided to close the connection again,
* we must not "re-establish" it here. */
if (os.conn <= C_TEAR_DOWN)
*/
conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
- /* asender does not clean up anything. it must not interfere, either */
- drbd_thread_stop(&connection->asender);
+ /* ack_receiver does not clean up anything. it must not interfere, either */
+ drbd_thread_stop(&connection->ack_receiver);
+ if (connection->ack_sender) {
+ destroy_workqueue(connection->ack_sender);
+ connection->ack_sender = NULL;
+ }
drbd_free_sock(connection);
rcu_read_lock();
return 0;
}
-static int connection_finish_peer_reqs(struct drbd_connection *connection)
+struct meta_sock_cmd {
+ size_t pkt_size;
+ int (*fn)(struct drbd_connection *connection, struct packet_info *);
+};
+
+static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
{
- struct drbd_peer_device *peer_device;
- int vnr, not_empty = 0;
+ long t;
+ struct net_conf *nc;
- do {
- clear_bit(SIGNAL_ASENDER, &connection->flags);
- flush_signals(current);
+ rcu_read_lock();
+ nc = rcu_dereference(connection->net_conf);
+ t = ping_timeout ? nc->ping_timeo : nc->ping_int;
+ rcu_read_unlock();
- rcu_read_lock();
- idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
- struct drbd_device *device = peer_device->device;
- kref_get(&device->kref);
- rcu_read_unlock();
- if (drbd_finish_peer_reqs(device)) {
- kref_put(&device->kref, drbd_destroy_device);
- return 1;
- }
- kref_put(&device->kref, drbd_destroy_device);
- rcu_read_lock();
- }
- set_bit(SIGNAL_ASENDER, &connection->flags);
+ t *= HZ;
+ if (ping_timeout)
+ t /= 10;
- spin_lock_irq(&connection->resource->req_lock);
- idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
- struct drbd_device *device = peer_device->device;
- not_empty = !list_empty(&device->done_ee);
- if (not_empty)
- break;
- }
- spin_unlock_irq(&connection->resource->req_lock);
- rcu_read_unlock();
- } while (not_empty);
+ connection->meta.socket->sk->sk_rcvtimeo = t;
+}
- return 0;
+static void set_ping_timeout(struct drbd_connection *connection)
+{
+ set_rcvtimeo(connection, 1);
}
-struct asender_cmd {
- size_t pkt_size;
- int (*fn)(struct drbd_connection *connection, struct packet_info *);
-};
+static void set_idle_timeout(struct drbd_connection *connection)
+{
+ set_rcvtimeo(connection, 0);
+}
-static struct asender_cmd asender_tbl[] = {
+static struct meta_sock_cmd ack_receiver_tbl[] = {
[P_PING] = { 0, got_Ping },
[P_PING_ACK] = { 0, got_PingAck },
[P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
[P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
};
-int drbd_asender(struct drbd_thread *thi)
+int drbd_ack_receiver(struct drbd_thread *thi)
{
struct drbd_connection *connection = thi->connection;
- struct asender_cmd *cmd = NULL;
+ struct meta_sock_cmd *cmd = NULL;
struct packet_info pi;
+ unsigned long pre_recv_jif;
int rv;
void *buf = connection->meta.rbuf;
int received = 0;
unsigned int header_size = drbd_header_size(connection);
int expect = header_size;
bool ping_timeout_active = false;
- struct net_conf *nc;
- int ping_timeo, tcp_cork, ping_int;
struct sched_param param = { .sched_priority = 2 };
rv = sched_setscheduler(current, SCHED_RR, ¶m);
if (rv < 0)
- drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
+ drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);
while (get_t_state(thi) == RUNNING) {
drbd_thread_current_set_cpu(thi);
- rcu_read_lock();
- nc = rcu_dereference(connection->net_conf);
- ping_timeo = nc->ping_timeo;
- tcp_cork = nc->tcp_cork;
- ping_int = nc->ping_int;
- rcu_read_unlock();
+ conn_reclaim_net_peer_reqs(connection);
if (test_and_clear_bit(SEND_PING, &connection->flags)) {
if (drbd_send_ping(connection)) {
drbd_err(connection, "drbd_send_ping has failed\n");
goto reconnect;
}
- connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
+ set_ping_timeout(connection);
ping_timeout_active = true;
}
- /* TODO: conditionally cork; it may hurt latency if we cork without
- much to send */
- if (tcp_cork)
- drbd_tcp_cork(connection->meta.socket);
- if (connection_finish_peer_reqs(connection)) {
- drbd_err(connection, "connection_finish_peer_reqs() failed\n");
- goto reconnect;
- }
- /* but unconditionally uncork unless disabled */
- if (tcp_cork)
- drbd_tcp_uncork(connection->meta.socket);
-
- /* short circuit, recv_msg would return EINTR anyways. */
- if (signal_pending(current))
- continue;
-
+ pre_recv_jif = jiffies;
rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
- clear_bit(SIGNAL_ASENDER, &connection->flags);
-
- flush_signals(current);
/* Note:
* -EINTR (on meta) we got a signal
* rv < expected: "woken" by signal during receive
* rv == 0 : "connection shut down by peer"
*/
-received_more:
if (likely(rv > 0)) {
received += rv;
buf += rv;
} else if (rv == -EAGAIN) {
/* If the data socket received something meanwhile,
* that is good enough: peer is still alive. */
- if (time_after(connection->last_received,
- jiffies - connection->meta.socket->sk->sk_rcvtimeo))
+ if (time_after(connection->last_received, pre_recv_jif))
continue;
if (ping_timeout_active) {
drbd_err(connection, "PingAck did not arrive in time.\n");
set_bit(SEND_PING, &connection->flags);
continue;
} else if (rv == -EINTR) {
+ /* maybe drbd_thread_stop(): the while condition will notice.
+ * maybe woken for send_ping: we'll send a ping above,
+ * and change the rcvtimeo */
+ flush_signals(current);
continue;
} else {
drbd_err(connection, "sock_recvmsg returned %d\n", rv);
if (received == expect && cmd == NULL) {
if (decode_header(connection, connection->meta.rbuf, &pi))
goto reconnect;
- cmd = &asender_tbl[pi.cmd];
- if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
+ cmd = &ack_receiver_tbl[pi.cmd];
+ if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
cmdname(pi.cmd), pi.cmd);
goto disconnect;
connection->last_received = jiffies;
- if (cmd == &asender_tbl[P_PING_ACK]) {
- /* restore idle timeout */
- connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
+ if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
+ set_idle_timeout(connection);
ping_timeout_active = false;
}
expect = header_size;
cmd = NULL;
}
- if (test_bit(SEND_PING, &connection->flags))
- continue;
- rv = drbd_recv_short(connection->meta.socket, buf, expect-received, MSG_DONTWAIT);
- if (rv > 0)
- goto received_more;
}
if (0) {
disconnect:
conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
}
- clear_bit(SIGNAL_ASENDER, &connection->flags);
- drbd_info(connection, "asender terminated\n");
+ drbd_info(connection, "ack_receiver terminated\n");
return 0;
}
+
+/*
+ * Work function behind peer_device->send_acks_work (queued on the
+ * connection's ack_sender workqueue): finish the completed peer requests
+ * of the device, optionally corking the meta socket around it.
+ *
+ * If finishing the peer requests fails, the connection is forced to
+ * C_NETWORK_FAILURE and the socket is not uncorked.
+ */
+void drbd_send_acks_wf(struct work_struct *ws)
+{
+	struct drbd_peer_device *peer_device =
+		container_of(ws, struct drbd_peer_device, send_acks_work);
+	struct drbd_connection *connection = peer_device->connection;
+	struct drbd_device *device = peer_device->device;
+	int cork, rv;
+
+	rcu_read_lock();
+	cork = rcu_dereference(connection->net_conf)->tcp_cork;
+	rcu_read_unlock();
+
+	if (cork)
+		drbd_tcp_cork(connection->meta.socket);
+
+	rv = drbd_finish_peer_reqs(device);
+	/* The matching get is in drbd_endio_write_sec_final(). It is needed to
+	 * keep the struct work_struct send_acks_work alive, which is embedded
+	 * in the peer_device object. */
+	kref_put(&device->kref, drbd_destroy_device);
+
+	if (rv)
+		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
+	else if (cork)
+		drbd_tcp_uncork(connection->meta.socket);
+}
kref_get(&req->kref); /* wait for the DONE */
if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
- /* potentially already completed in the asender thread */
+ /* potentially already completed in the ack_receiver thread */
if (!(s & RQ_NET_DONE)) {
atomic_add(req->i.size >> 9, &device->ap_in_flight);
set_if_null_req_not_net_done(peer_device, req);
}
- if (s & RQ_NET_PENDING)
+ if (req->rq_state & RQ_NET_PENDING)
set_if_null_req_ack_pending(peer_device, req);
}
return false;
}
+/*
+ * Decide from the device state @s whether a request should be sent to the
+ * peer: always if the peer disk is D_UP_TO_DATE, otherwise only if the peer
+ * disk is at least D_INCONSISTENT and the connection state is within
+ * [C_WF_BITMAP_T, C_AHEAD).
+ *
+ * Before proto 96 the lower bound was >= CONNECTED instead of
+ * >= C_WF_BITMAP_T. That is equivalent, since before 96 IO was frozen in
+ * the C_WF_BITMAP* states.
+ */
+bool drbd_should_do_remote(union drbd_dev_state s)
+{
+	if (s.pdsk == D_UP_TO_DATE)
+		return true;
+	if (s.pdsk < D_INCONSISTENT)
+		return false;
+	return s.conn >= C_WF_BITMAP_T && s.conn < C_AHEAD;
+}
+
+/*
+ * True if the connection state in @s is C_AHEAD or C_WF_BITMAP_S.
+ * pdsk = D_INCONSISTENT as a consequence. A protocol 96 check is not
+ * necessary, since state C_AHEAD is only entered if proto >= 96.
+ */
+static bool drbd_should_send_out_of_sync(union drbd_dev_state s)
+{
+	switch (s.conn) {
+	case C_AHEAD:
+	case C_WF_BITMAP_S:
+		return true;
+	default:
+		return false;
+	}
+}
+
/* returns number of connections (== 1, for drbd 8.4)
* expected to actually write this data,
* which does NOT include those that we are L_AHEAD for. */
* stable storage, and this is a WRITE, we may not even submit
* this bio. */
if (get_ldev(device)) {
- req->pre_submit_jif = jiffies;
if (drbd_insert_fault(device,
rw == WRITE ? DRBD_FAULT_DT_WR
: rw == READ ? DRBD_FAULT_DT_RD
&device->pending_master_completion[rw == WRITE]);
if (req->private_bio) {
/* needs to be marked within the same spinlock */
+ req->pre_submit_jif = jiffies;
list_add_tail(&req->req_pending_local,
&device->pending_completion[rw == WRITE]);
_req_mod(req, TO_BE_SUBMITTED);
return BLK_QC_T_NONE;
}
+/*
+ * net_timeout_reached() - decide whether @net_req justifies timing out
+ * @connection.
+ * @net_req: oldest request still waiting on the peer (chosen by the caller)
+ * @connection: connection the request was sent on
+ * @now: current time in jiffies, as sampled by the caller
+ * @ent: effective network timeout in jiffies
+ * @ko_count: only used for the log messages
+ * @timeout: only used for the log messages
+ *
+ * Returns true if the request still has RQ_NET_PENDING set after @ent, or if
+ * the last sent barrier is older than @ent. Returns false if @ent has not
+ * elapsed for the request, if the last reconnect was less than @ent ago, or
+ * if the barrier closing the request's epoch was not even sent yet.
+ */
+static bool net_timeout_reached(struct drbd_request *net_req,
+ struct drbd_connection *connection,
+ unsigned long now, unsigned long ent,
+ unsigned int ko_count, unsigned int timeout)
+{
+ struct drbd_device *device = net_req->device;
+
+ /* request is not old enough yet */
+ if (!time_after(now, net_req->pre_send_jif + ent))
+ return false;
+
+ /* connection was (re-)established less than ent ago */
+ if (time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent))
+ return false;
+
+ if (net_req->rq_state & RQ_NET_PENDING) {
+ drbd_warn(device, "Remote failed to finish a request within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
+ jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
+ return true;
+ }
+
+ /* We received an ACK already (or are using protocol A),
+ * but are waiting for the epoch closing barrier ack.
+ * Check if we sent the barrier already. We should not blame the peer
+ * for being unresponsive, if we did not even ask it yet. */
+ if (net_req->epoch == connection->send.current_epoch_nr) {
+ drbd_warn(device,
+ "We did not send a P_BARRIER for %ums > ko-count (%u) * timeout (%u * 0.1s); drbd kernel thread blocked?\n",
+ jiffies_to_msecs(now - net_req->pre_send_jif), ko_count, timeout);
+ return false;
+ }
+
+ /* Worst case: we may have been blocked for whatever reason, then
+ * suddenly are able to send a lot of requests (and epoch separating
+ * barriers) in quick succession.
+ * The timestamp of the net_req may be much too old and not correspond
+ * to the sending time of the relevant unack'ed barrier packet, so
+ * would trigger a spurious timeout. The latest barrier packet may
+ * have a too recent timestamp to trigger the timeout, potentially miss
+ * a timeout. Right now we don't have a place to conveniently store
+ * these timestamps.
+ * But in this particular situation, the application requests are still
+ * completed to upper layers, DRBD should still "feel" responsive.
+ * No need yet to kill this connection, it may still recover.
+ * If not, eventually we will have queued enough into the network for
+ * us to block. From that point of view, the timestamp of the last sent
+ * barrier packet is relevant enough.
+ */
+ if (time_after(now, connection->send.last_sent_barrier_jif + ent)) {
+ drbd_warn(device, "Remote failed to answer a P_BARRIER (sent at %lu jif; now=%lu jif) within %ums > ko-count (%u) * timeout (%u * 0.1s)\n",
+ connection->send.last_sent_barrier_jif, now,
+ jiffies_to_msecs(now - connection->send.last_sent_barrier_jif), ko_count, timeout);
+ return true;
+ }
+ return false;
+}
+
+/* A request is considered timed out, if
+ * - we have some effective timeout from the configuration,
+ * with some state restrictions applied,
+ * - the oldest request is waiting for a response from the network
+ * resp. the local disk,
+ * - the oldest request is in fact older than the effective timeout,
+ * - the connection was established (resp. disk was attached)
+ * for longer than the timeout already.
+ * Note that for 32bit jiffies and very stable connections/disks,
+ * we may have a wrap around, which is caught by
+ * !time_in_range(now, last_..._jif, last_..._jif + timeout).
+ *
+ * Side effect: once per 32bit wrap-around interval, which means every
+ * ~198 days with 250 HZ, we have a window where the timeout would need
+ * to expire twice (worst case) to become effective. Good enough.
+ */
+
void request_timer_fn(unsigned long data)
{
struct drbd_device *device = (struct drbd_device *) data;
unsigned long oldest_submit_jif;
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
unsigned long now;
+ unsigned int ko_count = 0, timeout = 0;
rcu_read_lock();
nc = rcu_dereference(connection->net_conf);
- if (nc && device->state.conn >= C_WF_REPORT_PARAMS)
- ent = nc->timeout * HZ/10 * nc->ko_count;
+ if (nc && device->state.conn >= C_WF_REPORT_PARAMS) {
+ ko_count = nc->ko_count;
+ timeout = nc->timeout;
+ }
if (get_ldev(device)) { /* implicit state.disk >= D_INCONSISTENT */
dt = rcu_dereference(device->ldev->disk_conf)->disk_timeout * HZ / 10;
}
rcu_read_unlock();
+
+ ent = timeout * HZ/10 * ko_count;
et = min_not_zero(dt, ent);
if (!et)
spin_lock_irq(&device->resource->req_lock);
req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
- req_peer = connection->req_not_net_done;
+
/* maybe the oldest request waiting for the peer is in fact still
- * blocking in tcp sendmsg */
- if (!req_peer && connection->req_next && connection->req_next->pre_send_jif)
- req_peer = connection->req_next;
+ * blocking in tcp sendmsg. That's ok, though, that's handled via the
+ * socket send timeout, requesting a ping, and bumping ko-count in
+ * we_should_drop_the_connection().
+ */
+
+ /* check the oldest request we did successfully send,
+ * but which is still waiting for an ACK. */
+ req_peer = connection->req_ack_pending;
+
+ /* if we don't have such a request (e.g. protocol A)
+ * check the oldest request which is still waiting on its epoch
+ * closing barrier ack. */
+ if (!req_peer)
+ req_peer = connection->req_not_net_done;
/* evaluate the oldest peer request only in one timer! */
if (req_peer && req_peer->device != device)
: req_write ? req_write->pre_submit_jif
: req_read ? req_read->pre_submit_jif : now;
- /* The request is considered timed out, if
- * - we have some effective timeout from the configuration,
- * with above state restrictions applied,
- * - the oldest request is waiting for a response from the network
- * resp. the local disk,
- * - the oldest request is in fact older than the effective timeout,
- * - the connection was established (resp. disk was attached)
- * for longer than the timeout already.
- * Note that for 32bit jiffies and very stable connections/disks,
- * we may have a wrap around, which is catched by
- * !time_in_range(now, last_..._jif, last_..._jif + timeout).
- *
- * Side effect: once per 32bit wrap-around interval, which means every
- * ~198 days with 250 HZ, we have a window where the timeout would need
- * to expire twice (worst case) to become effective. Good enough.
- */
- if (ent && req_peer &&
- time_after(now, req_peer->pre_send_jif + ent) &&
- !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
- drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
+ if (ent && req_peer && net_timeout_reached(req_peer, connection, now, ent, ko_count, timeout))
_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_VERBOSE | CS_HARD);
- }
+
if (dt && oldest_submit_jif != now &&
time_after(now, oldest_submit_jif + dt) &&
!time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
return rv;
}
-static inline bool drbd_should_do_remote(union drbd_dev_state s)
-{
- return s.pdsk == D_UP_TO_DATE ||
- (s.pdsk >= D_INCONSISTENT &&
- s.conn >= C_WF_BITMAP_T &&
- s.conn < C_AHEAD);
- /* Before proto 96 that was >= CONNECTED instead of >= C_WF_BITMAP_T.
- That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
- states. */
-}
-static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
-{
- return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
- /* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary
- since we enter state C_AHEAD only if proto >= 96 */
-}
+extern bool drbd_should_do_remote(union drbd_dev_state);
#endif
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
+#include "drbd_state_change.h"
struct after_state_chg_work {
struct drbd_work w;
union drbd_state ns;
enum chg_state_flags flags;
struct completion *done;
+ struct drbd_state_change *state_change;
};
enum sanitize_state_warnings {
IMPLICITLY_UPGRADED_PDSK,
};
+/*
+ * Count the devices and the connections of @resource and store the results
+ * in *@n_devices and *@n_connections.
+ */
+static void count_objects(struct drbd_resource *resource,
+			  unsigned int *n_devices,
+			  unsigned int *n_connections)
+{
+	struct drbd_connection *connection;
+	struct drbd_device *device;
+	unsigned int devices = 0;
+	unsigned int connections = 0;
+	int vnr;
+
+	idr_for_each_entry(&resource->devices, device, vnr)
+		devices++;
+	for_each_connection(connection, resource)
+		connections++;
+
+	*n_devices = devices;
+	*n_connections = connections;
+}
+
+/*
+ * alloc_state_change() - allocate one state change record with room for
+ * @n_devices device entries, @n_connections connection entries and
+ * @n_devices * @n_connections peer device entries.
+ *
+ * Everything lives in a single kmalloc() area: the struct itself, followed
+ * by the devices, connections and peer_devices arrays. The object pointers
+ * that forget_state_change() inspects (resource, devices[], connections[])
+ * are preset to NULL, so a partially filled record can be released safely.
+ * All other fields are left uninitialized.
+ *
+ * Returns the new record, or NULL if the allocation failed.
+ */
+static struct drbd_state_change *alloc_state_change(unsigned int n_devices, unsigned int n_connections, gfp_t gfp)
+{
+	struct drbd_state_change *state_change;
+	size_t size;
+	unsigned int n;
+
+	/* Compute in size_t throughout: the element counts are multiplied
+	 * with each other, and the sum must not be narrowed before it is
+	 * handed to kmalloc(). */
+	size = sizeof(struct drbd_state_change) +
+	       (size_t)n_devices * sizeof(struct drbd_device_state_change) +
+	       (size_t)n_connections * sizeof(struct drbd_connection_state_change) +
+	       (size_t)n_devices * n_connections * sizeof(struct drbd_peer_device_state_change);
+	state_change = kmalloc(size, gfp);
+	if (!state_change)
+		return NULL;
+	state_change->n_devices = n_devices;
+	state_change->n_connections = n_connections;
+	/* carve the three arrays out of the memory behind the struct */
+	state_change->devices = (void *)(state_change + 1);
+	state_change->connections = (void *)&state_change->devices[n_devices];
+	state_change->peer_devices = (void *)&state_change->connections[n_connections];
+	state_change->resource->resource = NULL;
+	for (n = 0; n < n_devices; n++)
+		state_change->devices[n].device = NULL;
+	for (n = 0; n < n_connections; n++)
+		state_change->connections[n].connection = NULL;
+	return state_change;
+}
+
+/*
+ * remember_old_state() - snapshot the current state of @resource, its
+ * connections, devices and peer devices into the [OLD] slots of a newly
+ * allocated struct drbd_state_change.
+ * @resource: resource to snapshot
+ * @gfp: allocation flags passed on to alloc_state_change()
+ *
+ * Takes a kref on the resource, on every connection and on every device
+ * recorded; forget_state_change() drops them again.
+ *
+ * Returns the snapshot, or NULL if the allocation failed.
+ */
+struct drbd_state_change *remember_old_state(struct drbd_resource *resource, gfp_t gfp)
+{
+ struct drbd_state_change *state_change;
+ struct drbd_device *device;
+ unsigned int n_devices;
+ struct drbd_connection *connection;
+ unsigned int n_connections;
+ int vnr;
+
+ struct drbd_device_state_change *device_state_change;
+ struct drbd_peer_device_state_change *peer_device_state_change;
+ struct drbd_connection_state_change *connection_state_change;
+
+ /* Caller holds req_lock spinlock.
+ * No state, no device IDR, no connections lists can change.
+ * NOTE(review): drbd_adm_get_initial_state() calls this with GFP_KERNEL
+ * while holding resources_mutex, and no req_lock is taken there -
+ * confirm that this is sufficient for that caller. */
+ count_objects(resource, &n_devices, &n_connections);
+ state_change = alloc_state_change(n_devices, n_connections, gfp);
+ if (!state_change)
+ return NULL;
+
+ kref_get(&resource->kref);
+ state_change->resource->resource = resource;
+ state_change->resource->role[OLD] =
+ conn_highest_role(first_connection(resource));
+ state_change->resource->susp[OLD] = resource->susp;
+ state_change->resource->susp_nod[OLD] = resource->susp_nod;
+ state_change->resource->susp_fen[OLD] = resource->susp_fen;
+
+ connection_state_change = state_change->connections;
+ for_each_connection(connection, resource) {
+ kref_get(&connection->kref);
+ connection_state_change->connection = connection;
+ connection_state_change->cstate[OLD] =
+ connection->cstate;
+ connection_state_change->peer_role[OLD] =
+ conn_highest_peer(connection);
+ connection_state_change++;
+ }
+
+ device_state_change = state_change->devices;
+ peer_device_state_change = state_change->peer_devices;
+ idr_for_each_entry(&resource->devices, device, vnr) {
+ kref_get(&device->kref);
+ device_state_change->device = device;
+ device_state_change->disk_state[OLD] = device->state.disk;
+
+ /* The peer_devices for each device have to be enumerated in
+ the order of the connections. We may not use for_each_peer_device() here. */
+ for_each_connection(connection, resource) {
+ struct drbd_peer_device *peer_device;
+
+ /* NOTE(review): the lookup result is stored unchecked, and
+ * remember_new_state() dereferences peer_device->device -
+ * confirm that a peer device exists for every pair. */
+ peer_device = conn_peer_device(connection, device->vnr);
+ peer_device_state_change->peer_device = peer_device;
+ /* the peer device fields are taken from the device state */
+ peer_device_state_change->disk_state[OLD] =
+ device->state.pdsk;
+ /* repl_state is never reported below C_WF_REPORT_PARAMS */
+ peer_device_state_change->repl_state[OLD] =
+ max_t(enum drbd_conns,
+ C_WF_REPORT_PARAMS, device->state.conn);
+ peer_device_state_change->resync_susp_user[OLD] =
+ device->state.user_isp;
+ peer_device_state_change->resync_susp_peer[OLD] =
+ device->state.peer_isp;
+ peer_device_state_change->resync_susp_dependency[OLD] =
+ device->state.aftr_isp;
+ peer_device_state_change++;
+ }
+ device_state_change++;
+ }
+
+ return state_change;
+}
+
+/*
+ * Fill the [NEW] slots of @state_change from the current state of the
+ * resource, devices, connections and peer devices recorded in it.
+ * A NULL @state_change is silently ignored.
+ */
+static void remember_new_state(struct drbd_state_change *state_change)
+{
+	struct drbd_resource_state_change *rsc;
+	struct drbd_resource *resource;
+	unsigned int i, n_peer_devices;
+
+	if (!state_change)
+		return;
+
+	rsc = &state_change->resource[0];
+	resource = rsc->resource;
+
+	rsc->role[NEW] = conn_highest_role(first_connection(resource));
+	rsc->susp[NEW] = resource->susp;
+	rsc->susp_nod[NEW] = resource->susp_nod;
+	rsc->susp_fen[NEW] = resource->susp_fen;
+
+	for (i = 0; i < state_change->n_devices; i++) {
+		struct drbd_device_state_change *dsc = &state_change->devices[i];
+
+		dsc->disk_state[NEW] = dsc->device->state.disk;
+	}
+
+	for (i = 0; i < state_change->n_connections; i++) {
+		struct drbd_connection_state_change *csc = &state_change->connections[i];
+		struct drbd_connection *connection = csc->connection;
+
+		csc->cstate[NEW] = connection->cstate;
+		csc->peer_role[NEW] = conn_highest_peer(connection);
+	}
+
+	n_peer_devices = state_change->n_devices * state_change->n_connections;
+	for (i = 0; i < n_peer_devices; i++) {
+		struct drbd_peer_device_state_change *pdsc = &state_change->peer_devices[i];
+		/* work on one snapshot of the device state */
+		union drbd_dev_state state = pdsc->peer_device->device->state;
+
+		pdsc->disk_state[NEW] = state.pdsk;
+		/* repl_state is never reported below C_WF_REPORT_PARAMS */
+		pdsc->repl_state[NEW] =
+			max_t(enum drbd_conns, C_WF_REPORT_PARAMS, state.conn);
+		pdsc->resync_susp_user[NEW] = state.user_isp;
+		pdsc->resync_susp_peer[NEW] = state.peer_isp;
+		pdsc->resync_susp_dependency[NEW] = state.aftr_isp;
+	}
+}
+
+/*
+ * Duplicate every [OLD] value of @state_change into its [NEW] slot, for the
+ * resource, all connections, all devices and all peer devices, so that the
+ * record describes "no change".
+ */
+void copy_old_to_new_state_change(struct drbd_state_change *state_change)
+{
+	struct drbd_resource_state_change *rsc = &state_change->resource[0];
+	unsigned int n_peer_devices;
+	unsigned int i;
+
+	rsc->role[NEW] = rsc->role[OLD];
+	rsc->susp[NEW] = rsc->susp[OLD];
+	rsc->susp_nod[NEW] = rsc->susp_nod[OLD];
+	rsc->susp_fen[NEW] = rsc->susp_fen[OLD];
+
+	for (i = 0; i < state_change->n_connections; i++) {
+		struct drbd_connection_state_change *csc = &state_change->connections[i];
+
+		csc->peer_role[NEW] = csc->peer_role[OLD];
+		csc->cstate[NEW] = csc->cstate[OLD];
+	}
+
+	for (i = 0; i < state_change->n_devices; i++) {
+		struct drbd_device_state_change *dsc = &state_change->devices[i];
+
+		dsc->disk_state[NEW] = dsc->disk_state[OLD];
+	}
+
+	n_peer_devices = state_change->n_devices * state_change->n_connections;
+	for (i = 0; i < n_peer_devices; i++) {
+		struct drbd_peer_device_state_change *pdsc = &state_change->peer_devices[i];
+
+		pdsc->disk_state[NEW] = pdsc->disk_state[OLD];
+		pdsc->repl_state[NEW] = pdsc->repl_state[OLD];
+		pdsc->resync_susp_user[NEW] = pdsc->resync_susp_user[OLD];
+		pdsc->resync_susp_peer[NEW] = pdsc->resync_susp_peer[OLD];
+		pdsc->resync_susp_dependency[NEW] = pdsc->resync_susp_dependency[OLD];
+	}
+}
+
+/*
+ * Release @state_change: drop the reference on the recorded resource, on
+ * every recorded device and on every recorded connection (entries that are
+ * still NULL are skipped), then free the record itself.
+ * A NULL @state_change is silently ignored.
+ */
+void forget_state_change(struct drbd_state_change *state_change)
+{
+	struct drbd_resource *resource;
+	unsigned int i;
+
+	if (!state_change)
+		return;
+
+	resource = state_change->resource->resource;
+	if (resource)
+		kref_put(&resource->kref, drbd_destroy_resource);
+
+	for (i = 0; i < state_change->n_devices; i++) {
+		struct drbd_device *device = state_change->devices[i].device;
+
+		if (device)
+			kref_put(&device->kref, drbd_destroy_device);
+	}
+
+	for (i = 0; i < state_change->n_connections; i++) {
+		struct drbd_connection *connection = state_change->connections[i].connection;
+
+		if (connection)
+			kref_put(&connection->kref, drbd_destroy_connection);
+	}
+
+	kfree(state_change);
+}
+
static int w_after_state_ch(struct drbd_work *w, int unused);
static void after_state_ch(struct drbd_device *device, union drbd_state os,
- union drbd_state ns, enum chg_state_flags flags);
+ union drbd_state ns, enum chg_state_flags flags,
+ struct drbd_state_change *);
static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
return R_SECONDARY;
return R_UNKNOWN;
}
+
static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
{
if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
drbd_info(device, "Resumed AL updates\n");
}
-/* helper for __drbd_set_state */
+/* helper for _drbd_set_state */
static void set_ov_position(struct drbd_device *device, enum drbd_conns cs)
{
if (first_peer_device(device)->connection->agreed_pro_version < 90)
}
/**
- * __drbd_set_state() - Set a new DRBD state
+ * _drbd_set_state() - Set a new DRBD state
* @device: DRBD device.
* @ns: new state.
* @flags: Flags
* @done: Optional completion, that will get completed after the after_state_ch() finished
*
- * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
+ * Caller needs to hold req_lock. Do not call directly.
*/
enum drbd_state_rv
-__drbd_set_state(struct drbd_device *device, union drbd_state ns,
- enum chg_state_flags flags, struct completion *done)
+_drbd_set_state(struct drbd_device *device, union drbd_state ns,
+ enum chg_state_flags flags, struct completion *done)
{
struct drbd_peer_device *peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
enum drbd_state_rv rv = SS_SUCCESS;
enum sanitize_state_warnings ssw;
struct after_state_chg_work *ascw;
+ struct drbd_state_change *state_change;
os = drbd_read_state(device);
if (!is_sync_state(os.conn) && is_sync_state(ns.conn))
clear_bit(RS_DONE, &device->flags);
+ /* FIXME: Have any flags been set earlier in this function already? */
+ state_change = remember_old_state(device->resource, GFP_ATOMIC);
+
/* changes to local_cnt and device flags should be visible before
* changes to state, which again should be visible before anything else
* depending on that change happens. */
device->resource->susp_fen = ns.susp_fen;
smp_wmb();
+ remember_new_state(state_change);
+
/* put replicated vs not-replicated requests in seperate epochs */
if (drbd_should_do_remote((union drbd_dev_state)os.i) !=
drbd_should_do_remote((union drbd_dev_state)ns.i))
ascw->w.cb = w_after_state_ch;
ascw->device = device;
ascw->done = done;
+ ascw->state_change = state_change;
drbd_queue_work(&connection->sender_work,
&ascw->w);
} else {
container_of(w, struct after_state_chg_work, w);
struct drbd_device *device = ascw->device;
- after_state_ch(device, ascw->os, ascw->ns, ascw->flags);
+ after_state_ch(device, ascw->os, ascw->ns, ascw->flags, ascw->state_change);
+ forget_state_change(ascw->state_change);
if (ascw->flags & CS_WAIT_COMPLETE)
complete(ascw->done);
kfree(ascw);
D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
/* open coded non-blocking drbd_suspend_io(device); */
- set_bit(SUSPEND_IO, &device->flags);
+ atomic_inc(&device->suspend_cnt);
drbd_bm_lock(device, why, flags);
rv = io_fn(device);
return rv;
}
+/**
+ * notify_resource_state_change() - Report the new state of a resource
+ * @skb: passed through unchanged to notify_resource_state()
+ * @seq: passed through unchanged to notify_resource_state()
+ * @resource_state_change: recorded old/new state of one resource
+ * @type: notification type, passed through unchanged
+ *
+ * Copies the NEW side of the recorded role and suspend flags into a
+ * struct resource_info and hands it to notify_resource_state().
+ */
+void notify_resource_state_change(struct sk_buff *skb,
+				   unsigned int seq,
+				   struct drbd_resource_state_change *resource_state_change,
+				   enum drbd_notification_type type)
+{
+	struct resource_info info = {
+		.res_susp_fen = resource_state_change->susp_fen[NEW],
+		.res_susp_nod = resource_state_change->susp_nod[NEW],
+		.res_susp = resource_state_change->susp[NEW],
+		.res_role = resource_state_change->role[NEW],
+	};
+
+	notify_resource_state(skb, seq, resource_state_change->resource,
+			      &info, type);
+}
+
+/**
+ * notify_connection_state_change() - Report the new state of a connection
+ * @skb: passed through unchanged to notify_connection_state()
+ * @seq: passed through unchanged to notify_connection_state()
+ * @connection_state_change: recorded old/new state of one connection
+ * @type: notification type, passed through unchanged
+ *
+ * Copies the NEW side of the recorded cstate and peer role into a
+ * struct connection_info and hands it to notify_connection_state().
+ */
+void notify_connection_state_change(struct sk_buff *skb,
+				     unsigned int seq,
+				     struct drbd_connection_state_change *connection_state_change,
+				     enum drbd_notification_type type)
+{
+	struct connection_info info = {
+		.conn_role = connection_state_change->peer_role[NEW],
+		.conn_connection_state = connection_state_change->cstate[NEW],
+	};
+
+	notify_connection_state(skb, seq, connection_state_change->connection,
+				&info, type);
+}
+
+/**
+ * notify_device_state_change() - Report the new state of a device
+ * @skb: passed through unchanged to notify_device_state()
+ * @seq: passed through unchanged to notify_device_state()
+ * @device_state_change: recorded old/new state of one device
+ * @type: notification type, passed through unchanged
+ *
+ * Copies the NEW side of the recorded disk state into a struct device_info
+ * and hands it to notify_device_state().
+ */
+void notify_device_state_change(struct sk_buff *skb,
+				 unsigned int seq,
+				 struct drbd_device_state_change *device_state_change,
+				 enum drbd_notification_type type)
+{
+	struct device_info info = {
+		.dev_disk_state = device_state_change->disk_state[NEW],
+	};
+
+	notify_device_state(skb, seq, device_state_change->device, &info, type);
+}
+
+/**
+ * notify_peer_device_state_change() - Report the new state of a peer device
+ * @skb: passed through unchanged to notify_peer_device_state()
+ * @seq: passed through unchanged to notify_peer_device_state()
+ * @p: recorded old/new state of one peer device
+ * @type: notification type, passed through unchanged
+ *
+ * Copies the NEW side of the recorded replication state, disk state and
+ * resync-suspend flags into a struct peer_device_info and hands it to
+ * notify_peer_device_state().
+ */
+void notify_peer_device_state_change(struct sk_buff *skb,
+				      unsigned int seq,
+				      struct drbd_peer_device_state_change *p,
+				      enum drbd_notification_type type)
+{
+	struct peer_device_info info = {
+		.peer_resync_susp_dependency = p->resync_susp_dependency[NEW],
+		.peer_resync_susp_peer = p->resync_susp_peer[NEW],
+		.peer_resync_susp_user = p->resync_susp_user[NEW],
+		.peer_disk_state = p->disk_state[NEW],
+		.peer_repl_state = p->repl_state[NEW],
+	};
+
+	notify_peer_device_state(skb, seq, p->peer_device, &info, type);
+}
+
+/**
+ * broadcast_state_change() - Emit notifications for every changed object
+ * @state_change: recorded old/new state; NULL is tolerated and ignored
+ *
+ * Compares the OLD and NEW side of the recorded resource, connection, device
+ * and peer-device states and calls the matching notify_*_state_change()
+ * helper for each object whose state differs.
+ *
+ * Delivery lags by one step: a pending notification is only sent once the
+ * next one is known (then with NOTIFY_CONTINUES or-ed into its type) or once
+ * all objects have been scanned (then with plain NOTIFY_CHANGE).  As a result
+ * only the last notification of the batch lacks NOTIFY_CONTINUES.  The whole
+ * batch is sent while holding notification_mutex.
+ */
+static void broadcast_state_change(struct drbd_state_change *state_change)
+{
+	struct drbd_resource_state_change *resource_state_change;
+	bool resource_state_has_changed;
+	unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
+	void (*last_func)(struct sk_buff *, unsigned int, void *,
+			  enum drbd_notification_type) = NULL;
+	/* Only read while last_func is set; NULL keeps the compiler quiet. */
+	void *last_arg = NULL;
+
+	/*
+	 * NOTE(review): callers obtain the record from
+	 * remember_old_state(..., GFP_ATOMIC), which presumably returns NULL
+	 * when the allocation fails; forget_state_change() already accepts
+	 * NULL.  Skip the broadcast instead of dereferencing a NULL record.
+	 */
+	if (!state_change)
+		return;
+
+	resource_state_change = &state_change->resource[0];
+
+#define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW])
+/* Send the pending notification, if any, with the given type. */
+#define FINAL_STATE_CHANGE(type) \
+	do { \
+		if (last_func) \
+			last_func(NULL, 0, last_arg, (type)); \
+	} while (0)
+/* Flush the pending notification as "continues" and queue a new one. */
+#define REMEMBER_STATE_CHANGE(func, arg, type) \
+	do { \
+		FINAL_STATE_CHANGE((type) | NOTIFY_CONTINUES); \
+		last_func = (typeof(last_func))func; \
+		last_arg = arg; \
+	} while (0)
+
+	mutex_lock(&notification_mutex);
+
+	resource_state_has_changed =
+		HAS_CHANGED(resource_state_change->role) ||
+		HAS_CHANGED(resource_state_change->susp) ||
+		HAS_CHANGED(resource_state_change->susp_nod) ||
+		HAS_CHANGED(resource_state_change->susp_fen);
+
+	if (resource_state_has_changed)
+		REMEMBER_STATE_CHANGE(notify_resource_state_change,
+				      resource_state_change, NOTIFY_CHANGE);
+
+	for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) {
+		struct drbd_connection_state_change *connection_state_change =
+			&state_change->connections[n_connection];
+
+		if (HAS_CHANGED(connection_state_change->peer_role) ||
+		    HAS_CHANGED(connection_state_change->cstate))
+			REMEMBER_STATE_CHANGE(notify_connection_state_change,
+					      connection_state_change, NOTIFY_CHANGE);
+	}
+
+	for (n_device = 0; n_device < state_change->n_devices; n_device++) {
+		struct drbd_device_state_change *device_state_change =
+			&state_change->devices[n_device];
+
+		if (HAS_CHANGED(device_state_change->disk_state))
+			REMEMBER_STATE_CHANGE(notify_device_state_change,
+					      device_state_change, NOTIFY_CHANGE);
+	}
+
+	/* One peer-device record exists per (device, connection) pair. */
+	n_peer_devices = state_change->n_devices * state_change->n_connections;
+	for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) {
+		struct drbd_peer_device_state_change *p =
+			&state_change->peer_devices[n_peer_device];
+
+		if (HAS_CHANGED(p->disk_state) ||
+		    HAS_CHANGED(p->repl_state) ||
+		    HAS_CHANGED(p->resync_susp_user) ||
+		    HAS_CHANGED(p->resync_susp_peer) ||
+		    HAS_CHANGED(p->resync_susp_dependency))
+			REMEMBER_STATE_CHANGE(notify_peer_device_state_change,
+					      p, NOTIFY_CHANGE);
+	}
+
+	/* The last pending notification goes out without NOTIFY_CONTINUES. */
+	FINAL_STATE_CHANGE(NOTIFY_CHANGE);
+	mutex_unlock(&notification_mutex);
+
+#undef HAS_CHANGED
+#undef FINAL_STATE_CHANGE
+#undef REMEMBER_STATE_CHANGE
+}
+
/**
* after_state_ch() - Perform after state change actions that may sleep
* @device: DRBD device.
* @flags: Flags
*/
static void after_state_ch(struct drbd_device *device, union drbd_state os,
- union drbd_state ns, enum chg_state_flags flags)
+ union drbd_state ns, enum chg_state_flags flags,
+ struct drbd_state_change *state_change)
{
struct drbd_resource *resource = device->resource;
struct drbd_peer_device *peer_device = first_peer_device(device);
struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
struct sib_info sib;
+ broadcast_state_change(state_change);
+
sib.sib_reason = SIB_STATE_CHANGE;
sib.os = os;
sib.ns = ns;
}
if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) {
- if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
+ if (os.peer != R_PRIMARY && ns.peer == R_PRIMARY &&
device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
drbd_uuid_new_current(device);
drbd_send_uuids(peer_device);
if (os.disk != D_FAILED && ns.disk == D_FAILED) {
enum drbd_io_error_p eh = EP_PASS_ON;
int was_io_error = 0;
- /* corresponding get_ldev was in __drbd_set_state, to serialize
+ /* corresponding get_ldev was in _drbd_set_state, to serialize
* our cleanup here with the transition to D_DISKLESS.
* But is is still not save to dreference ldev here, since
* we might come from an failed Attach before ldev was set. */
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &device->flags);
+ /* Intentionally call this handler first, before drbd_send_state().
+ * See: 2932204 drbd: call local-io-error handler early
+ * People may choose to hard-reset the box from this handler.
+ * It is useful if this looks like a "regular node crash". */
if (was_io_error && eh == EP_CALL_HELPER)
drbd_khelper(device, "local-io-error");
union drbd_state ns_max; /* new, max state, over all devices */
enum chg_state_flags flags;
struct drbd_connection *connection;
+ struct drbd_state_change *state_change;
};
static int w_after_conn_state_ch(struct drbd_work *w, int unused)
struct drbd_peer_device *peer_device;
int vnr;
+ broadcast_state_change(acscw->state_change);
+ forget_state_change(acscw->state_change);
kfree(acscw);
/* Upon network configuration, we need to start the receiver */
if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
struct net_conf *old_conf;
+ /* Announce destruction of every peer device and then of the connection
+  * as one batch: all but the last notification carry NOTIFY_CONTINUES. */
+ mutex_lock(&notification_mutex);
+ idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
+ notify_peer_device_state(NULL, 0, peer_device, NULL,
+ NOTIFY_DESTROY | NOTIFY_CONTINUES);
+ notify_connection_state(NULL, 0, connection, NULL, NOTIFY_DESTROY);
+ mutex_unlock(&notification_mutex);
+
mutex_lock(&connection->resource->conf_update);
old_conf = connection->net_conf;
connection->my_addr_len = 0;
if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
ns.disk = os.disk;
- rv = __drbd_set_state(device, ns, flags, NULL);
+ rv = _drbd_set_state(device, ns, flags, NULL);
if (rv < SS_SUCCESS)
BUG();
enum drbd_conns oc = connection->cstate;
union drbd_state ns_max, ns_min, os;
bool have_mutex = false;
+ struct drbd_state_change *state_change;
if (mask.conn) {
rv = is_valid_conn_transition(oc, val.conn);
goto abort;
}
+ state_change = remember_old_state(connection->resource, GFP_ATOMIC);
conn_old_common_state(connection, &os, &flags);
flags |= CS_DC_SUSP;
conn_set_state(connection, mask, val, &ns_min, &ns_max, flags);
conn_pr_state_change(connection, os, ns_max, flags);
+ remember_new_state(state_change);
acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC);
if (acscw) {
acscw->w.cb = w_after_conn_state_ch;
kref_get(&connection->kref);
acscw->connection = connection;
+ acscw->state_change = state_change;
drbd_queue_work(&connection->sender_work, &acscw->w);
} else {
drbd_err(connection, "Could not kmalloc an acscw\n");
_drbd_request_state_holding_state_mutex(struct drbd_device *, union drbd_state,
union drbd_state, enum chg_state_flags);
-extern enum drbd_state_rv __drbd_set_state(struct drbd_device *, union drbd_state,
- enum chg_state_flags,
- struct completion *done);
+extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state,
+ enum chg_state_flags,
+ struct completion *done);
extern void print_st_err(struct drbd_device *, union drbd_state,
union drbd_state, int);
--- /dev/null
+#ifndef DRBD_STATE_CHANGE_H
+#define DRBD_STATE_CHANGE_H
+
+/*
+ * Recorded state of one resource.  Every two-element array is indexed by
+ * OLD and NEW: [OLD] is the value before the state change, [NEW] the value
+ * after it.  forget_state_change() drops a kref on @resource when it is
+ * non-NULL.
+ */
+struct drbd_resource_state_change {
+ struct drbd_resource *resource;
+ enum drbd_role role[2];
+ bool susp[2];
+ bool susp_nod[2];
+ bool susp_fen[2];
+};
+
+/*
+ * Recorded state of one device; disk_state is indexed by OLD and NEW.
+ * forget_state_change() drops a kref on @device when it is non-NULL.
+ */
+struct drbd_device_state_change {
+ struct drbd_device *device;
+ enum drbd_disk_state disk_state[2];
+};
+
+/*
+ * Recorded state of one connection; the arrays are indexed by OLD and NEW.
+ * forget_state_change() drops a kref on @connection when it is non-NULL.
+ */
+struct drbd_connection_state_change {
+ struct drbd_connection *connection;
+ enum drbd_conns cstate[2]; /* drbd9: enum drbd_conn_state */
+ enum drbd_role peer_role[2];
+};
+
+/*
+ * Recorded state of one peer device; the arrays are indexed by OLD and NEW.
+ */
+struct drbd_peer_device_state_change {
+ struct drbd_peer_device *peer_device;
+ enum drbd_disk_state disk_state[2];
+ enum drbd_conns repl_state[2]; /* drbd9: enum drbd_repl_state */
+ bool resync_susp_user[2];
+ bool resync_susp_peer[2];
+ bool resync_susp_dependency[2];
+};
+
+/*
+ * Snapshot of a resource's state before and after a state change.
+ *
+ * @devices has n_devices entries and @connections has n_connections entries.
+ * @peer_devices is walked as a flat array of n_devices * n_connections
+ * entries.  @resource is a one-element array, so users address it either as
+ * resource[0] or through plain pointer syntax.  Released with
+ * forget_state_change().
+ */
+struct drbd_state_change {
+ struct list_head list;
+ unsigned int n_devices;
+ unsigned int n_connections;
+ struct drbd_resource_state_change resource[1];
+ struct drbd_device_state_change *devices;
+ struct drbd_connection_state_change *connections;
+ struct drbd_peer_device_state_change *peer_devices;
+};
+
+extern struct drbd_state_change *remember_old_state(struct drbd_resource *, gfp_t);
+extern void copy_old_to_new_state_change(struct drbd_state_change *);
+extern void forget_state_change(struct drbd_state_change *);
+
+extern void notify_resource_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_resource_state_change *,
+ enum drbd_notification_type type);
+extern void notify_connection_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_connection_state_change *,
+ enum drbd_notification_type type);
+extern void notify_device_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_device_state_change *,
+ enum drbd_notification_type type);
+extern void notify_peer_device_state_change(struct sk_buff *,
+ unsigned int,
+ struct drbd_peer_device_state_change *,
+ enum drbd_notification_type type);
+
+#endif /* DRBD_STATE_CHANGE_H */
*
*/
-
-/* About the global_state_lock
- Each state transition on an device holds a read lock. In case we have
- to evaluate the resync after dependencies, we grab a write lock, because
- we need stable states on all devices for that. */
-rwlock_t global_state_lock;
-
/* used for synchronous meta data and bitmap IO
* submitted by drbd_md_sync_page_io()
*/
unsigned long flags = 0;
struct drbd_peer_device *peer_device = peer_req->peer_device;
struct drbd_device *device = peer_device->device;
+ struct drbd_connection *connection = peer_device->connection;
struct drbd_interval i;
int do_wake;
u64 block_id;
* ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
if (peer_req->flags & EE_WAS_ERROR)
__drbd_chk_io_error(device, DRBD_WRITE_ERROR);
+
+ if (connection->cstate >= C_WF_REPORT_PARAMS) {
+ kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
+ if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
+ kref_put(&device->kref, drbd_destroy_device);
+ }
spin_unlock_irqrestore(&device->resource->req_lock, flags);
if (block_id == ID_SYNCER)
if (do_al_complete_io)
drbd_al_complete_io(device, &i);
- wake_asender(peer_device->connection);
put_ldev(device);
}
}
}
+/**
+ * drbd_panic_after_delayed_completion_of_aborted_request() - Panic the kernel
+ * @device: DRBD device to name in the panic message
+ *
+ * Calls panic() with a message that identifies the device by its minor
+ * number, its resource name and its vnr.
+ */
+void drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
+{
+	const char *resource_name = device->resource->name;
+
+	panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
+	      device->minor, resource_name, device->vnr);
+}
+
/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
*/
void drbd_request_endio(struct bio *bio)
drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");
if (!bio->bi_error)
- panic("possible random memory corruption caused by delayed completion of aborted local request\n");
+ drbd_panic_after_delayed_completion_of_aborted_request(device);
}
/* to avoid recursion in __req_mod */
p->barrier = connection->send.current_epoch_nr;
p->pad = 0;
connection->send.current_epoch_writes = 0;
+ connection->send.last_sent_barrier_jif = jiffies;
return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
}
connection->send.seen_any_write_yet = true;
connection->send.current_epoch_nr = epoch;
connection->send.current_epoch_writes = 0;
+ connection->send.last_sent_barrier_jif = jiffies;
}
}
}
/**
- * _drbd_pause_after() - Pause resync on all devices that may not resync now
+ * drbd_pause_after() - Pause resync on all devices that may not resync now
* @device: DRBD device.
*
* Called from process context only (admin command and after_state_ch).
*/
-static int _drbd_pause_after(struct drbd_device *device)
+static bool drbd_pause_after(struct drbd_device *device)
{
+ bool changed = false;
struct drbd_device *odev;
- int i, rv = 0;
+ int i;
rcu_read_lock();
idr_for_each_entry(&drbd_devices, odev, i) {
if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
continue;
- if (!_drbd_may_sync_now(odev))
- rv |= (__drbd_set_state(_NS(odev, aftr_isp, 1), CS_HARD, NULL)
- != SS_NOTHING_TO_DO);
+ if (!_drbd_may_sync_now(odev) &&
+ _drbd_set_state(_NS(odev, aftr_isp, 1),
+ CS_HARD, NULL) != SS_NOTHING_TO_DO)
+ changed = true;
}
rcu_read_unlock();
- return rv;
+ return changed;
}
/**
- * _drbd_resume_next() - Resume resync on all devices that may resync now
+ * drbd_resume_next() - Resume resync on all devices that may resync now
* @device: DRBD device.
*
* Called from process context only (admin command and worker).
*/
-static int _drbd_resume_next(struct drbd_device *device)
+static bool drbd_resume_next(struct drbd_device *device)
{
+ bool changed = false;
struct drbd_device *odev;
- int i, rv = 0;
+ int i;
rcu_read_lock();
idr_for_each_entry(&drbd_devices, odev, i) {
if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
continue;
if (odev->state.aftr_isp) {
- if (_drbd_may_sync_now(odev))
- rv |= (__drbd_set_state(_NS(odev, aftr_isp, 0),
- CS_HARD, NULL)
- != SS_NOTHING_TO_DO) ;
+ if (_drbd_may_sync_now(odev) &&
+ _drbd_set_state(_NS(odev, aftr_isp, 0),
+ CS_HARD, NULL) != SS_NOTHING_TO_DO)
+ changed = true;
}
}
rcu_read_unlock();
- return rv;
+ return changed;
}
void resume_next_sg(struct drbd_device *device)
{
- write_lock_irq(&global_state_lock);
- _drbd_resume_next(device);
- write_unlock_irq(&global_state_lock);
+ lock_all_resources();
+ drbd_resume_next(device);
+ unlock_all_resources();
}
void suspend_other_sg(struct drbd_device *device)
{
- write_lock_irq(&global_state_lock);
- _drbd_pause_after(device);
- write_unlock_irq(&global_state_lock);
+ lock_all_resources();
+ drbd_pause_after(device);
+ unlock_all_resources();
}
-/* caller must hold global_state_lock */
+/* caller must lock_all_resources() */
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
{
struct drbd_device *odev;
}
}
-/* caller must hold global_state_lock */
+/* caller must lock_all_resources() */
void drbd_resync_after_changed(struct drbd_device *device)
{
- int changes;
+ int changed;
do {
- changes = _drbd_pause_after(device);
- changes |= _drbd_resume_next(device);
- } while (changes);
+ changed = drbd_pause_after(device);
+ changed |= drbd_resume_next(device);
+ } while (changed);
}
void drbd_rs_controller_reset(struct drbd_device *device)
} else {
mutex_lock(device->state_mutex);
}
- clear_bit(B_RS_H_DONE, &device->flags);
- /* req_lock: serialize with drbd_send_and_submit() and others
- * global_state_lock: for stable sync-after dependencies */
- spin_lock_irq(&device->resource->req_lock);
- write_lock(&global_state_lock);
+ lock_all_resources();
+ clear_bit(B_RS_H_DONE, &device->flags);
/* Did some connection breakage or IO error race with us? */
if (device->state.conn < C_CONNECTED
|| !get_ldev_if_state(device, D_NEGOTIATING)) {
- write_unlock(&global_state_lock);
- spin_unlock_irq(&device->resource->req_lock);
- mutex_unlock(device->state_mutex);
- return;
+ unlock_all_resources();
+ goto out;
}
ns = drbd_read_state(device);
else /* side == C_SYNC_SOURCE */
ns.pdsk = D_INCONSISTENT;
- r = __drbd_set_state(device, ns, CS_VERBOSE, NULL);
+ r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
ns = drbd_read_state(device);
if (ns.conn < C_CONNECTED)
device->rs_mark_left[i] = tw;
device->rs_mark_time[i] = now;
}
- _drbd_pause_after(device);
+ drbd_pause_after(device);
/* Forget potentially stale cached per resync extent bit-counts.
* Open coded drbd_rs_cancel_all(device), we already have IRQs
* disabled, and know the disk state is ok. */
device->resync_wenr = LC_FREE;
spin_unlock(&device->al_lock);
}
- write_unlock(&global_state_lock);
- spin_unlock_irq(&device->resource->req_lock);
+ unlock_all_resources();
if (r == SS_SUCCESS) {
wake_up(&device->al_wait); /* for lc_reset() above */
drbd_md_sync(device);
}
put_ldev(device);
+out:
mutex_unlock(device->state_mutex);
}
device->act_log = NULL;
__acquire(local);
- drbd_free_ldev(device->ldev);
+ drbd_backing_dev_free(device, device->ldev);
device->ldev = NULL;
__release(local);
/* Device instance number, incremented each time a device is probed. */
static int instance;
-struct list_head online_list;
-struct list_head removing_list;
-spinlock_t dev_lock;
+static struct list_head online_list;
+static struct list_head removing_list;
+static spinlock_t dev_lock;
/*
* Global variable used to hold the major block device number
static void null_lnvm_end_io(struct request *rq, int error)
{
struct nvm_rq *rqd = rq->end_io_data;
- struct nvm_dev *dev = rqd->dev;
- dev->mt->end_io(rqd, error);
+ nvm_end_io(rqd, error);
blk_put_request(rq);
}
id->ppaf.ch_offset = 56;
id->ppaf.ch_len = 8;
- do_div(size, bs); /* convert size to pages */
- do_div(size, 256); /* concert size to pgs pr blk */
+ sector_div(size, bs); /* convert size to pages */
+ size >>= 8; /* convert size to pages per block */
grp = &id->groups[0];
grp->mtype = 0;
grp->fmtype = 0;
grp->num_ch = 1;
grp->num_pg = 256;
blksize = size;
- do_div(size, (1 << 16));
+ size >>= 16;
grp->num_lun = size + 1;
- do_div(blksize, grp->num_lun);
+ sector_div(blksize, grp->num_lun);
grp->num_blk = blksize;
grp->num_pln = 1;
out_err:
rbd_dev_unparent(rbd_dev);
- if (parent)
- rbd_dev_destroy(parent);
+ rbd_dev_destroy(parent);
return ret;
}
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/delay.h>
-#include <linux/time.h>
+#include <linux/ktime.h>
#include <linux/hdreg.h>
#include <linux/dma-mapping.h>
#include <linux/completion.h>
static unsigned int carm_fill_sync_time(struct carm_host *host,
unsigned int idx, void *mem)
{
- struct timeval tv;
struct carm_msg_sync_time *st = mem;
- do_gettimeofday(&tv);
+ time64_t tv = ktime_get_real_seconds();
memset(st, 0, sizeof(*st));
st->type = CARM_MSG_MISC;
st->subtype = MISC_SET_TIME;
st->handle = cpu_to_le32(TAG_ENCODE(idx));
- st->timestamp = cpu_to_le32(tv.tv_sec);
+ st->timestamp = cpu_to_le32(tv);
return sizeof(struct carm_msg_sync_time);
}
MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently");
+/*
+ * Maximum number of rings/queues blkback supports, allow as many queues as there
+ * are CPUs if user has not specified a value.
+ *
+ * Exposed as the "max_queues" module parameter (mode 0644).
+ */
+unsigned int xenblk_max_queues;
+module_param_named(max_queues, xenblk_max_queues, uint, 0644);
+/* Adjacent literals are concatenated: keep the separating space in the text. */
+MODULE_PARM_DESC(max_queues,
+		 "Maximum number of hardware queues per virtual disk. "
+		 "By default it is the number of online CPUs.");
+
/*
* Maximum order of pages to be used for the shared ring between front and
* backend, 4KB page granularity is used.
/* Number of free pages to remove on each call to gnttab_free_pages */
#define NUM_BATCH_FREE_PAGES 10
-static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
+static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
{
unsigned long flags;
- spin_lock_irqsave(&blkif->free_pages_lock, flags);
- if (list_empty(&blkif->free_pages)) {
- BUG_ON(blkif->free_pages_num != 0);
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
+ if (list_empty(&ring->free_pages)) {
+ BUG_ON(ring->free_pages_num != 0);
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
return gnttab_alloc_pages(1, page);
}
- BUG_ON(blkif->free_pages_num == 0);
- page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
+ BUG_ON(ring->free_pages_num == 0);
+ page[0] = list_first_entry(&ring->free_pages, struct page, lru);
list_del(&page[0]->lru);
- blkif->free_pages_num--;
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ ring->free_pages_num--;
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
return 0;
}
-static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
+static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
int num)
{
unsigned long flags;
int i;
- spin_lock_irqsave(&blkif->free_pages_lock, flags);
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
for (i = 0; i < num; i++)
- list_add(&page[i]->lru, &blkif->free_pages);
- blkif->free_pages_num += num;
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ list_add(&page[i]->lru, &ring->free_pages);
+ ring->free_pages_num += num;
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
}
-static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
+static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
{
/* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
struct page *page[NUM_BATCH_FREE_PAGES];
unsigned int num_pages = 0;
unsigned long flags;
- spin_lock_irqsave(&blkif->free_pages_lock, flags);
- while (blkif->free_pages_num > num) {
- BUG_ON(list_empty(&blkif->free_pages));
- page[num_pages] = list_first_entry(&blkif->free_pages,
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
+ while (ring->free_pages_num > num) {
+ BUG_ON(list_empty(&ring->free_pages));
+ page[num_pages] = list_first_entry(&ring->free_pages,
struct page, lru);
list_del(&page[num_pages]->lru);
- blkif->free_pages_num--;
+ ring->free_pages_num--;
if (++num_pages == NUM_BATCH_FREE_PAGES) {
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
gnttab_free_pages(num_pages, page);
- spin_lock_irqsave(&blkif->free_pages_lock, flags);
+ spin_lock_irqsave(&ring->free_pages_lock, flags);
num_pages = 0;
}
}
- spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+ spin_unlock_irqrestore(&ring->free_pages_lock, flags);
if (num_pages != 0)
gnttab_free_pages(num_pages, page);
}
#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
-static int do_block_io_op(struct xen_blkif *blkif);
-static int dispatch_rw_block_io(struct xen_blkif *blkif,
+static int do_block_io_op(struct xen_blkif_ring *ring);
+static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
struct blkif_request *req,
struct pending_req *pending_req);
-static void make_response(struct xen_blkif *blkif, u64 id,
+static void make_response(struct xen_blkif_ring *ring, u64 id,
unsigned short op, int st);
#define foreach_grant_safe(pos, n, rbtree, node) \
/*
* We don't need locking around the persistent grant helpers
- * because blkback uses a single-thread for each backed, so we
+ * because blkback uses a single-thread for each backend, so we
* can be sure that this functions will never be called recursively.
*
* The only exception to that is put_persistent_grant, that can be called
* bit operations to modify the flags of a persistent grant and to count
* the number of used grants.
*/
-static int add_persistent_gnt(struct xen_blkif *blkif,
+static int add_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *persistent_gnt)
{
struct rb_node **new = NULL, *parent = NULL;
struct persistent_gnt *this;
+ struct xen_blkif *blkif = ring->blkif;
- if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+ if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
if (!blkif->vbd.overflow_max_grants)
blkif->vbd.overflow_max_grants = 1;
return -EBUSY;
}
/* Figure out where to put new node */
- new = &blkif->persistent_gnts.rb_node;
+ new = &ring->persistent_gnts.rb_node;
while (*new) {
this = container_of(*new, struct persistent_gnt, node);
set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
/* Add new node and rebalance tree. */
rb_link_node(&(persistent_gnt->node), parent, new);
- rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
- blkif->persistent_gnt_c++;
- atomic_inc(&blkif->persistent_gnt_in_use);
+ rb_insert_color(&(persistent_gnt->node), &ring->persistent_gnts);
+ ring->persistent_gnt_c++;
+ atomic_inc(&ring->persistent_gnt_in_use);
return 0;
}
-static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif_ring *ring,
grant_ref_t gref)
{
struct persistent_gnt *data;
struct rb_node *node = NULL;
- node = blkif->persistent_gnts.rb_node;
+ node = ring->persistent_gnts.rb_node;
while (node) {
data = container_of(node, struct persistent_gnt, node);
return NULL;
}
set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
- atomic_inc(&blkif->persistent_gnt_in_use);
+ atomic_inc(&ring->persistent_gnt_in_use);
return data;
}
}
return NULL;
}
-static void put_persistent_gnt(struct xen_blkif *blkif,
+static void put_persistent_gnt(struct xen_blkif_ring *ring,
struct persistent_gnt *persistent_gnt)
{
if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
pr_alert_ratelimited("freeing a grant already unused\n");
set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
- atomic_dec(&blkif->persistent_gnt_in_use);
+ atomic_dec(&ring->persistent_gnt_in_use);
}
-static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
+static void free_persistent_gnts(struct xen_blkif_ring *ring, struct rb_root *root,
unsigned int num)
{
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
- put_free_pages(blkif, pages, segs_to_unmap);
+ put_free_pages(ring, pages, segs_to_unmap);
segs_to_unmap = 0;
}
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct persistent_gnt *persistent_gnt;
int segs_to_unmap = 0;
- struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+ struct xen_blkif_ring *ring = container_of(work, typeof(*ring), persistent_purge_work);
struct gntab_unmap_queue_data unmap_data;
unmap_data.pages = pages;
unmap_data.unmap_ops = unmap;
unmap_data.kunmap_ops = NULL;
- while(!list_empty(&blkif->persistent_purge_list)) {
- persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+ while(!list_empty(&ring->persistent_purge_list)) {
+ persistent_gnt = list_first_entry(&ring->persistent_purge_list,
struct persistent_gnt,
remove_node);
list_del(&persistent_gnt->remove_node);
if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
- put_free_pages(blkif, pages, segs_to_unmap);
+ put_free_pages(ring, pages, segs_to_unmap);
segs_to_unmap = 0;
}
kfree(persistent_gnt);
if (segs_to_unmap > 0) {
unmap_data.count = segs_to_unmap;
BUG_ON(gnttab_unmap_refs_sync(&unmap_data));
- put_free_pages(blkif, pages, segs_to_unmap);
+ put_free_pages(ring, pages, segs_to_unmap);
}
}
-static void purge_persistent_gnt(struct xen_blkif *blkif)
+static void purge_persistent_gnt(struct xen_blkif_ring *ring)
{
struct persistent_gnt *persistent_gnt;
struct rb_node *n;
bool scan_used = false, clean_used = false;
struct rb_root *root;
- if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
- (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
- !blkif->vbd.overflow_max_grants)) {
- return;
+ if (ring->persistent_gnt_c < xen_blkif_max_pgrants ||
+ (ring->persistent_gnt_c == xen_blkif_max_pgrants &&
+ !ring->blkif->vbd.overflow_max_grants)) {
+ goto out;
}
- if (work_busy(&blkif->persistent_purge_work)) {
+ if (work_busy(&ring->persistent_purge_work)) {
pr_alert_ratelimited("Scheduled work from previous purge is still busy, cannot purge list\n");
- return;
+ goto out;
}
num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
- num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
- num_clean = min(blkif->persistent_gnt_c, num_clean);
+ num_clean = ring->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+ num_clean = min(ring->persistent_gnt_c, num_clean);
if ((num_clean == 0) ||
- (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
- return;
+ (num_clean > (ring->persistent_gnt_c - atomic_read(&ring->persistent_gnt_in_use))))
+ goto out;
/*
* At this point, we can assure that there will be no calls
pr_debug("Going to purge %u persistent grants\n", num_clean);
- BUG_ON(!list_empty(&blkif->persistent_purge_list));
- root = &blkif->persistent_gnts;
+ BUG_ON(!list_empty(&ring->persistent_purge_list));
+ root = &ring->persistent_gnts;
purge_list:
foreach_grant_safe(persistent_gnt, n, root, node) {
BUG_ON(persistent_gnt->handle ==
rb_erase(&persistent_gnt->node, root);
list_add(&persistent_gnt->remove_node,
- &blkif->persistent_purge_list);
+ &ring->persistent_purge_list);
if (--num_clean == 0)
goto finished;
}
goto purge_list;
}
- blkif->persistent_gnt_c -= (total - num_clean);
- blkif->vbd.overflow_max_grants = 0;
+ ring->persistent_gnt_c -= (total - num_clean);
+ ring->blkif->vbd.overflow_max_grants = 0;
/* We can defer this work */
- schedule_work(&blkif->persistent_purge_work);
+ schedule_work(&ring->persistent_purge_work);
pr_debug("Purged %u/%u\n", (total - num_clean), total);
+
+out:
return;
}
/*
* Retrieve from the 'pending_reqs' a free pending_req structure to be used.
*/
-static struct pending_req *alloc_req(struct xen_blkif *blkif)
+static struct pending_req *alloc_req(struct xen_blkif_ring *ring)
{
struct pending_req *req = NULL;
unsigned long flags;
- spin_lock_irqsave(&blkif->pending_free_lock, flags);
- if (!list_empty(&blkif->pending_free)) {
- req = list_entry(blkif->pending_free.next, struct pending_req,
+ spin_lock_irqsave(&ring->pending_free_lock, flags); /* the free pool is per ring and is only touched under this lock */
+ if (!list_empty(&ring->pending_free)) { /* an empty pool leaves req NULL for the caller to handle */
+ req = list_entry(ring->pending_free.next, struct pending_req,
free_list);
list_del(&req->free_list);
}
- spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
+ spin_unlock_irqrestore(&ring->pending_free_lock, flags);
return req;
}
* Return the 'pending_req' structure back to the freepool. We also
* wake up the thread if it was waiting for a free page.
*/
-static void free_req(struct xen_blkif *blkif, struct pending_req *req)
+static void free_req(struct xen_blkif_ring *ring, struct pending_req *req)
{
unsigned long flags;
int was_empty;
- spin_lock_irqsave(&blkif->pending_free_lock, flags);
- was_empty = list_empty(&blkif->pending_free);
- list_add(&req->free_list, &blkif->pending_free);
- spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
+ spin_lock_irqsave(&ring->pending_free_lock, flags);
+ was_empty = list_empty(&ring->pending_free); /* sampled under the lock, before req is put back */
+ list_add(&req->free_list, &ring->pending_free);
+ spin_unlock_irqrestore(&ring->pending_free_lock, flags);
if (was_empty)
- wake_up(&blkif->pending_free_wq);
+ wake_up(&ring->pending_free_wq); /* pool went from empty to non-empty; xen_blkif_schedule() waits on this queue for a free request */
}
/*
/*
* Notification from the guest OS.
*/
-static void blkif_notify_work(struct xen_blkif *blkif)
+static void blkif_notify_work(struct xen_blkif_ring *ring)
{
- blkif->waiting_reqs = 1;
- wake_up(&blkif->wq);
+ ring->waiting_reqs = 1; /* condition tested by the wait on ring->wq in xen_blkif_schedule() */
+ wake_up(&ring->wq); /* wake the thread sleeping on this ring's wait queue */
}
irqreturn_t xen_blkif_be_int(int irq, void *dev_id)
* SCHEDULER FUNCTIONS
*/
-static void print_stats(struct xen_blkif *blkif)
+static void print_stats(struct xen_blkif_ring *ring)
{
pr_info("(%s): oo %3llu | rd %4llu | wr %4llu | f %4llu"
" | ds %4llu | pg: %4u/%4d\n",
- current->comm, blkif->st_oo_req,
- blkif->st_rd_req, blkif->st_wr_req,
- blkif->st_f_req, blkif->st_ds_req,
- blkif->persistent_gnt_c,
+ current->comm, ring->st_oo_req, /* one line of counters for this ring only */
+ ring->st_rd_req, ring->st_wr_req,
+ ring->st_f_req, ring->st_ds_req,
+ ring->persistent_gnt_c,
xen_blkif_max_pgrants);
- blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
- blkif->st_rd_req = 0;
- blkif->st_wr_req = 0;
- blkif->st_oo_req = 0;
- blkif->st_ds_req = 0;
+ ring->st_print = jiffies + msecs_to_jiffies(10 * 1000); /* next report is due 10 seconds from now */
+ ring->st_rd_req = 0;
+ ring->st_wr_req = 0;
+ ring->st_oo_req = 0;
+ ring->st_ds_req = 0; /* NOTE(review): st_f_req is printed above but never reset here, unlike the other four counters; confirm this is intentional */
}
int xen_blkif_schedule(void *arg)
{
- struct xen_blkif *blkif = arg;
+ struct xen_blkif_ring *ring = arg;
+ struct xen_blkif *blkif = ring->blkif;
struct xen_vbd *vbd = &blkif->vbd;
unsigned long timeout;
int ret;
xen_blkif_get(blkif);
+ set_freezable();
while (!kthread_should_stop()) {
if (try_to_freeze())
continue;
timeout = msecs_to_jiffies(LRU_INTERVAL);
timeout = wait_event_interruptible_timeout(
- blkif->wq,
- blkif->waiting_reqs || kthread_should_stop(),
+ ring->wq,
+ ring->waiting_reqs || kthread_should_stop(),
timeout);
if (timeout == 0)
goto purge_gnt_list;
timeout = wait_event_interruptible_timeout(
- blkif->pending_free_wq,
- !list_empty(&blkif->pending_free) ||
+ ring->pending_free_wq,
+ !list_empty(&ring->pending_free) ||
kthread_should_stop(),
timeout);
if (timeout == 0)
goto purge_gnt_list;
- blkif->waiting_reqs = 0;
+ ring->waiting_reqs = 0;
smp_mb(); /* clear flag *before* checking for work */
- ret = do_block_io_op(blkif);
+ ret = do_block_io_op(ring);
if (ret > 0)
- blkif->waiting_reqs = 1;
+ ring->waiting_reqs = 1;
if (ret == -EACCES)
- wait_event_interruptible(blkif->shutdown_wq,
+ wait_event_interruptible(ring->shutdown_wq,
kthread_should_stop());
purge_gnt_list:
if (blkif->vbd.feature_gnt_persistent &&
- time_after(jiffies, blkif->next_lru)) {
- purge_persistent_gnt(blkif);
- blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+ time_after(jiffies, ring->next_lru)) {
+ purge_persistent_gnt(ring);
+ ring->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
}
/* Shrink if we have more than xen_blkif_max_buffer_pages */
- shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
+ shrink_free_pagepool(ring, xen_blkif_max_buffer_pages);
- if (log_stats && time_after(jiffies, blkif->st_print))
- print_stats(blkif);
+ if (log_stats && time_after(jiffies, ring->st_print))
+ print_stats(ring);
}
/* Drain pending purge work */
- flush_work(&blkif->persistent_purge_work);
+ flush_work(&ring->persistent_purge_work);
if (log_stats)
- print_stats(blkif);
+ print_stats(ring);
- blkif->xenblkd = NULL;
+ ring->xenblkd = NULL;
xen_blkif_put(blkif);
return 0;
/*
* Remove persistent grants and empty the pool of free pages
*/
-void xen_blkbk_free_caches(struct xen_blkif *blkif)
+void xen_blkbk_free_caches(struct xen_blkif_ring *ring)
{
/* Free all persistent grant pages */
- if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
- free_persistent_gnts(blkif, &blkif->persistent_gnts,
- blkif->persistent_gnt_c);
+ if (!RB_EMPTY_ROOT(&ring->persistent_gnts))
+ free_persistent_gnts(ring, &ring->persistent_gnts,
+ ring->persistent_gnt_c);
- BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
- blkif->persistent_gnt_c = 0;
+ BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts)); /* the tree must be empty by now, whether or not it had to be freed */
+ ring->persistent_gnt_c = 0; /* keep the counter in step with the now-empty tree */
/* Since we are shutting down remove all pages from the buffer */
- shrink_free_pagepool(blkif, 0 /* All */);
+ shrink_free_pagepool(ring, 0 /* All */);
}
static unsigned int xen_blkbk_unmap_prepare(
- struct xen_blkif *blkif,
+ struct xen_blkif_ring *ring,
struct grant_page **pages,
unsigned int num,
struct gnttab_unmap_grant_ref *unmap_ops,
for (i = 0; i < num; i++) {
if (pages[i]->persistent_gnt != NULL) {
- put_persistent_gnt(blkif, pages[i]->persistent_gnt);
+ put_persistent_gnt(ring, pages[i]->persistent_gnt);
continue;
}
if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data)
{
- struct pending_req* pending_req = (struct pending_req*) (data->data);
- struct xen_blkif *blkif = pending_req->blkif;
+ struct pending_req *pending_req = (struct pending_req *)(data->data);
+ struct xen_blkif_ring *ring = pending_req->ring;
+ struct xen_blkif *blkif = ring->blkif;
/* BUG_ON used to reproduce existing behaviour,
but is this the best way to deal with this? */
BUG_ON(result);
- put_free_pages(blkif, data->pages, data->count);
- make_response(blkif, pending_req->id,
+ put_free_pages(ring, data->pages, data->count);
+ make_response(ring, pending_req->id,
pending_req->operation, pending_req->status);
- free_req(blkif, pending_req);
+ free_req(ring, pending_req);
/*
* Make sure the request is freed before releasing blkif,
* or there could be a race between free_req and the
* pending_free_wq if there's a drain going on, but it has
* to be taken into account if the current model is changed.
*/
- if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
+ if (atomic_dec_and_test(&ring->inflight) && atomic_read(&blkif->drain)) {
complete(&blkif->drain_complete);
}
xen_blkif_put(blkif);
static void xen_blkbk_unmap_and_respond(struct pending_req *req)
{
struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data;
- struct xen_blkif *blkif = req->blkif;
+ struct xen_blkif_ring *ring = req->ring;
struct grant_page **pages = req->segments;
unsigned int invcount;
- invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_segs,
+ invcount = xen_blkbk_unmap_prepare(ring, pages, req->nr_segs,
req->unmap, req->unmap_pages);
work->data = req;
* of hypercalls, but since this is only used in error paths there's
* no real need.
*/
-static void xen_blkbk_unmap(struct xen_blkif *blkif,
+static void xen_blkbk_unmap(struct xen_blkif_ring *ring,
struct grant_page *pages[],
int num)
{
while (num) {
unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
- invcount = xen_blkbk_unmap_prepare(blkif, pages, batch,
+
+ invcount = xen_blkbk_unmap_prepare(ring, pages, batch,
unmap, unmap_pages);
if (invcount) {
ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
BUG_ON(ret);
- put_free_pages(blkif, unmap_pages, invcount);
+ put_free_pages(ring, unmap_pages, invcount);
}
pages += batch;
num -= batch;
}
}
-static int xen_blkbk_map(struct xen_blkif *blkif,
+static int xen_blkbk_map(struct xen_blkif_ring *ring,
struct grant_page *pages[],
int num, bool ro)
{
int ret = 0;
int last_map = 0, map_until = 0;
int use_persistent_gnts;
+ struct xen_blkif *blkif = ring->blkif;
use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
for (i = map_until; i < num; i++) {
uint32_t flags;
- if (use_persistent_gnts)
+ if (use_persistent_gnts) {
persistent_gnt = get_persistent_gnt(
- blkif,
+ ring,
pages[i]->gref);
+ }
if (persistent_gnt) {
/*
pages[i]->page = persistent_gnt->page;
pages[i]->persistent_gnt = persistent_gnt;
} else {
- if (get_free_page(blkif, &pages[i]->page))
+ if (get_free_page(ring, &pages[i]->page))
goto out_of_memory;
addr = vaddr(pages[i]->page);
pages_to_gnt[segs_to_map] = pages[i]->page;
BUG_ON(new_map_idx >= segs_to_map);
if (unlikely(map[new_map_idx].status != 0)) {
pr_debug("invalid buffer -- could not remap it\n");
- put_free_pages(blkif, &pages[seg_idx]->page, 1);
+ put_free_pages(ring, &pages[seg_idx]->page, 1);
pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
ret |= 1;
goto next;
continue;
}
if (use_persistent_gnts &&
- blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
+ ring->persistent_gnt_c < xen_blkif_max_pgrants) {
/*
* We are using persistent grants, the grant is
* not mapped but we might have room for it.
persistent_gnt->gnt = map[new_map_idx].ref;
persistent_gnt->handle = map[new_map_idx].handle;
persistent_gnt->page = pages[seg_idx]->page;
- if (add_persistent_gnt(blkif,
+ if (add_persistent_gnt(ring,
persistent_gnt)) {
kfree(persistent_gnt);
persistent_gnt = NULL;
}
pages[seg_idx]->persistent_gnt = persistent_gnt;
pr_debug("grant %u added to the tree of persistent grants, using %u/%u\n",
- persistent_gnt->gnt, blkif->persistent_gnt_c,
+ persistent_gnt->gnt, ring->persistent_gnt_c,
xen_blkif_max_pgrants);
goto next;
}
out_of_memory:
pr_alert("%s: out of memory\n", __func__);
- put_free_pages(blkif, pages_to_gnt, segs_to_map);
+ put_free_pages(ring, pages_to_gnt, segs_to_map);
return -ENOMEM;
}
{
int rc;
- rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
+ rc = xen_blkbk_map(pending_req->ring, pending_req->segments,
pending_req->nr_segs,
(pending_req->operation != BLKIF_OP_READ));
struct phys_req *preq)
{
struct grant_page **pages = pending_req->indirect_pages;
- struct xen_blkif *blkif = pending_req->blkif;
+ struct xen_blkif_ring *ring = pending_req->ring;
int indirect_grefs, rc, n, nseg, i;
struct blkif_request_segment *segments = NULL;
for (i = 0; i < indirect_grefs; i++)
pages[i]->gref = req->u.indirect.indirect_grefs[i];
- rc = xen_blkbk_map(blkif, pages, indirect_grefs, true);
+ rc = xen_blkbk_map(ring, pages, indirect_grefs, true);
if (rc)
goto unmap;
unmap:
if (segments)
kunmap_atomic(segments);
- xen_blkbk_unmap(blkif, pages, indirect_grefs);
+ xen_blkbk_unmap(ring, pages, indirect_grefs);
return rc;
}
-static int dispatch_discard_io(struct xen_blkif *blkif,
+static int dispatch_discard_io(struct xen_blkif_ring *ring,
struct blkif_request *req)
{
int err = 0;
int status = BLKIF_RSP_OKAY;
+ struct xen_blkif *blkif = ring->blkif;
struct block_device *bdev = blkif->vbd.bdev;
unsigned long secure;
struct phys_req preq;
preq.sector_number + preq.nr_sects, blkif->vbd.pdevice);
goto fail_response;
}
- blkif->st_ds_req++;
+ ring->st_ds_req++;
secure = (blkif->vbd.discard_secure &&
(req->u.discard.flag & BLKIF_DISCARD_SECURE)) ?
} else if (err)
status = BLKIF_RSP_ERROR;
- make_response(blkif, req->u.discard.id, req->operation, status);
+ make_response(ring, req->u.discard.id, req->operation, status);
xen_blkif_put(blkif);
return err;
}
-static int dispatch_other_io(struct xen_blkif *blkif,
+static int dispatch_other_io(struct xen_blkif_ring *ring,
struct blkif_request *req,
struct pending_req *pending_req)
{
- free_req(blkif, pending_req);
- make_response(blkif, req->u.other.id, req->operation,
+ free_req(ring, pending_req); /* unrecognised operation: hand the pending_req straight back to the ring pool, answer BLKIF_RSP_EOPNOTSUPP and return -EIO */
+ make_response(ring, req->u.other.id, req->operation,
BLKIF_RSP_EOPNOTSUPP);
return -EIO;
}
-static void xen_blk_drain_io(struct xen_blkif *blkif)
+static void xen_blk_drain_io(struct xen_blkif_ring *ring)
{
+ struct xen_blkif *blkif = ring->blkif;
+
atomic_set(&blkif->drain, 1);
do {
- if (atomic_read(&blkif->inflight) == 0)
+ if (atomic_read(&ring->inflight) == 0)
break;
wait_for_completion_interruptible_timeout(
&blkif->drain_complete, HZ);
if ((pending_req->operation == BLKIF_OP_FLUSH_DISKCACHE) &&
(error == -EOPNOTSUPP)) {
pr_debug("flush diskcache op failed, not supported\n");
- xen_blkbk_flush_diskcache(XBT_NIL, pending_req->blkif->be, 0);
+ xen_blkbk_flush_diskcache(XBT_NIL, pending_req->ring->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
} else if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
(error == -EOPNOTSUPP)) {
pr_debug("write barrier op failed, not supported\n");
- xen_blkbk_barrier(XBT_NIL, pending_req->blkif->be, 0);
+ xen_blkbk_barrier(XBT_NIL, pending_req->ring->blkif->be, 0);
pending_req->status = BLKIF_RSP_EOPNOTSUPP;
} else if (error) {
pr_debug("Buffer not up-to-date at end of operation,"
* and transmute it to the block API to hand it over to the proper block disk.
*/
static int
-__do_block_io_op(struct xen_blkif *blkif)
+__do_block_io_op(struct xen_blkif_ring *ring)
{
- union blkif_back_rings *blk_rings = &blkif->blk_rings;
+ union blkif_back_rings *blk_rings = &ring->blk_rings;
struct blkif_request req;
struct pending_req *pending_req;
RING_IDX rc, rp;
if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
rc = blk_rings->common.rsp_prod_pvt;
pr_warn("Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
- rp, rc, rp - rc, blkif->vbd.pdevice);
+ rp, rc, rp - rc, ring->blkif->vbd.pdevice);
return -EACCES;
}
while (rc != rp) {
break;
}
- pending_req = alloc_req(blkif);
+ pending_req = alloc_req(ring);
if (NULL == pending_req) {
- blkif->st_oo_req++;
+ ring->st_oo_req++;
more_to_do = 1;
break;
}
- switch (blkif->blk_protocol) {
+ switch (ring->blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
break;
case BLKIF_OP_WRITE_BARRIER:
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_INDIRECT:
- if (dispatch_rw_block_io(blkif, &req, pending_req))
+ if (dispatch_rw_block_io(ring, &req, pending_req))
goto done;
break;
case BLKIF_OP_DISCARD:
- free_req(blkif, pending_req);
- if (dispatch_discard_io(blkif, &req))
+ free_req(ring, pending_req);
+ if (dispatch_discard_io(ring, &req))
goto done;
break;
default:
- if (dispatch_other_io(blkif, &req, pending_req))
+ if (dispatch_other_io(ring, &req, pending_req))
goto done;
break;
}
}
static int
-do_block_io_op(struct xen_blkif *blkif)
+do_block_io_op(struct xen_blkif_ring *ring)
{
- union blkif_back_rings *blk_rings = &blkif->blk_rings;
+ union blkif_back_rings *blk_rings = &ring->blk_rings;
int more_to_do;
do {
- more_to_do = __do_block_io_op(blkif);
+ more_to_do = __do_block_io_op(ring);
if (more_to_do)
break;
* Transmutation of the 'struct blkif_request' to a proper 'struct bio'
* and call the 'submit_bio' to pass it to the underlying storage.
*/
-static int dispatch_rw_block_io(struct xen_blkif *blkif,
+static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
struct blkif_request *req,
struct pending_req *pending_req)
{
switch (req_operation) {
case BLKIF_OP_READ:
- blkif->st_rd_req++;
+ ring->st_rd_req++;
operation = READ;
break;
case BLKIF_OP_WRITE:
- blkif->st_wr_req++;
+ ring->st_wr_req++;
operation = WRITE_ODIRECT;
break;
case BLKIF_OP_WRITE_BARRIER:
drain = true;
case BLKIF_OP_FLUSH_DISKCACHE:
- blkif->st_f_req++;
+ ring->st_f_req++;
operation = WRITE_FLUSH;
break;
default:
preq.nr_sects = 0;
- pending_req->blkif = blkif;
+ pending_req->ring = ring;
pending_req->id = req->u.rw.id;
pending_req->operation = req_operation;
pending_req->status = BLKIF_RSP_OKAY;
goto fail_response;
}
- if (xen_vbd_translate(&preq, blkif, operation) != 0) {
+ if (xen_vbd_translate(&preq, ring->blkif, operation) != 0) {
pr_debug("access denied: %s of [%llu,%llu] on dev=%04x\n",
operation == READ ? "read" : "write",
preq.sector_number,
preq.sector_number + preq.nr_sects,
- blkif->vbd.pdevice);
+ ring->blkif->vbd.pdevice);
goto fail_response;
}
if (((int)preq.sector_number|(int)seg[i].nsec) &
((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
pr_debug("Misaligned I/O request from domain %d\n",
- blkif->domid);
+ ring->blkif->domid);
goto fail_response;
}
}
* issue the WRITE_FLUSH.
*/
if (drain)
- xen_blk_drain_io(pending_req->blkif);
+ xen_blk_drain_io(pending_req->ring);
/*
* If we have failed at this point, we need to undo the M2P override,
* This corresponding xen_blkif_put is done in __end_block_io_op, or
* below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
*/
- xen_blkif_get(blkif);
- atomic_inc(&blkif->inflight);
+ xen_blkif_get(ring->blkif);
+ atomic_inc(&ring->inflight);
for (i = 0; i < nseg; i++) {
while ((bio == NULL) ||
blk_finish_plug(&plug);
if (operation == READ)
- blkif->st_rd_sect += preq.nr_sects;
+ ring->st_rd_sect += preq.nr_sects;
else if (operation & WRITE)
- blkif->st_wr_sect += preq.nr_sects;
+ ring->st_wr_sect += preq.nr_sects;
return 0;
fail_flush:
- xen_blkbk_unmap(blkif, pending_req->segments,
+ xen_blkbk_unmap(ring, pending_req->segments,
pending_req->nr_segs);
fail_response:
/* Haven't submitted any bio's yet. */
- make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
- free_req(blkif, pending_req);
+ make_response(ring, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
+ free_req(ring, pending_req);
msleep(1); /* back off a bit */
return -EIO;
/*
* Put a response on the ring on how the operation fared.
*/
-static void make_response(struct xen_blkif *blkif, u64 id,
+static void make_response(struct xen_blkif_ring *ring, u64 id,
unsigned short op, int st)
{
struct blkif_response resp;
unsigned long flags;
- union blkif_back_rings *blk_rings = &blkif->blk_rings;
+ union blkif_back_rings *blk_rings;
int notify;
resp.id = id;
resp.operation = op;
resp.status = st;
- spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+ spin_lock_irqsave(&ring->blk_ring_lock, flags);
+ blk_rings = &ring->blk_rings;
/* Place on the response ring for the relevant domain. */
- switch (blkif->blk_protocol) {
+ switch (ring->blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
&resp, sizeof(resp));
}
blk_rings->common.rsp_prod_pvt++;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
- spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+ spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
if (notify)
- notify_remote_via_irq(blkif->irq);
+ notify_remote_via_irq(ring->irq);
}
static int __init xen_blkif_init(void)
xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
}
+ if (xenblk_max_queues == 0)
+ xenblk_max_queues = num_online_cpus();
+
rc = xen_blkif_interface_init();
if (rc)
goto failed_init;
#include <xen/interface/io/protocols.h>
extern unsigned int xen_blkif_max_ring_order;
+extern unsigned int xenblk_max_queues;
/*
* This is the maximum number of segments that would be allowed in indirect
* requests. This value will also be passed to the frontend.
struct list_head remove_node;
};
-struct xen_blkif {
- /* Unique identifier for this interface. */
- domid_t domid;
- unsigned int handle;
+/* Per-ring information: the state private to one I/O ring of an interface.
+ * An array of these hangs off struct xen_blkif (rings / nr_rings). */
+struct xen_blkif_ring {
/* Physical parameters of the comms window. */
unsigned int irq;
- /* Comms information. */
- enum blkif_protocol blk_protocol;
union blkif_back_rings blk_rings;
void *blk_ring;
- /* The VBD attached to this interface. */
- struct xen_vbd vbd;
- /* Back pointer to the backend_info. */
- struct backend_info *be;
/* Private fields. */
spinlock_t blk_ring_lock;
- atomic_t refcnt;
wait_queue_head_t wq;
- /* for barrier (drain) requests */
- struct completion drain_complete;
- atomic_t drain;
atomic_t inflight;
- /* One thread per one blkif. */
+ /* One thread per blkif ring. */
struct task_struct *xenblkd;
unsigned int waiting_reqs;
- /* tree to store persistent grants */
+ /* List of all 'pending_req' available */
+ struct list_head pending_free;
+ /* And its spinlock. */
+ spinlock_t pending_free_lock;
+ wait_queue_head_t pending_free_wq;
+
+ /* Tree to store persistent grants. */
+ spinlock_t pers_gnts_lock; /* NOTE(review): xen_blkif_alloc_rings() never calls spin_lock_init() on this lock; confirm it is actually used */
struct rb_root persistent_gnts;
unsigned int persistent_gnt_c;
atomic_t persistent_gnt_in_use;
unsigned long next_lru;
- /* used by the kworker that offload work from the persistent purge */
+ /* Statistics, kept per ring; the sysfs show handlers add them up over all rings. */
+ unsigned long st_print; /* jiffies value after which print_stats() is due again */
+ unsigned long long st_rd_req;
+ unsigned long long st_wr_req;
+ unsigned long long st_oo_req; /* bumped when alloc_req() finds no free pending_req */
+ unsigned long long st_f_req;
+ unsigned long long st_ds_req;
+ unsigned long long st_rd_sect;
+ unsigned long long st_wr_sect;
+
+ /* Used by the kworker that offloads work from the persistent purge. */
struct list_head persistent_purge_list;
struct work_struct persistent_purge_work;
- /* buffer of free pages to map grant refs */
+ /* Buffer of free pages to map grant refs. */
spinlock_t free_pages_lock;
int free_pages_num;
struct list_head free_pages;
- /* List of all 'pending_req' available */
- struct list_head pending_free;
- /* And its spinlock. */
- spinlock_t pending_free_lock;
- wait_queue_head_t pending_free_wq;
-
- /* statistics */
- unsigned long st_print;
- unsigned long long st_rd_req;
- unsigned long long st_wr_req;
- unsigned long long st_oo_req;
- unsigned long long st_f_req;
- unsigned long long st_ds_req;
- unsigned long long st_rd_sect;
- unsigned long long st_wr_sect;
-
struct work_struct free_work;
/* Thread shutdown wait queue. */
wait_queue_head_t shutdown_wq;
- unsigned int nr_ring_pages;
+ struct xen_blkif *blkif; /* Back pointer to the interface this ring belongs to. */
+};
+
+struct xen_blkif {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Comms information. */
+ enum blkif_protocol blk_protocol;
+ /* The VBD attached to this interface. */
+ struct xen_vbd vbd;
+ /* Back pointer to the backend_info. */
+ struct backend_info *be;
+ atomic_t refcnt; /* every ring holds one reference, taken in xen_blkif_alloc_rings() */
+ /* For barrier (drain) requests; a single instance shared by all rings. */
+ struct completion drain_complete;
+ atomic_t drain;
+
+ struct work_struct free_work;
+ unsigned int nr_ring_pages;
+ /* All rings for this device. */
+ struct xen_blkif_ring *rings;
+ unsigned int nr_rings; /* number of elements in rings[] */
};
struct seg_buf {
* response queued for it, with the saved 'id' passed back.
*/
struct pending_req {
- struct xen_blkif *blkif;
+ struct xen_blkif_ring *ring;
u64 id;
int nr_segs;
atomic_t pendcnt;
irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);
int xen_blkif_purge_persistent(void *arg);
-void xen_blkbk_free_caches(struct xen_blkif *blkif);
+void xen_blkbk_free_caches(struct xen_blkif_ring *ring);
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
struct backend_info *be, int state);
{
int err;
char name[BLKBACK_NAME_LEN];
+ struct xen_blkif_ring *ring;
+ int i;
/* Not ready to connect? */
- if (!blkif->irq || !blkif->vbd.bdev)
+ if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
return;
/* Already connected? */
}
invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
- blkif->xenblkd = kthread_run(xen_blkif_schedule, blkif, "%s", name);
- if (IS_ERR(blkif->xenblkd)) {
- err = PTR_ERR(blkif->xenblkd);
- blkif->xenblkd = NULL;
- xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
- return;
+ for (i = 0; i < blkif->nr_rings; i++) {
+ ring = &blkif->rings[i];
+ ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
+ if (IS_ERR(ring->xenblkd)) {
+ err = PTR_ERR(ring->xenblkd);
+ ring->xenblkd = NULL;
+ xenbus_dev_fatal(blkif->be->dev, err,
+ "start %s-%d xenblkd", name, i);
+ goto out;
+ }
+ }
+ return;
+
+out:
+ while (--i >= 0) {
+ ring = &blkif->rings[i];
+ kthread_stop(ring->xenblkd);
+ }
+ return;
+}
+
+/*
+ * Allocate and initialise the blkif->nr_rings entries of blkif->rings.
+ *
+ * blkif->nr_rings must have been set by the caller.  Every ring takes its
+ * own reference on @blkif; xen_blkif_disconnect() drops it again.
+ *
+ * Returns 0 on success or -ENOMEM if the ring array cannot be allocated.
+ */
+static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
+{
+	unsigned int r;
+
+	/*
+	 * kcalloc() zeroes the array exactly like kzalloc() did, but it also
+	 * checks the nr_rings * sizeof() multiplication and fails cleanly
+	 * instead of silently under-allocating on overflow.
+	 */
+	blkif->rings = kcalloc(blkif->nr_rings, sizeof(*blkif->rings),
+			       GFP_KERNEL);
+	if (!blkif->rings)
+		return -ENOMEM;
+
+	for (r = 0; r < blkif->nr_rings; r++) {
+		struct xen_blkif_ring *ring = &blkif->rings[r];
+
+		/*
+		 * Counters, atomics and the persistent grant tree root are
+		 * not set up explicitly: they rely on the zeroed allocation.
+		 */
+		spin_lock_init(&ring->blk_ring_lock);
+		init_waitqueue_head(&ring->wq);
+		INIT_LIST_HEAD(&ring->pending_free);
+		INIT_LIST_HEAD(&ring->persistent_purge_list);
+		INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
+		spin_lock_init(&ring->free_pages_lock);
+		INIT_LIST_HEAD(&ring->free_pages);
+
+		spin_lock_init(&ring->pending_free_lock);
+		init_waitqueue_head(&ring->pending_free_wq);
+		init_waitqueue_head(&ring->shutdown_wq);
+		ring->blkif = blkif;
+		ring->st_print = jiffies;
+		/* One reference per ring. */
+		xen_blkif_get(blkif);
}
+
+	return 0;
}
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
return ERR_PTR(-ENOMEM);
blkif->domid = domid;
- spin_lock_init(&blkif->blk_ring_lock);
atomic_set(&blkif->refcnt, 1);
- init_waitqueue_head(&blkif->wq);
init_completion(&blkif->drain_complete);
- atomic_set(&blkif->drain, 0);
- blkif->st_print = jiffies;
- blkif->persistent_gnts.rb_node = NULL;
- spin_lock_init(&blkif->free_pages_lock);
- INIT_LIST_HEAD(&blkif->free_pages);
- INIT_LIST_HEAD(&blkif->persistent_purge_list);
- blkif->free_pages_num = 0;
- atomic_set(&blkif->persistent_gnt_in_use, 0);
- atomic_set(&blkif->inflight, 0);
- INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
-
- INIT_LIST_HEAD(&blkif->pending_free);
INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
- spin_lock_init(&blkif->pending_free_lock);
- init_waitqueue_head(&blkif->pending_free_wq);
- init_waitqueue_head(&blkif->shutdown_wq);
return blkif;
}
-static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
+static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
unsigned int nr_grefs, unsigned int evtchn)
{
int err;
+ struct xen_blkif *blkif = ring->blkif;
/* Already connected through? */
- if (blkif->irq)
+ if (ring->irq)
return 0;
err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
- &blkif->blk_ring);
+ &ring->blk_ring);
if (err < 0)
return err;
case BLKIF_PROTOCOL_NATIVE:
{
struct blkif_sring *sring;
- sring = (struct blkif_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.native, sring,
+ sring = (struct blkif_sring *)ring->blk_ring;
+ BACK_RING_INIT(&ring->blk_rings.native, sring,
XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
struct blkif_x86_32_sring *sring_x86_32;
- sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+ sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
+ BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
struct blkif_x86_64_sring *sring_x86_64;
- sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+ sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
+ BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
XEN_PAGE_SIZE * nr_grefs);
break;
}
err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
xen_blkif_be_int, 0,
- "blkif-backend", blkif);
+ "blkif-backend", ring);
if (err < 0) {
- xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
- blkif->blk_rings.common.sring = NULL;
+ xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+ ring->blk_rings.common.sring = NULL;
return err;
}
- blkif->irq = err;
+ ring->irq = err;
return 0;
}
static int xen_blkif_disconnect(struct xen_blkif *blkif)
{
struct pending_req *req, *n;
- int i = 0, j;
+ unsigned int j, r;
- if (blkif->xenblkd) {
- kthread_stop(blkif->xenblkd);
- wake_up(&blkif->shutdown_wq);
- blkif->xenblkd = NULL;
- }
+ for (r = 0; r < blkif->nr_rings; r++) {
+ struct xen_blkif_ring *ring = &blkif->rings[r];
+ unsigned int i = 0;
- /* The above kthread_stop() guarantees that at this point we
- * don't have any discard_io or other_io requests. So, checking
- * for inflight IO is enough.
- */
- if (atomic_read(&blkif->inflight) > 0)
- return -EBUSY;
+ if (ring->xenblkd) {
+ kthread_stop(ring->xenblkd);
+ wake_up(&ring->shutdown_wq);
+ ring->xenblkd = NULL;
+ }
- if (blkif->irq) {
- unbind_from_irqhandler(blkif->irq, blkif);
- blkif->irq = 0;
- }
+ /* The above kthread_stop() guarantees that at this point we
+ * don't have any discard_io or other_io requests. So, checking
+ * for inflight IO is enough.
+ */
+ if (atomic_read(&ring->inflight) > 0)
+ return -EBUSY;
- if (blkif->blk_rings.common.sring) {
- xenbus_unmap_ring_vfree(blkif->be->dev, blkif->blk_ring);
- blkif->blk_rings.common.sring = NULL;
- }
+ if (ring->irq) {
+ unbind_from_irqhandler(ring->irq, ring);
+ ring->irq = 0;
+ }
- /* Remove all persistent grants and the cache of ballooned pages. */
- xen_blkbk_free_caches(blkif);
+ if (ring->blk_rings.common.sring) {
+ xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+ ring->blk_rings.common.sring = NULL;
+ }
- /* Check that there is no request in use */
- list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
- list_del(&req->free_list);
+ /* Remove all persistent grants and the cache of ballooned pages. */
+ xen_blkbk_free_caches(ring);
- for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
- kfree(req->segments[j]);
+ /* Check that there is no request in use */
+ list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
+ list_del(&req->free_list);
- for (j = 0; j < MAX_INDIRECT_PAGES; j++)
- kfree(req->indirect_pages[j]);
+ for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+ kfree(req->segments[j]);
- kfree(req);
- i++;
- }
+ for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+ kfree(req->indirect_pages[j]);
+
+ kfree(req);
+ i++;
+ }
- WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
+ BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
+ BUG_ON(!list_empty(&ring->persistent_purge_list));
+ BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
+ BUG_ON(!list_empty(&ring->free_pages));
+ BUG_ON(ring->free_pages_num != 0);
+ BUG_ON(ring->persistent_gnt_c != 0);
+ WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
+ xen_blkif_put(blkif);
+ }
blkif->nr_ring_pages = 0;
+ /*
+ * blkif->rings was allocated in connect_ring, so we should free it in
+ * here.
+ */
+ kfree(blkif->rings);
+ blkif->rings = NULL;
+ blkif->nr_rings = 0;
return 0;
}
xen_vbd_free(&blkif->vbd);
/* Make sure everything is drained before shutting down */
- BUG_ON(blkif->persistent_gnt_c != 0);
- BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
- BUG_ON(blkif->free_pages_num != 0);
- BUG_ON(!list_empty(&blkif->persistent_purge_list));
- BUG_ON(!list_empty(&blkif->free_pages));
- BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
-
kmem_cache_free(xen_blkif_cachep, blkif);
}
* sysfs interface for VBD I/O requests
*/
-#define VBD_SHOW(name, format, args...) \
+#define VBD_SHOW_ALLRING(name, format) /* defines show_<name>() plus its read-only device attribute */ \
static ssize_t show_##name(struct device *_dev, \
struct device_attribute *attr, \
char *buf) \
{ \
struct xenbus_device *dev = to_xenbus_device(_dev); \
struct backend_info *be = dev_get_drvdata(&dev->dev); \
+ struct xen_blkif *blkif = be->blkif; \
+ unsigned int i; \
+ unsigned long long result = 0; /* sum of the st_<name> counter over all rings */ \
\
- return sprintf(buf, format, ##args); \
+ if (!blkif->rings) /* no ring array allocated: report 0 */ \
+ goto out; \
+ \
+ for (i = 0; i < blkif->nr_rings; i++) { \
+ struct xen_blkif_ring *ring = &blkif->rings[i]; \
+ \
+ result += ring->st_##name; /* e.g. ring->st_rd_req when name is rd_req */ \
+ } \
+ \
+out: \
+ return sprintf(buf, format, result); \
} \
static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
-VBD_SHOW(oo_req, "%llu\n", be->blkif->st_oo_req);
-VBD_SHOW(rd_req, "%llu\n", be->blkif->st_rd_req);
-VBD_SHOW(wr_req, "%llu\n", be->blkif->st_wr_req);
-VBD_SHOW(f_req, "%llu\n", be->blkif->st_f_req);
-VBD_SHOW(ds_req, "%llu\n", be->blkif->st_ds_req);
-VBD_SHOW(rd_sect, "%llu\n", be->blkif->st_rd_sect);
-VBD_SHOW(wr_sect, "%llu\n", be->blkif->st_wr_sect);
+VBD_SHOW_ALLRING(oo_req, "%llu\n");
+VBD_SHOW_ALLRING(rd_req, "%llu\n");
+VBD_SHOW_ALLRING(wr_req, "%llu\n");
+VBD_SHOW_ALLRING(f_req, "%llu\n");
+VBD_SHOW_ALLRING(ds_req, "%llu\n");
+VBD_SHOW_ALLRING(rd_sect, "%llu\n");
+VBD_SHOW_ALLRING(wr_sect, "%llu\n");
static struct attribute *xen_vbdstat_attrs[] = {
&dev_attr_oo_req.attr,
.attrs = xen_vbdstat_attrs,
};
+#define VBD_SHOW(name, format, args...) \
+ static ssize_t show_##name(struct device *_dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+ { \
+ struct xenbus_device *dev = to_xenbus_device(_dev); \
+ struct backend_info *be = dev_get_drvdata(&dev->dev); \
+ \
+ return sprintf(buf, format, ##args); \
+ } \
+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);
dev_set_drvdata(&dev->dev, NULL);
if (be->blkif) {
xen_blkif_disconnect(be->blkif);
+ /*
+ * Put the reference we set in xen_blkif_alloc(). This has to
+ * stay under the NULL check above: be->blkif may be NULL here
+ * and must not be handed to xen_blkif_put().
+ */
xen_blkif_put(be->blkif);
}
kfree(be->mode);
kfree(be);
return 0;
goto fail;
}
+ /* Multi-queue: advertise how many queues are supported by us.*/
+ err = xenbus_printf(XBT_NIL, dev->nodename,
+ "multi-queue-max-queues", "%u", xenblk_max_queues);
+ if (err)
+ pr_warn("Error writing multi-queue-max-queues\n");
+
/* setup back pointer */
be->blkif->be = be;
}
err = connect_ring(be);
- if (err)
+ if (err) {
+ /*
+ * Clean up so that memory resources can be used by
+ * other devices. connect_ring() has already reported the error.
+ */
+ xen_blkif_disconnect(be->blkif);
break;
+ }
xen_update_blkif_status(be->blkif);
break;
xenbus_transaction_end(xbt, 1);
}
-
-static int connect_ring(struct backend_info *be)
+/*
+ * Each ring may span multiple pages, depending on "ring-page-order".
+ */
+static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
{
- struct xenbus_device *dev = be->dev;
unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
- unsigned int evtchn, nr_grefs, ring_page_order;
- unsigned int pers_grants;
- char protocol[64] = "";
struct pending_req *req, *n;
int err, i, j;
+ struct xen_blkif *blkif = ring->blkif;
+ struct xenbus_device *dev = blkif->be->dev;
+ unsigned int ring_page_order, nr_grefs, evtchn;
- pr_debug("%s %s\n", __func__, dev->otherend);
-
- err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+ err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
&evtchn);
if (err != 1) {
err = -EINVAL;
- xenbus_dev_fatal(dev, err, "reading %s/event-channel",
- dev->otherend);
+ xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir);
return err;
}
- pr_info("event-channel %u\n", evtchn);
err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
&ring_page_order);
if (err != 1) {
- err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
- "%u", &ring_ref[0]);
+ err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]);
if (err != 1) {
err = -EINVAL;
- xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
- dev->otherend);
+ xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
return err;
}
nr_grefs = 1;
- pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
- ring_ref[0]);
} else {
unsigned int i;
if (ring_page_order > xen_blkif_max_ring_order) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
- dev->otherend, ring_page_order,
+ dir, ring_page_order,
xen_blkif_max_ring_order);
return err;
}
char ring_ref_name[RINGREF_NAME_LEN];
snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
- err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
+ err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
"%u", &ring_ref[i]);
if (err != 1) {
err = -EINVAL;
xenbus_dev_fatal(dev, err, "reading %s/%s",
- dev->otherend, ring_ref_name);
+ dir, ring_ref_name);
return err;
}
- pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
}
}
-
- be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
- err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
- "%63s", protocol, NULL);
- if (err)
- strcpy(protocol, "unspecified, assuming default");
- else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
- be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
- else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
- be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
- else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
- be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
- else {
- xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
- return -1;
- }
- err = xenbus_gather(XBT_NIL, dev->otherend,
- "feature-persistent", "%u",
- &pers_grants, NULL);
- if (err)
- pers_grants = 0;
-
- be->blkif->vbd.feature_gnt_persistent = pers_grants;
- be->blkif->vbd.overflow_max_grants = 0;
- be->blkif->nr_ring_pages = nr_grefs;
-
- pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
- nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
- pers_grants ? "persistent grants" : "");
+ blkif->nr_ring_pages = nr_grefs;
for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
req = kzalloc(sizeof(*req), GFP_KERNEL);
if (!req)
goto fail;
- list_add_tail(&req->free_list, &be->blkif->pending_free);
+ list_add_tail(&req->free_list, &ring->pending_free);
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
if (!req->segments[j])
}
/* Map the shared frame, irq etc. */
- err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
+ err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
if (err) {
xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
return err;
return 0;
fail:
- list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
+ list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
list_del(&req->free_list);
for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
if (!req->segments[j])
kfree(req);
}
return -ENOMEM;
+
+}
+
+/*
+ * Negotiate the connection parameters published by the frontend and set up
+ * every ring of the device.
+ *
+ * Reads "protocol", "feature-persistent" and "multi-queue-num-queues" from
+ * the frontend's xenstore directory, allocates be->blkif->rings accordingly
+ * and calls read_per_ring_refs() once per ring. With a single ring the ring
+ * keys are read from the frontend directory itself; with several rings each
+ * ring is read from its own "queue-<index>" subdirectory.
+ *
+ * Returns 0 on success, -ENOSYS for an unknown protocol or an unacceptable
+ * queue count, -ENOMEM on allocation failure, or the error returned by
+ * read_per_ring_refs(). Nothing allocated here is released on failure; the
+ * caller is expected to clean up via xen_blkif_disconnect().
+ */
+static int connect_ring(struct backend_info *be)
+{
+ struct xenbus_device *dev = be->dev;
+ unsigned int pers_grants;
+ char protocol[64] = "";
+ int err;
+ unsigned int i;
+ char *xspath;
+ size_t xspathsize;
+ const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
+ unsigned int requested_num_queues = 0;
+
+ pr_debug("%s %s\n", __func__, dev->otherend);
+
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
+ err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
+ "%63s", protocol, NULL);
+ if (err)
+ strcpy(protocol, "unspecified, assuming default");
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
+ else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
+ be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
+ else {
+ /* err is 0 on this path, so report the code we actually return. */
+ xenbus_dev_fatal(dev, -ENOSYS, "unknown fe protocol %s", protocol);
+ return -ENOSYS;
+ }
+ err = xenbus_gather(XBT_NIL, dev->otherend,
+ "feature-persistent", "%u",
+ &pers_grants, NULL);
+ if (err)
+ pers_grants = 0;
+
+ be->blkif->vbd.feature_gnt_persistent = pers_grants;
+ be->blkif->vbd.overflow_max_grants = 0;
+
+ /*
+ * Read the number of hardware queues from frontend.
+ */
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "multi-queue-num-queues",
+ "%u", &requested_num_queues);
+ if (err < 0) {
+ /* The key could not be read: fall back to a single ring. */
+ requested_num_queues = 1;
+ } else if (requested_num_queues > xenblk_max_queues ||
+ requested_num_queues == 0) {
+ /*
+ * Buggy or malicious guest. err holds a non-negative
+ * xenbus_scanf() result here rather than an errno, so pass
+ * the error code explicitly.
+ */
+ xenbus_dev_fatal(dev, -ENOSYS,
+ "guest requested %u queues, exceeding the maximum of %u.",
+ requested_num_queues, xenblk_max_queues);
+ return -ENOSYS;
+ }
+ be->blkif->nr_rings = requested_num_queues;
+ if (xen_blkif_alloc_rings(be->blkif))
+ return -ENOMEM;
+
+ pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
+ be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
+ pers_grants ? "persistent grants" : "");
+
+ /* Single ring: its keys are read from the frontend directory itself. */
+ if (be->blkif->nr_rings == 1)
+ return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
+
+ /* Several rings: ring i is read from "<otherend>/queue-<i>". */
+ xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
+ xspath = kmalloc(xspathsize, GFP_KERNEL);
+ if (!xspath) {
+ xenbus_dev_fatal(dev, -ENOMEM, "reading ring references");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < be->blkif->nr_rings; i++) {
+ /* snprintf() always NUL-terminates, no need to clear first. */
+ snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
+ err = read_per_ring_refs(&be->blkif->rings[i], xspath);
+ if (err) {
+ kfree(xspath);
+ return err;
+ }
+ }
+ kfree(xspath);
+ return 0;
}
static const struct xenbus_device_id xen_blkbk_ids[] = {
#include <asm/xen/hypervisor.h>
+/*
+ * The minimal size of segment supported by the block framework is PAGE_SIZE.
+ * When Linux is using a different page size than Xen, it may not be possible
+ * to put all the data in a single segment.
+ * This can happen when the backend doesn't support indirect descriptors and
+ * therefore the maximum amount of data that a request can carry is
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE = 44KB
+ *
+ * Note that we only support one extra request. So the Linux page size
+ * should be <= ( 2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) =
+ * 88KB.
+ */
+#define HAS_EXTRA_REQ (BLKIF_MAX_SEGMENTS_PER_REQUEST < XEN_PFN_PER_PAGE)
+
enum blkif_state {
BLKIF_STATE_DISCONNECTED,
BLKIF_STATE_CONNECTED,
struct list_head node;
};
+enum blk_req_status { /* state kept in blk_shadow.status for each request placed on a ring */
+ REQ_WAITING, /* initial state, set by blkif_ring_get_request() when the request is queued */
+ REQ_DONE, /* NOTE(review): presumably a successful backend response - confirm in the response handler */
+ REQ_ERROR, /* NOTE(review): presumably a failed backend response - confirm in the response handler */
+ REQ_EOPNOTSUPP, /* NOTE(review): presumably "operation not supported by the backend" - confirm */
+};
+
struct blk_shadow {
struct blkif_request req;
struct request *request;
struct grant **indirect_grants;
struct scatterlist *sg;
unsigned int num_sg;
+ enum blk_req_status status;
+
+ #define NO_ASSOCIATED_ID ~0UL
+ /*
+ * Id of the sibling if we ever need 2 requests when handling a
+ * block I/O request
+ */
+ unsigned long associated_id;
};
struct split_bio {
module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
+static unsigned int xen_blkif_max_queues = 4;
+module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO);
+MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk");
+
/*
* Maximum order of pages to be used for the shared ring between front and
* backend, 4KB page granularity is used.
__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS)
/*
- * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
- * characters are enough. Define to 20 to keep consist with backend.
+ * ring-ref%u i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
+ * characters are enough. Define to 20 to keep consistent with backend.
*/
#define RINGREF_NAME_LEN (20)
+/*
+ * "queue-%u" takes 6 ("queue-") + 10 (digits of UINT_MAX) + 1 (NUL) = 17 bytes.
+ */
+#define QUEUE_NAME_LEN (17)
+
+/*
+ * Per-ring info.
+ * Every blkfront device can associate with one or more blkfront_ring_info,
+ * depending on how many hardware queues/rings to be used.
+ * The device keeps them in blkfront_info.rinfo (nr_rings entries) and each
+ * blk-mq hardware context points at one of them through hctx->driver_data.
+ */
+struct blkfront_ring_info {
+ /* Lock to protect data in every ring buffer. */
+ spinlock_t ring_lock;
+ struct blkif_front_ring ring; /* front-end view of this ring's request ring */
+ unsigned int ring_ref[XENBUS_MAX_RING_GRANTS]; /* NOTE(review): presumably the grant refs of the ring pages - confirm */
+ unsigned int evtchn, irq; /* irq is the one kicked by flush_requests() */
+ struct work_struct work; /* scheduled by blkif_restart_queue_callback(), runs blkif_restart_queue() */
+ struct gnttab_free_callback callback; /* registered when grant references cannot be allocated */
+ struct blk_shadow shadow[BLK_MAX_RING_SIZE]; /* per-request bookkeeping, indexed by request id */
+ struct list_head indirect_pages; /* pre-allocated pages used for indirect grefs */
+ struct list_head grants; /* struct grant entries available to get_free_grant() */
+ unsigned int persistent_gnts_c; /* entries on 'grants' that still carry a valid gref */
+ unsigned long shadow_free; /* index of the first free shadow[] entry */
+ struct blkfront_info *dev_info; /* device this ring belongs to */
+};
/*
* We have one of these per vbd, whether ide, scsi or 'other'. They
*/
struct blkfront_info
{
- spinlock_t io_lock;
struct mutex mutex;
struct xenbus_device *xbdev;
struct gendisk *gd;
int vdevice;
blkif_vdev_t handle;
enum blkif_state connected;
- int ring_ref[XENBUS_MAX_RING_GRANTS];
+ /* Number of pages per ring buffer. */
unsigned int nr_ring_pages;
- struct blkif_front_ring ring;
- unsigned int evtchn, irq;
struct request_queue *rq;
- struct work_struct work;
- struct gnttab_free_callback callback;
- struct blk_shadow shadow[BLK_MAX_RING_SIZE];
- struct list_head grants;
- struct list_head indirect_pages;
- unsigned int persistent_gnts_c;
- unsigned long shadow_free;
unsigned int feature_flush;
unsigned int feature_discard:1;
unsigned int feature_secdiscard:1;
unsigned int max_indirect_segments;
int is_ready;
struct blk_mq_tag_set tag_set;
+ struct blkfront_ring_info *rinfo;
+ unsigned int nr_rings;
};
static unsigned int nr_minors;
#define GREFS(_psegs) ((_psegs) * GRANTS_PER_PSEG)
-static int blkfront_setup_indirect(struct blkfront_info *info);
-static int blkfront_gather_backend_features(struct blkfront_info *info);
+static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
+static void blkfront_gather_backend_features(struct blkfront_info *info);
-static int get_id_from_freelist(struct blkfront_info *info)
+/*
+ * Pop the first entry off the ring's shadow free list and return its index.
+ * A free entry stores the index of the next free entry in req.u.rw.id; once
+ * taken, that field is overwritten with a recognisable debug value.
+ */
+static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
{
- unsigned long free = info->shadow_free;
- BUG_ON(free >= BLK_RING_SIZE(info));
- info->shadow_free = info->shadow[free].req.u.rw.id;
- info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
+ unsigned long free = rinfo->shadow_free;
+ struct blk_shadow *entry;
+
+ BUG_ON(free >= BLK_RING_SIZE(rinfo->dev_info));
+ entry = &rinfo->shadow[free];
+ rinfo->shadow_free = entry->req.u.rw.id;
+ entry->req.u.rw.id = 0x0fffffee; /* debug */
return free;
}
-static int add_id_to_freelist(struct blkfront_info *info,
- unsigned long id)
+/*
+ * Return shadow entry @id to the head of the ring's free list.
+ * Fails with -EINVAL when the entry does not carry @id or has no request
+ * attached, i.e. when it does not look like an entry that is in use.
+ */
+static int add_id_to_freelist(struct blkfront_ring_info *rinfo,
+ unsigned long id)
{
- if (info->shadow[id].req.u.rw.id != id)
+ struct blk_shadow *entry = &rinfo->shadow[id];
+
+ if (entry->req.u.rw.id != id || entry->request == NULL)
return -EINVAL;
- if (info->shadow[id].request == NULL)
- return -EINVAL;
- info->shadow[id].req.u.rw.id = info->shadow_free;
- info->shadow[id].request = NULL;
- info->shadow_free = id;
+
+ /* Chain the entry in front of the current free-list head. */
+ entry->req.u.rw.id = rinfo->shadow_free;
+ entry->request = NULL;
+ rinfo->shadow_free = id;
return 0;
}
-static int fill_grant_buffer(struct blkfront_info *info, int num)
+static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
{
+ struct blkfront_info *info = rinfo->dev_info;
struct page *granted_page;
struct grant *gnt_list_entry, *n;
int i = 0;
- while(i < num) {
+ while (i < num) {
gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
if (!gnt_list_entry)
goto out_of_memory;
}
gnt_list_entry->gref = GRANT_INVALID_REF;
- list_add(&gnt_list_entry->node, &info->grants);
+ list_add(&gnt_list_entry->node, &rinfo->grants);
i++;
}
out_of_memory:
list_for_each_entry_safe(gnt_list_entry, n,
- &info->grants, node) {
+ &rinfo->grants, node) {
list_del(&gnt_list_entry->node);
if (info->feature_persistent)
__free_page(gnt_list_entry->page);
return -ENOMEM;
}
-static struct grant *get_free_grant(struct blkfront_info *info)
+static struct grant *get_free_grant(struct blkfront_ring_info *rinfo)
{
struct grant *gnt_list_entry;
- BUG_ON(list_empty(&info->grants));
- gnt_list_entry = list_first_entry(&info->grants, struct grant,
+ BUG_ON(list_empty(&rinfo->grants)); /* the ring's grant list must never run dry here */
+ gnt_list_entry = list_first_entry(&rinfo->grants, struct grant,
node);
list_del(&gnt_list_entry->node);
if (gnt_list_entry->gref != GRANT_INVALID_REF)
- info->persistent_gnts_c--;
+ rinfo->persistent_gnts_c--; /* the entry taken still carried a valid gref */
return gnt_list_entry;
}
static struct grant *get_grant(grant_ref_t *gref_head,
unsigned long gfn,
- struct blkfront_info *info)
+ struct blkfront_ring_info *rinfo)
{
- struct grant *gnt_list_entry = get_free_grant(info);
+ struct grant *gnt_list_entry = get_free_grant(rinfo);
+ struct blkfront_info *info = rinfo->dev_info;
if (gnt_list_entry->gref != GRANT_INVALID_REF)
return gnt_list_entry;
}
static struct grant *get_indirect_grant(grant_ref_t *gref_head,
- struct blkfront_info *info)
+ struct blkfront_ring_info *rinfo)
{
- struct grant *gnt_list_entry = get_free_grant(info);
+ struct grant *gnt_list_entry = get_free_grant(rinfo);
+ struct blkfront_info *info = rinfo->dev_info;
if (gnt_list_entry->gref != GRANT_INVALID_REF)
return gnt_list_entry;
struct page *indirect_page;
/* Fetch a pre-allocated page to use for indirect grefs */
- BUG_ON(list_empty(&info->indirect_pages));
- indirect_page = list_first_entry(&info->indirect_pages,
+ BUG_ON(list_empty(&rinfo->indirect_pages));
+ indirect_page = list_first_entry(&rinfo->indirect_pages,
struct page, lru);
list_del(&indirect_page->lru);
gnt_list_entry->page = indirect_page;
static void blkif_restart_queue_callback(void *arg)
{
- struct blkfront_info *info = (struct blkfront_info *)arg;
- schedule_work(&info->work);
+ /* @arg is the ring that registered the callback; defer to its work item. */
+ struct blkfront_ring_info *ring = arg;
+
+ schedule_work(&ring->work);
}
static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
return 0;
}
-static int blkif_queue_discard_req(struct request *req)
+/*
+ * Take the next slot on the ring and a free shadow entry for @req.
+ *
+ * The slot is handed back through @ring_req with its id already filled in;
+ * the shadow entry starts out as REQ_WAITING with no associated request.
+ * Returns the id of the shadow entry.
+ */
+static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo,
+ struct request *req,
+ struct blkif_request **ring_req)
{
- struct blkfront_info *info = req->rq_disk->private_data;
+ struct blkif_request *slot;
+ struct blk_shadow *entry;
+ unsigned long id;
+
+ /* Grab the slot at the private producer index, then step past it. */
+ slot = RING_GET_REQUEST(&rinfo->ring, rinfo->ring.req_prod_pvt);
+ rinfo->ring.req_prod_pvt++;
+ *ring_req = slot;
+
+ id = get_id_from_freelist(rinfo);
+ entry = &rinfo->shadow[id];
+ entry->request = req;
+ entry->status = REQ_WAITING;
+ entry->associated_id = NO_ASSOCIATED_ID;
+
+ slot->u.rw.id = id;
+
+ return id;
+}
+
+static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo)
+{
+ struct blkfront_info *info = rinfo->dev_info;
struct blkif_request *ring_req;
unsigned long id;
/* Fill out a communications ring structure. */
- ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
- id = get_id_from_freelist(info);
- info->shadow[id].request = req;
+ id = blkif_ring_get_request(rinfo, req, &ring_req);
ring_req->operation = BLKIF_OP_DISCARD;
ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
else
ring_req->u.discard.flag = 0;
- info->ring.req_prod_pvt++;
-
/* Keep a private copy so we can reissue requests when recovering. */
- info->shadow[id].req = *ring_req;
+ rinfo->shadow[id].req = *ring_req;
return 0;
}
struct setup_rw_req {
unsigned int grant_idx;
struct blkif_request_segment *segments;
- struct blkfront_info *info;
+ struct blkfront_ring_info *rinfo;
struct blkif_request *ring_req;
grant_ref_t gref_head;
unsigned int id;
bool need_copy;
unsigned int bvec_off;
char *bvec_data;
+
+ bool require_extra_req;
+ struct blkif_request *extra_ring_req;
};
static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
/* Convenient aliases */
unsigned int grant_idx = setup->grant_idx;
struct blkif_request *ring_req = setup->ring_req;
- struct blkfront_info *info = setup->info;
- struct blk_shadow *shadow = &info->shadow[setup->id];
+ struct blkfront_ring_info *rinfo = setup->rinfo;
+ /*
+ * We always use the shadow of the first request to store the list
+ * of grants associated with the block I/O request. This makes
+ * completion easier to handle even if the block I/O request is
+ * split.
+ */
+ struct blk_shadow *shadow = &rinfo->shadow[setup->id];
+
+ if (unlikely(setup->require_extra_req &&
+ grant_idx >= BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+ /*
+ * We are using the second request, setup grant_idx
+ * to be the index of the segment array.
+ */
+ grant_idx -= BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ ring_req = setup->extra_ring_req;
+ }
if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
(grant_idx % GRANTS_PER_INDIRECT_FRAME == 0)) {
kunmap_atomic(setup->segments);
n = grant_idx / GRANTS_PER_INDIRECT_FRAME;
- gnt_list_entry = get_indirect_grant(&setup->gref_head, info);
+ gnt_list_entry = get_indirect_grant(&setup->gref_head, rinfo);
shadow->indirect_grants[n] = gnt_list_entry;
setup->segments = kmap_atomic(gnt_list_entry->page);
ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
}
- gnt_list_entry = get_grant(&setup->gref_head, gfn, info);
+ gnt_list_entry = get_grant(&setup->gref_head, gfn, rinfo);
ref = gnt_list_entry->gref;
- shadow->grants_used[grant_idx] = gnt_list_entry;
+ /*
+ * All the grants are stored in the shadow of the first
+ * request. Therefore we have to use the global index.
+ */
+ shadow->grants_used[setup->grant_idx] = gnt_list_entry;
if (setup->need_copy) {
void *shared_data;
(setup->grant_idx)++;
}
-static int blkif_queue_rw_req(struct request *req)
+static void blkif_setup_extra_req(struct blkif_request *first,
+ struct blkif_request *second)
{
- struct blkfront_info *info = req->rq_disk->private_data;
- struct blkif_request *ring_req;
- unsigned long id;
+ uint16_t nr_segments = first->u.rw.nr_segments; /* total segments of the whole I/O, before the split */
+
+ /*
+ * The second request is only present when the first request uses
+ * all its segments. It is always the continuation of the first one:
+ * it carries the remaining segments and starts at the sector right
+ * after the data covered by the first request.
+ */
+ first->u.rw.nr_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+ second->u.rw.nr_segments = nr_segments - BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ second->u.rw.sector_number = first->u.rw.sector_number +
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST * XEN_PAGE_SIZE) / 512; /* bytes of a full first request, in 512-byte units */
+
+ second->u.rw.handle = first->u.rw.handle; /* same handle and operation as the first half */
+ second->operation = first->operation;
+}
+
+static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *rinfo)
+{
+ struct blkfront_info *info = rinfo->dev_info;
+ struct blkif_request *ring_req, *extra_ring_req = NULL;
+ unsigned long id, extra_id = NO_ASSOCIATED_ID;
+ bool require_extra_req = false;
int i;
struct setup_rw_req setup = {
.grant_idx = 0,
.segments = NULL,
- .info = info,
+ .rinfo = rinfo,
.need_copy = rq_data_dir(req) && info->feature_persistent,
};
* existing persistent grants, or if we have to get new grants,
* as there are not sufficiently many free.
*/
- bool new_persistent_gnts;
struct scatterlist *sg;
int num_sg, max_grefs, num_grant;
*/
max_grefs += INDIRECT_GREFS(max_grefs);
- /* Check if we have enough grants to allocate a requests */
- if (info->persistent_gnts_c < max_grefs) {
- new_persistent_gnts = 1;
- if (gnttab_alloc_grant_references(
- max_grefs - info->persistent_gnts_c,
- &setup.gref_head) < 0) {
+ /*
+ * We have to reserve 'max_grefs' grants because persistent
+ * grants are shared by all rings.
+ */
+ if (max_grefs > 0)
+ if (gnttab_alloc_grant_references(max_grefs, &setup.gref_head) < 0) {
gnttab_request_free_callback(
- &info->callback,
+ &rinfo->callback,
blkif_restart_queue_callback,
- info,
+ rinfo,
max_grefs);
return 1;
}
- } else
- new_persistent_gnts = 0;
/* Fill out a communications ring structure. */
- ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
- id = get_id_from_freelist(info);
- info->shadow[id].request = req;
-
- BUG_ON(info->max_indirect_segments == 0 &&
- GREFS(req->nr_phys_segments) > BLKIF_MAX_SEGMENTS_PER_REQUEST);
- BUG_ON(info->max_indirect_segments &&
- GREFS(req->nr_phys_segments) > info->max_indirect_segments);
+ id = blkif_ring_get_request(rinfo, req, &ring_req);
- num_sg = blk_rq_map_sg(req->q, req, info->shadow[id].sg);
+ num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg);
num_grant = 0;
/* Calculate the number of grant used */
- for_each_sg(info->shadow[id].sg, sg, num_sg, i)
+ for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i)
num_grant += gnttab_count_grant(sg->offset, sg->length);
- ring_req->u.rw.id = id;
- info->shadow[id].num_sg = num_sg;
- if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+ require_extra_req = info->max_indirect_segments == 0 &&
+ num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ BUG_ON(!HAS_EXTRA_REQ && require_extra_req);
+
+ rinfo->shadow[id].num_sg = num_sg;
+ if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST &&
+ likely(!require_extra_req)) {
/*
* The indirect operation can only be a BLKIF_OP_READ or
* BLKIF_OP_WRITE
}
}
ring_req->u.rw.nr_segments = num_grant;
+ if (unlikely(require_extra_req)) {
+ extra_id = blkif_ring_get_request(rinfo, req,
+ &extra_ring_req);
+ /*
+ * Only the first request contains the scatter-gather
+ * list.
+ */
+ rinfo->shadow[extra_id].num_sg = 0;
+
+ blkif_setup_extra_req(ring_req, extra_ring_req);
+
+ /* Link the 2 requests together */
+ rinfo->shadow[extra_id].associated_id = id;
+ rinfo->shadow[id].associated_id = extra_id;
+ }
}
setup.ring_req = ring_req;
setup.id = id;
- for_each_sg(info->shadow[id].sg, sg, num_sg, i) {
+
+ setup.require_extra_req = require_extra_req;
+ if (unlikely(require_extra_req))
+ setup.extra_ring_req = extra_ring_req;
+
+ for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i) {
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
if (setup.need_copy) {
if (setup.segments)
kunmap_atomic(setup.segments);
- info->ring.req_prod_pvt++;
-
/* Keep a private copy so we can reissue requests when recovering. */
- info->shadow[id].req = *ring_req;
+ rinfo->shadow[id].req = *ring_req;
+ if (unlikely(require_extra_req))
+ rinfo->shadow[extra_id].req = *extra_ring_req;
- if (new_persistent_gnts)
+ if (max_grefs > 0)
gnttab_free_grant_references(setup.gref_head);
return 0;
*
* @req: a request struct
*/
-static int blkif_queue_request(struct request *req)
+static int blkif_queue_request(struct request *req, struct blkfront_ring_info *rinfo)
{
- struct blkfront_info *info = req->rq_disk->private_data;
-
- if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
+ if (unlikely(rinfo->dev_info->connected != BLKIF_STATE_CONNECTED)) /* connection state is kept per device, not per ring */
return 1;
if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE)))
- return blkif_queue_discard_req(req);
+ return blkif_queue_discard_req(req, rinfo); /* discard / secure-discard requests */
else
- return blkif_queue_rw_req(req);
+ return blkif_queue_rw_req(req, rinfo); /* every other request takes the read/write path */
}
-static inline void flush_requests(struct blkfront_info *info)
+static inline void flush_requests(struct blkfront_ring_info *rinfo)
{
int notify;
- RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rinfo->ring, notify); /* publish queued requests; 'notify' says whether a kick is needed */
if (notify)
- notify_remote_via_irq(info->irq);
+ notify_remote_via_irq(rinfo->irq); /* kick the other end through this ring's own irq */
}
static inline bool blkif_request_flush_invalid(struct request *req,
}
static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *qd)
+ const struct blk_mq_queue_data *qd)
{
- struct blkfront_info *info = qd->rq->rq_disk->private_data;
+ unsigned long flags;
+ struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data; /* ring bound to this hctx by blk_mq_init_hctx() */
blk_mq_start_request(qd->rq);
- spin_lock_irq(&info->io_lock);
- if (RING_FULL(&info->ring))
+ spin_lock_irqsave(&rinfo->ring_lock, flags); /* only this ring is locked, other rings keep running */
+ if (RING_FULL(&rinfo->ring)) /* no free slot on this ring: report busy */
goto out_busy;
- if (blkif_request_flush_invalid(qd->rq, info))
+ if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info)) /* flush support is a per-device property */
goto out_err;
- if (blkif_queue_request(qd->rq))
+ if (blkif_queue_request(qd->rq, rinfo)) /* non-zero means the request could not be queued right now */
goto out_busy;
- flush_requests(info);
- spin_unlock_irq(&info->io_lock);
+ flush_requests(rinfo);
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_MQ_RQ_QUEUE_OK;
out_err:
- spin_unlock_irq(&info->io_lock);
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_MQ_RQ_QUEUE_ERROR;
out_busy:
- spin_unlock_irq(&info->io_lock);
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags); /* drop the lock before stopping the hardware queue */
blk_mq_stop_hw_queue(hctx);
return BLK_MQ_RQ_QUEUE_BUSY;
}
+/*
+ * blk-mq init_hctx hook: attach hardware context number @index to the ring
+ * with the same index. @data is the blkfront_info stored as the tag set's
+ * driver_data. Always returns 0; an out-of-range @index is a BUG.
+ */
+static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+ unsigned int index)
+{
+ struct blkfront_info *info = data;
+
+ BUG_ON(index >= info->nr_rings);
+ hctx->driver_data = info->rinfo + index;
+ return 0;
+}
+
static struct blk_mq_ops blkfront_mq_ops = {
.queue_rq = blkif_queue_rq,
.map_queue = blk_mq_map_queue,
+ .init_hctx = blk_mq_init_hctx,
};
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
memset(&info->tag_set, 0, sizeof(info->tag_set));
info->tag_set.ops = &blkfront_mq_ops;
- info->tag_set.nr_hw_queues = 1;
- info->tag_set.queue_depth = BLK_RING_SIZE(info);
+ info->tag_set.nr_hw_queues = info->nr_rings;
+ if (HAS_EXTRA_REQ && info->max_indirect_segments == 0) {
+ /*
+ * When indirect descriptors are not supported, the I/O request
+ * will be split between multiple requests in the ring.
+ * To avoid problems when sending the request, halve the
+ * depth of the queue.
+ */
+ info->tag_set.queue_depth = BLK_RING_SIZE(info) / 2;
+ } else
+ info->tag_set.queue_depth = BLK_RING_SIZE(info);
info->tag_set.numa_node = NUMA_NO_NODE;
info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
info->tag_set.cmd_size = 0;
info->tag_set.driver_data = info;
if (blk_mq_alloc_tag_set(&info->tag_set))
- return -1;
+ return -EINVAL;
rq = blk_mq_init_queue(&info->tag_set);
if (IS_ERR(rq)) {
blk_mq_free_tag_set(&info->tag_set);
- return -1;
+ return PTR_ERR(rq);
}
queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
static void xlvbd_release_gendisk(struct blkfront_info *info)
{
- unsigned int minor, nr_minors;
+ unsigned int minor, nr_minors, i;
if (info->rq == NULL)
return;
/* No more blkif_request(). */
blk_mq_stop_hw_queues(info->rq);
- /* No more gnttab callback work. */
- gnttab_cancel_free_callback(&info->callback);
+ for (i = 0; i < info->nr_rings; i++) {
+ struct blkfront_ring_info *rinfo = &info->rinfo[i];
- /* Flush gnttab callback work. Must be done with no locks held. */
- flush_work(&info->work);
+ /* No more gnttab callback work. */
+ gnttab_cancel_free_callback(&rinfo->callback);
+
+ /* Flush gnttab callback work. Must be done with no locks held. */
+ flush_work(&rinfo->work);
+ }
del_gendisk(info->gd);
info->gd = NULL;
}
-/* Must be called with io_lock holded */
-static void kick_pending_request_queues(struct blkfront_info *info)
+/* Already hold rinfo->ring_lock. */
+static inline void kick_pending_request_queues_locked(struct blkfront_ring_info *rinfo)
{
- if (!RING_FULL(&info->ring))
- blk_mq_start_stopped_hw_queues(info->rq, true);
+ if (!RING_FULL(&rinfo->ring))
+ blk_mq_start_stopped_hw_queues(rinfo->dev_info->rq, true);
}
-static void blkif_restart_queue(struct work_struct *work)
+static void kick_pending_request_queues(struct blkfront_ring_info *rinfo)
{
- struct blkfront_info *info = container_of(work, struct blkfront_info, work);
+ unsigned long flags;
- spin_lock_irq(&info->io_lock);
- if (info->connected == BLKIF_STATE_CONNECTED)
- kick_pending_request_queues(info);
- spin_unlock_irq(&info->io_lock);
+ spin_lock_irqsave(&rinfo->ring_lock, flags);
+ kick_pending_request_queues_locked(rinfo);
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
}
-static void blkif_free(struct blkfront_info *info, int suspend)
+static void blkif_restart_queue(struct work_struct *work)
{
- struct grant *persistent_gnt;
- struct grant *n;
- int i, j, segs;
+ struct blkfront_ring_info *rinfo = container_of(work, struct blkfront_ring_info, work);
- /* Prevent new requests being issued until we fix things up. */
- spin_lock_irq(&info->io_lock);
- info->connected = suspend ?
- BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
- /* No more blkif_request(). */
- if (info->rq)
- blk_mq_stop_hw_queues(info->rq);
+ if (rinfo->dev_info->connected == BLKIF_STATE_CONNECTED)
+ kick_pending_request_queues(rinfo);
+}
- /* Remove all persistent grants */
- if (!list_empty(&info->grants)) {
- list_for_each_entry_safe(persistent_gnt, n,
- &info->grants, node) {
- list_del(&persistent_gnt->node);
- if (persistent_gnt->gref != GRANT_INVALID_REF) {
- gnttab_end_foreign_access(persistent_gnt->gref,
- 0, 0UL);
- info->persistent_gnts_c--;
- }
- if (info->feature_persistent)
- __free_page(persistent_gnt->page);
- kfree(persistent_gnt);
- }
- }
- BUG_ON(info->persistent_gnts_c != 0);
+static void blkif_free_ring(struct blkfront_ring_info *rinfo)
+{
+ struct grant *persistent_gnt, *n;
+ struct blkfront_info *info = rinfo->dev_info;
+ int i, j, segs;
/*
* Remove indirect pages, this only happens when using indirect
* descriptors but not persistent grants
*/
- if (!list_empty(&info->indirect_pages)) {
+ if (!list_empty(&rinfo->indirect_pages)) {
struct page *indirect_page, *n;
BUG_ON(info->feature_persistent);
- list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
+ list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
list_del(&indirect_page->lru);
__free_page(indirect_page);
}
}
+ /* Remove all persistent grants. */
+ if (!list_empty(&rinfo->grants)) {
+ list_for_each_entry_safe(persistent_gnt, n,
+ &rinfo->grants, node) {
+ list_del(&persistent_gnt->node);
+ if (persistent_gnt->gref != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(persistent_gnt->gref,
+ 0, 0UL);
+ rinfo->persistent_gnts_c--;
+ }
+ if (info->feature_persistent)
+ __free_page(persistent_gnt->page);
+ kfree(persistent_gnt);
+ }
+ }
+ BUG_ON(rinfo->persistent_gnts_c != 0);
+
for (i = 0; i < BLK_RING_SIZE(info); i++) {
/*
* Clear persistent grants present in requests already
* on the shared ring
*/
- if (!info->shadow[i].request)
+ if (!rinfo->shadow[i].request)
goto free_shadow;
- segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
- info->shadow[i].req.u.indirect.nr_segments :
- info->shadow[i].req.u.rw.nr_segments;
+ segs = rinfo->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
+ rinfo->shadow[i].req.u.indirect.nr_segments :
+ rinfo->shadow[i].req.u.rw.nr_segments;
for (j = 0; j < segs; j++) {
- persistent_gnt = info->shadow[i].grants_used[j];
+ persistent_gnt = rinfo->shadow[i].grants_used[j];
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
if (info->feature_persistent)
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
- if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT)
+ if (rinfo->shadow[i].req.operation != BLKIF_OP_INDIRECT)
/*
* If this is not an indirect operation don't try to
* free indirect segments
goto free_shadow;
for (j = 0; j < INDIRECT_GREFS(segs); j++) {
- persistent_gnt = info->shadow[i].indirect_grants[j];
+ persistent_gnt = rinfo->shadow[i].indirect_grants[j];
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
free_shadow:
- kfree(info->shadow[i].grants_used);
- info->shadow[i].grants_used = NULL;
- kfree(info->shadow[i].indirect_grants);
- info->shadow[i].indirect_grants = NULL;
- kfree(info->shadow[i].sg);
- info->shadow[i].sg = NULL;
+ kfree(rinfo->shadow[i].grants_used);
+ rinfo->shadow[i].grants_used = NULL;
+ kfree(rinfo->shadow[i].indirect_grants);
+ rinfo->shadow[i].indirect_grants = NULL;
+ kfree(rinfo->shadow[i].sg);
+ rinfo->shadow[i].sg = NULL;
}
/* No more gnttab callback work. */
- gnttab_cancel_free_callback(&info->callback);
- spin_unlock_irq(&info->io_lock);
+ gnttab_cancel_free_callback(&rinfo->callback);
/* Flush gnttab callback work. Must be done with no locks held. */
- flush_work(&info->work);
+ flush_work(&rinfo->work);
/* Free resources associated with old device channel. */
for (i = 0; i < info->nr_ring_pages; i++) {
- if (info->ring_ref[i] != GRANT_INVALID_REF) {
- gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
- info->ring_ref[i] = GRANT_INVALID_REF;
+ if (rinfo->ring_ref[i] != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0);
+ rinfo->ring_ref[i] = GRANT_INVALID_REF;
}
}
- free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
- info->ring.sring = NULL;
+ free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+ rinfo->ring.sring = NULL;
- if (info->irq)
- unbind_from_irqhandler(info->irq, info);
- info->evtchn = info->irq = 0;
+ if (rinfo->irq)
+ unbind_from_irqhandler(rinfo->irq, rinfo);
+ rinfo->evtchn = rinfo->irq = 0;
+}
+static void blkif_free(struct blkfront_info *info, int suspend)
+{
+ unsigned int i;
+
+ /*
+  * Prevent new requests being issued until we fix things up: the
+  * device is marked suspended when @suspend is non-zero and
+  * disconnected otherwise.
+  */
+ info->connected = suspend ?
+ BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+ /* No more blkif_request(): stop the hardware queues if a queue exists. */
+ if (info->rq)
+ blk_mq_stop_hw_queues(info->rq);
+
+ for (i = 0; i < info->nr_rings; i++)
+ blkif_free_ring(&info->rinfo[i]);
+
+ kfree(info->rinfo);
+ info->rinfo = NULL;
+ info->nr_rings = 0;
}
struct copy_from_grant {
kunmap_atomic(shared_data);
}
-static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
+static enum blk_req_status blkif_rsp_to_req_status(int rsp)
+{
+ switch (rsp)
+ {
+ case BLKIF_RSP_OKAY:
+ return REQ_DONE;
+ case BLKIF_RSP_EOPNOTSUPP:
+ return REQ_EOPNOTSUPP;
+ case BLKIF_RSP_ERROR:
+ /* Fallthrough: an explicit error and any unknown status map to REQ_ERROR. */
+ default:
+ return REQ_ERROR;
+ }
+}
+
+/*
+ * Get the final status of the block request based on the two ring
+ * responses of a split request.
+ *
+ * Both statuses must already be known: REQ_WAITING for either one
+ * triggers BUG_ON(). An error in either half takes precedence over
+ * EOPNOTSUPP; BLKIF_RSP_OKAY is returned only when neither half reported
+ * an error or EOPNOTSUPP.
+ */
+static int blkif_get_final_status(enum blk_req_status s1,
+ enum blk_req_status s2)
+{
+ BUG_ON(s1 == REQ_WAITING);
+ BUG_ON(s2 == REQ_WAITING);
+
+ if (s1 == REQ_ERROR || s2 == REQ_ERROR)
+ return BLKIF_RSP_ERROR;
+ else if (s1 == REQ_EOPNOTSUPP || s2 == REQ_EOPNOTSUPP)
+ return BLKIF_RSP_EOPNOTSUPP;
+ return BLKIF_RSP_OKAY;
+}
+
+static bool blkif_completion(unsigned long *id,
+ struct blkfront_ring_info *rinfo,
struct blkif_response *bret)
{
int i = 0;
struct scatterlist *sg;
int num_sg, num_grant;
+ struct blkfront_info *info = rinfo->dev_info;
+ struct blk_shadow *s = &rinfo->shadow[*id];
struct copy_from_grant data = {
- .s = s,
.grant_idx = 0,
};
num_grant = s->req.operation == BLKIF_OP_INDIRECT ?
s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
+
+ /* The I/O request may be split in two. */
+ if (unlikely(s->associated_id != NO_ASSOCIATED_ID)) {
+ struct blk_shadow *s2 = &rinfo->shadow[s->associated_id];
+
+ /* Keep the status of the current response in shadow. */
+ s->status = blkif_rsp_to_req_status(bret->status);
+
+ /* Wait for the second response if it has not arrived yet. */
+ if (s2->status == REQ_WAITING)
+ return 0;
+
+ bret->status = blkif_get_final_status(s->status,
+ s2->status);
+
+ /*
+ * All the grants are stored in the first shadow in order
+ * to make the completion code simpler.
+ */
+ num_grant += s2->req.u.rw.nr_segments;
+
+ /*
+ * The two responses may not come in order. Only the
+ * first request will store the scatter-gather list.
+ */
+ if (s2->num_sg != 0) {
+ /* Update "id" with the ID of the first response. */
+ *id = s->associated_id;
+ s = s2;
+ }
+
+ /*
+ * We no longer need the second request, so recycle
+ * it now.
+ */
+ if (add_id_to_freelist(rinfo, s->associated_id))
+ WARN(1, "%s: can't recycle the second part (id = %ld) of the request\n",
+ info->gd->disk_name, s->associated_id);
+ }
+
+ data.s = s;
num_sg = s->num_sg;
if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
if (!info->feature_persistent)
pr_alert_ratelimited("backed has not unmapped grant: %u\n",
s->grants_used[i]->gref);
- list_add(&s->grants_used[i]->node, &info->grants);
- info->persistent_gnts_c++;
+ list_add(&s->grants_used[i]->node, &rinfo->grants);
+ rinfo->persistent_gnts_c++;
} else {
/*
* If the grant is not mapped by the backend we end the
*/
gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
s->grants_used[i]->gref = GRANT_INVALID_REF;
- list_add_tail(&s->grants_used[i]->node, &info->grants);
+ list_add_tail(&s->grants_used[i]->node, &rinfo->grants);
}
}
if (s->req.operation == BLKIF_OP_INDIRECT) {
if (!info->feature_persistent)
pr_alert_ratelimited("backed has not unmapped grant: %u\n",
s->indirect_grants[i]->gref);
- list_add(&s->indirect_grants[i]->node, &info->grants);
- info->persistent_gnts_c++;
+ list_add(&s->indirect_grants[i]->node, &rinfo->grants);
+ rinfo->persistent_gnts_c++;
} else {
struct page *indirect_page;
*/
if (!info->feature_persistent) {
indirect_page = s->indirect_grants[i]->page;
- list_add(&indirect_page->lru, &info->indirect_pages);
+ list_add(&indirect_page->lru, &rinfo->indirect_pages);
}
s->indirect_grants[i]->gref = GRANT_INVALID_REF;
- list_add_tail(&s->indirect_grants[i]->node, &info->grants);
+ list_add_tail(&s->indirect_grants[i]->node, &rinfo->grants);
}
}
}
+
+ return 1;
}
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
struct blkif_response *bret;
RING_IDX i, rp;
unsigned long flags;
- struct blkfront_info *info = (struct blkfront_info *)dev_id;
+ struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
+ struct blkfront_info *info = rinfo->dev_info;
int error;
- spin_lock_irqsave(&info->io_lock, flags);
-
- if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
- spin_unlock_irqrestore(&info->io_lock, flags);
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
return IRQ_HANDLED;
- }
+ spin_lock_irqsave(&rinfo->ring_lock, flags);
again:
- rp = info->ring.sring->rsp_prod;
+ rp = rinfo->ring.sring->rsp_prod;
rmb(); /* Ensure we see queued responses up to 'rp'. */
- for (i = info->ring.rsp_cons; i != rp; i++) {
+ for (i = rinfo->ring.rsp_cons; i != rp; i++) {
unsigned long id;
- bret = RING_GET_RESPONSE(&info->ring, i);
+ bret = RING_GET_RESPONSE(&rinfo->ring, i);
id = bret->id;
/*
* The backend has messed up and given us an id that we would
* the id is busted. */
continue;
}
- req = info->shadow[id].request;
+ req = rinfo->shadow[id].request;
- if (bret->operation != BLKIF_OP_DISCARD)
- blkif_completion(&info->shadow[id], info, bret);
+ if (bret->operation != BLKIF_OP_DISCARD) {
+ /*
+ * We may need to wait for an extra response if the
+ * I/O request is split in 2
+ */
+ if (!blkif_completion(&id, rinfo, bret))
+ continue;
+ }
- if (add_id_to_freelist(info, id)) {
+ if (add_id_to_freelist(rinfo, id)) {
WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
info->gd->disk_name, op_name(bret->operation), id);
continue;
error = -EOPNOTSUPP;
}
if (unlikely(bret->status == BLKIF_RSP_ERROR &&
- info->shadow[id].req.u.rw.nr_segments == 0)) {
+ rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
}
}
- info->ring.rsp_cons = i;
+ rinfo->ring.rsp_cons = i;
- if (i != info->ring.req_prod_pvt) {
+ if (i != rinfo->ring.req_prod_pvt) {
int more_to_do;
- RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+ RING_FINAL_CHECK_FOR_RESPONSES(&rinfo->ring, more_to_do);
if (more_to_do)
goto again;
} else
- info->ring.sring->rsp_event = i + 1;
+ rinfo->ring.sring->rsp_event = i + 1;
- kick_pending_request_queues(info);
+ kick_pending_request_queues_locked(rinfo);
- spin_unlock_irqrestore(&info->io_lock, flags);
+ spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return IRQ_HANDLED;
}
static int setup_blkring(struct xenbus_device *dev,
- struct blkfront_info *info)
+ struct blkfront_ring_info *rinfo)
{
struct blkif_sring *sring;
int err, i;
+ struct blkfront_info *info = rinfo->dev_info;
unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE;
grant_ref_t gref[XENBUS_MAX_RING_GRANTS];
for (i = 0; i < info->nr_ring_pages; i++)
- info->ring_ref[i] = GRANT_INVALID_REF;
+ rinfo->ring_ref[i] = GRANT_INVALID_REF;
sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
get_order(ring_size));
return -ENOMEM;
}
SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&info->ring, sring, ring_size);
+ FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
- err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref);
+ err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
if (err < 0) {
free_pages((unsigned long)sring, get_order(ring_size));
- info->ring.sring = NULL;
+ rinfo->ring.sring = NULL;
goto fail;
}
for (i = 0; i < info->nr_ring_pages; i++)
- info->ring_ref[i] = gref[i];
+ rinfo->ring_ref[i] = gref[i];
- err = xenbus_alloc_evtchn(dev, &info->evtchn);
+ err = xenbus_alloc_evtchn(dev, &rinfo->evtchn);
if (err)
goto fail;
- err = bind_evtchn_to_irqhandler(info->evtchn, blkif_interrupt, 0,
- "blkif", info);
+ err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0,
+ "blkif", rinfo);
if (err <= 0) {
xenbus_dev_fatal(dev, err,
"bind_evtchn_to_irqhandler failed");
goto fail;
}
- info->irq = err;
+ rinfo->irq = err;
return 0;
fail:
return err;
}
+/*
+ * Write out the per-ring/queue nodes under @dir: the ring reference(s)
+ * and the event channel. A single-page ring is written as "ring-ref";
+ * when ->nr_ring_pages is greater than one, each page is written as
+ * "ring-ref<N>" (ring-ref0, ring-ref1, ...).
+ *
+ * Returns 0 on success. On failure the transaction @xbt is ended
+ * (aborted) here, the error is reported through xenbus_dev_fatal(), and
+ * the error code from xenbus_printf() is returned.
+ */
+static int write_per_ring_nodes(struct xenbus_transaction xbt,
+ struct blkfront_ring_info *rinfo, const char *dir)
+{
+ int err;
+ unsigned int i;
+ const char *message = NULL;
+ struct blkfront_info *info = rinfo->dev_info;
+
+ if (info->nr_ring_pages == 1) {
+ err = xenbus_printf(xbt, dir, "ring-ref", "%u", rinfo->ring_ref[0]);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ } else {
+ for (i = 0; i < info->nr_ring_pages; i++) {
+ char ring_ref_name[RINGREF_NAME_LEN];
+
+ snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+ err = xenbus_printf(xbt, dir, ring_ref_name,
+ "%u", rinfo->ring_ref[i]);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ }
+ }
+
+ err = xenbus_printf(xbt, dir, "event-channel", "%u", rinfo->evtchn);
+ if (err) {
+ message = "writing event-channel";
+ goto abort_transaction;
+ }
+
+ return 0;
+
+abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ if (message)
+ xenbus_dev_fatal(info->xbdev, err, "%s", message);
+
+ return err;
+}
/* Common code used when first setting up, and when resuming. */
static int talk_to_blkback(struct xenbus_device *dev,
{
const char *message = NULL;
struct xenbus_transaction xbt;
- int err, i;
- unsigned int max_page_order = 0;
+ int err;
+ unsigned int i, max_page_order = 0;
unsigned int ring_page_order = 0;
err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
info->nr_ring_pages = 1 << ring_page_order;
}
- /* Create shared ring, alloc event channel. */
- err = setup_blkring(dev, info);
- if (err)
- goto out;
+ for (i = 0; i < info->nr_rings; i++) {
+ struct blkfront_ring_info *rinfo = &info->rinfo[i];
+
+ /* Create shared ring, alloc event channel. */
+ err = setup_blkring(dev, rinfo);
+ if (err)
+ goto destroy_blkring;
+ }
again:
err = xenbus_transaction_start(&xbt);
goto destroy_blkring;
}
- if (info->nr_ring_pages == 1) {
- err = xenbus_printf(xbt, dev->nodename,
- "ring-ref", "%u", info->ring_ref[0]);
+ if (info->nr_ring_pages > 1) {
+ err = xenbus_printf(xbt, dev->nodename, "ring-page-order", "%u",
+ ring_page_order);
if (err) {
- message = "writing ring-ref";
+ message = "writing ring-page-order";
goto abort_transaction;
}
+ }
+
+ /* We already got the number of queues/rings in _probe */
+ if (info->nr_rings == 1) {
+ err = write_per_ring_nodes(xbt, &info->rinfo[0], dev->nodename);
+ if (err)
+ goto destroy_blkring;
} else {
- err = xenbus_printf(xbt, dev->nodename,
- "ring-page-order", "%u", ring_page_order);
+ char *path;
+ size_t pathsize;
+
+ err = xenbus_printf(xbt, dev->nodename, "multi-queue-num-queues", "%u",
+ info->nr_rings);
if (err) {
- message = "writing ring-page-order";
+ message = "writing multi-queue-num-queues";
goto abort_transaction;
}
- for (i = 0; i < info->nr_ring_pages; i++) {
- char ring_ref_name[RINGREF_NAME_LEN];
+ pathsize = strlen(dev->nodename) + QUEUE_NAME_LEN;
+ path = kmalloc(pathsize, GFP_KERNEL);
+ if (!path) {
+ err = -ENOMEM;
+ message = "ENOMEM while writing ring references";
+ goto abort_transaction;
+ }
- snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
- err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
- "%u", info->ring_ref[i]);
+ for (i = 0; i < info->nr_rings; i++) {
+ memset(path, 0, pathsize);
+ snprintf(path, pathsize, "%s/queue-%u", dev->nodename, i);
+ err = write_per_ring_nodes(xbt, &info->rinfo[i], path);
if (err) {
- message = "writing ring-ref";
- goto abort_transaction;
+ kfree(path);
+ goto destroy_blkring;
}
}
- }
- err = xenbus_printf(xbt, dev->nodename,
- "event-channel", "%u", info->evtchn);
- if (err) {
- message = "writing event-channel";
- goto abort_transaction;
+ kfree(path);
}
err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
XEN_IO_PROTO_ABI_NATIVE);
goto destroy_blkring;
}
- for (i = 0; i < BLK_RING_SIZE(info); i++)
- info->shadow[i].req.u.rw.id = i+1;
- info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+ for (i = 0; i < info->nr_rings; i++) {
+ unsigned int j;
+ struct blkfront_ring_info *rinfo = &info->rinfo[i];
+
+ for (j = 0; j < BLK_RING_SIZE(info); j++)
+ rinfo->shadow[j].req.u.rw.id = j + 1;
+ rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+ }
xenbus_switch_state(dev, XenbusStateInitialised);
return 0;
xenbus_dev_fatal(dev, err, "%s", message);
destroy_blkring:
blkif_free(info, 0);
- out:
+
+ kfree(info);
+ dev_set_drvdata(&dev->dev, NULL);
+
return err;
}
const struct xenbus_device_id *id)
{
int err, vdevice;
+ unsigned int r_index;
struct blkfront_info *info;
+ unsigned int backend_max_queues = 0;
/* FIXME: Use dynamic device id if this is not set. */
err = xenbus_scanf(XBT_NIL, dev->nodename,
return -ENOMEM;
}
- mutex_init(&info->mutex);
- spin_lock_init(&info->io_lock);
info->xbdev = dev;
+ /* Check if backend supports multiple queues. */
+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "multi-queue-max-queues", "%u", &backend_max_queues);
+ if (err < 0)
+ backend_max_queues = 1;
+
+ info->nr_rings = min(backend_max_queues, xen_blkif_max_queues);
+ /* We need at least one ring. */
+ if (!info->nr_rings)
+ info->nr_rings = 1;
+
+ info->rinfo = kzalloc(sizeof(struct blkfront_ring_info) * info->nr_rings, GFP_KERNEL);
+ if (!info->rinfo) {
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating ring_info structure");
+ kfree(info);
+ return -ENOMEM;
+ }
+
+ for (r_index = 0; r_index < info->nr_rings; r_index++) {
+ struct blkfront_ring_info *rinfo;
+
+ rinfo = &info->rinfo[r_index];
+ INIT_LIST_HEAD(&rinfo->indirect_pages);
+ INIT_LIST_HEAD(&rinfo->grants);
+ rinfo->dev_info = info;
+ INIT_WORK(&rinfo->work, blkif_restart_queue);
+ spin_lock_init(&rinfo->ring_lock);
+ }
+
+ mutex_init(&info->mutex);
info->vdevice = vdevice;
- INIT_LIST_HEAD(&info->grants);
- INIT_LIST_HEAD(&info->indirect_pages);
- info->persistent_gnts_c = 0;
info->connected = BLKIF_STATE_DISCONNECTED;
- INIT_WORK(&info->work, blkif_restart_queue);
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
static int blkif_recover(struct blkfront_info *info)
{
- int i;
+ unsigned int i, r_index;
struct request *req, *n;
struct blk_shadow *copy;
int rc;
struct split_bio *split_bio;
struct list_head requests;
- /* Stage 1: Make a safe copy of the shadow state. */
- copy = kmemdup(info->shadow, sizeof(info->shadow),
- GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
- if (!copy)
- return -ENOMEM;
-
- /* Stage 2: Set up free list. */
- memset(&info->shadow, 0, sizeof(info->shadow));
- for (i = 0; i < BLK_RING_SIZE(info); i++)
- info->shadow[i].req.u.rw.id = i+1;
- info->shadow_free = info->ring.req_prod_pvt;
- info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
-
- rc = blkfront_gather_backend_features(info);
- if (rc) {
- kfree(copy);
- return rc;
- }
-
+ blkfront_gather_backend_features(info);
segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
blk_queue_max_segments(info->rq, segs);
bio_list_init(&bio_list);
INIT_LIST_HEAD(&requests);
- for (i = 0; i < BLK_RING_SIZE(info); i++) {
- /* Not in use? */
- if (!copy[i].request)
- continue;
- /*
- * Get the bios in the request so we can re-queue them.
- */
- if (copy[i].request->cmd_flags &
- (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+ for (r_index = 0; r_index < info->nr_rings; r_index++) {
+ struct blkfront_ring_info *rinfo;
+
+ rinfo = &info->rinfo[r_index];
+ /* Stage 1: Make a safe copy of the shadow state. */
+ copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
+ GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
+ if (!copy)
+ return -ENOMEM;
+
+ /* Stage 2: Set up free list. */
+ memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
+ for (i = 0; i < BLK_RING_SIZE(info); i++)
+ rinfo->shadow[i].req.u.rw.id = i+1;
+ rinfo->shadow_free = rinfo->ring.req_prod_pvt;
+ rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
+
+ rc = blkfront_setup_indirect(rinfo);
+ if (rc) {
+ kfree(copy);
+ return rc;
+ }
+
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
+ /* Not in use? */
+ if (!copy[i].request)
+ continue;
+
/*
- * Flush operations don't contain bios, so
- * we need to requeue the whole request
+ * Get the bios in the request so we can re-queue them.
*/
- list_add(©[i].request->queuelist, &requests);
- continue;
+ if (copy[i].request->cmd_flags &
+ (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+ /*
+ * Flush operations don't contain bios, so
+ * we need to requeue the whole request
+ */
+ list_add(©[i].request->queuelist, &requests);
+ continue;
+ }
+ merge_bio.head = copy[i].request->bio;
+ merge_bio.tail = copy[i].request->biotail;
+ bio_list_merge(&bio_list, &merge_bio);
+ copy[i].request->bio = NULL;
+ blk_end_request_all(copy[i].request, 0);
}
- merge_bio.head = copy[i].request->bio;
- merge_bio.tail = copy[i].request->biotail;
- bio_list_merge(&bio_list, &merge_bio);
- copy[i].request->bio = NULL;
- blk_end_request_all(copy[i].request, 0);
- }
-
- kfree(copy);
+ kfree(copy);
+ }
xenbus_switch_state(info->xbdev, XenbusStateConnected);
- spin_lock_irq(&info->io_lock);
-
/* Now safe for us to use the shared ring */
info->connected = BLKIF_STATE_CONNECTED;
- /* Kick any other new requests queued since we resumed */
- kick_pending_request_queues(info);
+ for (r_index = 0; r_index < info->nr_rings; r_index++) {
+ struct blkfront_ring_info *rinfo;
+
+ rinfo = &info->rinfo[r_index];
+ /* Kick any other new requests queued since we resumed */
+ kick_pending_request_queues(rinfo);
+ }
list_for_each_entry_safe(req, n, &requests, queuelist) {
/* Requeue pending requests (flush or discard) */
BUG_ON(req->nr_phys_segments > segs);
blk_mq_requeue_request(req);
}
- spin_unlock_irq(&info->io_lock);
blk_mq_kick_requeue_list(info->rq);
while ((bio = bio_list_pop(&bio_list)) != NULL) {
return err;
}
-static void
-blkfront_closing(struct blkfront_info *info)
+static void blkfront_closing(struct blkfront_info *info)
{
struct xenbus_device *xbdev = info->xbdev;
struct block_device *bdev = NULL;
info->feature_secdiscard = !!discard_secure;
}
-static int blkfront_setup_indirect(struct blkfront_info *info)
+static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
{
unsigned int psegs, grants;
int err, i;
+ struct blkfront_info *info = rinfo->dev_info;
- if (info->max_indirect_segments == 0)
- grants = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ if (info->max_indirect_segments == 0) {
+ if (!HAS_EXTRA_REQ)
+ grants = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ else {
+ /*
+ * When an extra req is required, the maximum
+ * grants supported is related to the size of the
+ * Linux block segment.
+ */
+ grants = GRANTS_PER_PSEG;
+ }
+ }
else
grants = info->max_indirect_segments;
psegs = grants / GRANTS_PER_PSEG;
- err = fill_grant_buffer(info,
+ err = fill_grant_buffer(rinfo,
(grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info));
if (err)
goto out_of_memory;
*/
int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
- BUG_ON(!list_empty(&info->indirect_pages));
+ BUG_ON(!list_empty(&rinfo->indirect_pages));
for (i = 0; i < num; i++) {
struct page *indirect_page = alloc_page(GFP_NOIO);
if (!indirect_page)
goto out_of_memory;
- list_add(&indirect_page->lru, &info->indirect_pages);
+ list_add(&indirect_page->lru, &rinfo->indirect_pages);
}
}
for (i = 0; i < BLK_RING_SIZE(info); i++) {
- info->shadow[i].grants_used = kzalloc(
- sizeof(info->shadow[i].grants_used[0]) * grants,
+ rinfo->shadow[i].grants_used = kzalloc(
+ sizeof(rinfo->shadow[i].grants_used[0]) * grants,
GFP_NOIO);
- info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * psegs, GFP_NOIO);
+ rinfo->shadow[i].sg = kzalloc(sizeof(rinfo->shadow[i].sg[0]) * psegs, GFP_NOIO);
if (info->max_indirect_segments)
- info->shadow[i].indirect_grants = kzalloc(
- sizeof(info->shadow[i].indirect_grants[0]) *
+ rinfo->shadow[i].indirect_grants = kzalloc(
+ sizeof(rinfo->shadow[i].indirect_grants[0]) *
INDIRECT_GREFS(grants),
GFP_NOIO);
- if ((info->shadow[i].grants_used == NULL) ||
- (info->shadow[i].sg == NULL) ||
+ if ((rinfo->shadow[i].grants_used == NULL) ||
+ (rinfo->shadow[i].sg == NULL) ||
(info->max_indirect_segments &&
- (info->shadow[i].indirect_grants == NULL)))
+ (rinfo->shadow[i].indirect_grants == NULL)))
goto out_of_memory;
- sg_init_table(info->shadow[i].sg, psegs);
+ sg_init_table(rinfo->shadow[i].sg, psegs);
}
out_of_memory:
for (i = 0; i < BLK_RING_SIZE(info); i++) {
- kfree(info->shadow[i].grants_used);
- info->shadow[i].grants_used = NULL;
- kfree(info->shadow[i].sg);
- info->shadow[i].sg = NULL;
- kfree(info->shadow[i].indirect_grants);
- info->shadow[i].indirect_grants = NULL;
- }
- if (!list_empty(&info->indirect_pages)) {
+ kfree(rinfo->shadow[i].grants_used);
+ rinfo->shadow[i].grants_used = NULL;
+ kfree(rinfo->shadow[i].sg);
+ rinfo->shadow[i].sg = NULL;
+ kfree(rinfo->shadow[i].indirect_grants);
+ rinfo->shadow[i].indirect_grants = NULL;
+ }
+ if (!list_empty(&rinfo->indirect_pages)) {
struct page *indirect_page, *n;
- list_for_each_entry_safe(indirect_page, n, &info->indirect_pages, lru) {
+ list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
list_del(&indirect_page->lru);
__free_page(indirect_page);
}
/*
* Gather all backend feature-*
*/
-static int blkfront_gather_backend_features(struct blkfront_info *info)
+static void blkfront_gather_backend_features(struct blkfront_info *info)
{
int err;
int barrier, flush, discard, persistent;
else
info->max_indirect_segments = min(indirect_segments,
xen_blkif_max_segments);
-
- return blkfront_setup_indirect(info);
}
/*
unsigned long sector_size;
unsigned int physical_sector_size;
unsigned int binfo;
- int err;
+ int err, i;
switch (info->connected) {
case BLKIF_STATE_CONNECTED:
if (err != 1)
physical_sector_size = sector_size;
- err = blkfront_gather_backend_features(info);
- if (err) {
- xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
- info->xbdev->otherend);
- return;
+ blkfront_gather_backend_features(info);
+ for (i = 0; i < info->nr_rings; i++) {
+ err = blkfront_setup_indirect(&info->rinfo[i]);
+ if (err) {
+ xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
+ info->xbdev->otherend);
+ blkif_free(info, 0);
+ break;
+ }
}
err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
xenbus_switch_state(info->xbdev, XenbusStateConnected);
/* Kick pending requests. */
- spin_lock_irq(&info->io_lock);
info->connected = BLKIF_STATE_CONNECTED;
- kick_pending_request_queues(info);
- spin_unlock_irq(&info->io_lock);
+ for (i = 0; i < info->nr_rings; i++)
+ kick_pending_request_queues(&info->rinfo[i]);
add_disk(info->gd);
case XenbusStateInitWait:
if (dev->state != XenbusStateInitialising)
break;
- if (talk_to_blkback(dev, info)) {
- kfree(info);
- dev_set_drvdata(&dev->dev, NULL);
+ if (talk_to_blkback(dev, info))
break;
- }
case XenbusStateInitialising:
case XenbusStateInitialised:
case XenbusStateReconfiguring:
break;
case XenbusStateConnected:
+ if (dev->state != XenbusStateInitialised) {
+ if (talk_to_blkback(dev, info))
+ break;
+ }
blkfront_connect(info);
break;
static int __init xlblk_init(void)
{
int ret;
+ int nr_cpus = num_online_cpus();
if (!xen_domain())
return -ENODEV;
if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) {
pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER);
- xen_blkif_max_ring_order = 0;
+ xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
+ }
+
+ if (xen_blkif_max_queues > nr_cpus) {
+ pr_info("Invalid max_queues (%d), will use default max: %d.\n",
+ xen_blkif_max_queues, nr_cpus);
+ xen_blkif_max_queues = nr_cpus;
}
if (!xen_has_pv_disk_devices())
{
loff_t ret;
- mutex_lock(&file_inode(file)->i_mutex);
+ inode_lock(file_inode(file));
switch (orig) {
case SEEK_CUR:
offset += file->f_pos;
default:
ret = -EINVAL;
}
- mutex_unlock(&file_inode(file)->i_mutex);
+ inode_unlock(file_inode(file));
return ret;
}
spinlock_t lock; /* Serialize access to this structure. */
int count; /* Number of pages allocated. */
enum mspec_page_type type; /* Type of pages allocated. */
- int flags; /* See VMD_xxx below. */
unsigned long vm_start; /* Original (unsplit) base. */
unsigned long vm_end; /* Original (unsplit) end. */
unsigned long maddr[0]; /* Array of MSPEC addresses. */
};
-#define VMD_VMALLOCED 0x1 /* vmalloc'd rather than kmalloc'd */
-
/* used on shub2 to clear FOP cache in the HUB */
static unsigned long scratch_page[MAX_NUMNODES];
#define SH2_AMO_CACHE_ENTRIES 4
"failed to zero page %ld\n", my_page);
}
- if (vdata->flags & VMD_VMALLOCED)
- vfree(vdata);
- else
- kfree(vdata);
+ kvfree(vdata);
}
/*
enum mspec_page_type type)
{
struct vma_data *vdata;
- int pages, vdata_size, flags = 0;
+ int pages, vdata_size;
if (vma->vm_pgoff != 0)
return -EINVAL;
vdata_size = sizeof(struct vma_data) + pages * sizeof(long);
if (vdata_size <= PAGE_SIZE)
vdata = kzalloc(vdata_size, GFP_KERNEL);
- else {
+ else
vdata = vzalloc(vdata_size);
- flags = VMD_VMALLOCED;
- }
if (!vdata)
return -ENOMEM;
vdata->vm_start = vma->vm_start;
vdata->vm_end = vma->vm_end;
- vdata->flags = flags;
vdata->type = type;
spin_lock_init(&vdata->lock);
atomic_set(&vdata->refcnt, 1);
{
struct inode *inode = file_inode(file);
int err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = ps3flash_writeback(ps3flash_dev);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
config CRYPTO_DEV_ATMEL_AES
tristate "Support for Atmel AES hw accelerator"
+ depends on HAS_DMA
depends on AT_XDMAC || AT_HDMAC || COMPILE_TEST
select CRYPTO_AES
select CRYPTO_AEAD
case AES_GCMHR(2):
case AES_GCMHR(3):
snprintf(tmp, sz, "GCMHR[%u]", (offset - AES_GCMHR(0)) >> 2);
+ break;
default:
snprintf(tmp, sz, "0x%02x", offset);
{
unsigned int base_cnt, cur_cnt;
unsigned char ae;
- unsigned int times = MAX_RETRY_TIMES;
+ int times = MAX_RETRY_TIMES;
for (ae = 0; ae < handle->hal_handle->ae_max_num; ae++) {
qat_hal_rd_ae_csr(handle, ae, PROFILE_COUNT,
cur_cnt &= 0xffff;
} while (times-- && (cur_cnt == base_cnt));
- if (!times) {
+ if (times < 0) {
pr_err("QAT: AE%d is inactive!!\n", ae);
return -EFAULT;
}
void __iomem *csr_addr =
(void __iomem *)((uintptr_t)handle->hal_ep_csr_addr_v +
ESRAM_AUTO_INIT_CSR_OFFSET);
- unsigned int csr_val, times = 30;
+ unsigned int csr_val;
+ int times = 30;
+
+ if (handle->pci_dev->device == ADF_C3XXX_PCI_DEVICE_ID)
+ return 0;
csr_val = ADF_CSR_RD(csr_addr, 0);
if ((csr_val & ESRAM_AUTO_TINIT) && (csr_val & ESRAM_AUTO_TINIT_DONE))
qat_hal_wait_cycles(handle, 0, ESRAM_AUTO_INIT_USED_CYCLES, 0);
csr_val = ADF_CSR_RD(csr_addr, 0);
} while (!(csr_val & ESRAM_AUTO_TINIT_DONE) && times--);
- if ((!times)) {
+ if ((times < 0)) {
pr_err("QAT: Fail to init eSram!\n");
return -EFAULT;
}
ret = qat_hal_wait_cycles(handle, ae, 20, 1);
} while (ret && times--);
- if (!times) {
+ if (times < 0) {
pr_err("QAT: clear GPR of AE %d failed", ae);
return -EINVAL;
}
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
struct adf_bar *misc_bar =
&pci_info->pci_bars[hw_data->get_misc_bar_id(hw_data)];
- struct adf_bar *sram_bar =
- &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
+ struct adf_bar *sram_bar;
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
if (!handle)
return -ENOMEM;
- handle->hal_sram_addr_v = sram_bar->virt_addr;
handle->hal_cap_g_ctl_csr_addr_v =
(void __iomem *)((uintptr_t)misc_bar->virt_addr +
ICP_QAT_CAP_OFFSET);
(void __iomem *)((uintptr_t)handle->hal_cap_ae_xfer_csr_addr_v +
LOCAL_TO_XFER_REG_OFFSET);
handle->pci_dev = pci_info->pci_dev;
+ if (handle->pci_dev->device != ADF_C3XXX_PCI_DEVICE_ID) {
+ sram_bar =
+ &pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
+ handle->hal_sram_addr_v = sram_bar->virt_addr;
+ }
handle->fw_auth = (handle->pci_dev->device ==
ADF_DH895XCC_PCI_DEVICE_ID) ? false : true;
handle->hal_handle = kzalloc(sizeof(*handle->hal_handle), GFP_KERNEL);
void drm_ht_remove(struct drm_open_hash *ht)
{
if (ht->table) {
- if ((PAGE_SIZE / sizeof(*ht->table)) >> ht->order)
- kfree(ht->table);
- else
- vfree(ht->table);
+ kvfree(ht->table);
ht->table = NULL;
}
}
depends on NET
depends on INET
depends on m || IPV6 != m
+ select IRQ_POLL
---help---
Core support for InfiniBand (IB). Make sure to also select
any protocols you wish to use as well as drivers for your
depends on INFINIBAND
default y
+config INFINIBAND_ADDR_TRANS_CONFIGFS
+ bool
+ depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
+ default y
+ ---help---
+ ConfigFS support for RDMA communication manager (CM).
+	  This allows the user to configure the default GID type that the CM
+	  uses for each device when initiating new connections.
+
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
-ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
+ib_core-y := packer.o ud_header.o verbs.o cq.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
rdma_cm-y := cma.o
+rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
+
rdma_ucm-y := ucma.o
ib_addr-y := addr.o
}
EXPORT_SYMBOL(rdma_copy_addr);
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+int rdma_translate_ip(const struct sockaddr *addr,
+ struct rdma_dev_addr *dev_addr,
u16 *vlan_id)
{
struct net_device *dev;
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(dev_addr->net,
- ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+ ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
if (!dev)
return ret;
rcu_read_lock();
for_each_netdev_rcu(dev_addr->net, dev) {
if (ipv6_chk_addr(dev_addr->net,
- &((struct sockaddr_in6 *) addr)->sin6_addr,
+ &((const struct sockaddr_in6 *)addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
mutex_unlock(&lock);
}
-static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
+static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+ const void *daddr)
{
struct neighbour *n;
int ret;
}
static int addr4_resolve(struct sockaddr_in *src_in,
- struct sockaddr_in *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr_in *dst_in,
+ struct rdma_dev_addr *addr,
+ struct rtable **prt)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = fl4.saddr;
- if (rt->dst.dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
- if (!ret)
- memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
- goto put;
- }
+ /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+ * routable) and we could set the network type accordingly.
+ */
+ if (rt->rt_uses_gateway)
+ addr->network = RDMA_NETWORK_IPV4;
- /* If the device does ARP internally, return 'done' */
- if (rt->dst.dev->flags & IFF_NOARP) {
- ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
- goto put;
- }
+ addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
- ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
-put:
- ip_rt_put(rt);
+ *prt = rt;
+ return 0;
out:
return ret;
}
#if IS_ENABLED(CONFIG_IPV6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr,
+ struct dst_entry **pdst)
{
struct flowi6 fl6;
struct dst_entry *dst;
+ struct rt6_info *rt;
int ret;
memset(&fl6, 0, sizeof fl6);
if ((ret = dst->error))
goto put;
+ rt = (struct rt6_info *)dst;
if (ipv6_addr_any(&fl6.saddr)) {
ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
&fl6.daddr, 0, &fl6.saddr);
src_in->sin6_addr = fl6.saddr;
}
- if (dst->dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
- if (!ret)
- memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
- goto put;
- }
+ /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+ * routable) and we could set the network type accordingly.
+ */
+ if (rt->rt6i_flags & RTF_GATEWAY)
+ addr->network = RDMA_NETWORK_IPV6;
- /* If the device does ARP internally, return 'done' */
- if (dst->dev->flags & IFF_NOARP) {
- ret = rdma_copy_addr(addr, dst->dev, NULL);
- goto put;
- }
+ addr->hoplimit = ip6_dst_hoplimit(dst);
- ret = dst_fetch_ha(dst, addr, &fl6.daddr);
+ *pdst = dst;
+ return 0;
put:
dst_release(dst);
return ret;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr,
+ struct dst_entry **pdst)
{
return -EADDRNOTAVAIL;
}
#endif
+/*
+ * Fill in the link-layer addressing of @addr for the route @dst towards
+ * @dst_in.
+ *
+ * - Loopback device: translate @dst_in locally via rdma_translate_ip() and,
+ *   on success, reuse the source device address as the destination address.
+ * - Device flagged IFF_NOARP: copy the device addressing with
+ *   rdma_copy_addr().
+ * - Otherwise: look the destination up with dst_fetch_ha(), keyed by the
+ *   IPv4 or IPv6 address taken from @dst_in.
+ *
+ * Returns whatever the helper used for the matching case returns.
+ */
+static int addr_resolve_neigh(struct dst_entry *dst,
+			      const struct sockaddr *dst_in,
+			      struct rdma_dev_addr *addr)
+{
+	const void *daddr;
+	int ret;
+
+	if (dst->dev->flags & IFF_LOOPBACK) {
+		ret = rdma_translate_ip(dst_in, addr, NULL);
+		if (ret)
+			return ret;
+
+		memcpy(addr->dst_dev_addr, addr->src_dev_addr,
+		       MAX_ADDR_LEN);
+		return 0;
+	}
+
+	/* The device does ARP internally: nothing to look up. */
+	if (dst->dev->flags & IFF_NOARP)
+		return rdma_copy_addr(addr, dst->dev, NULL);
+
+	/* Pick the raw address bytes that match the address family. */
+	if (dst_in->sa_family == AF_INET)
+		daddr = &((const struct sockaddr_in *)dst_in)->sin_addr.s_addr;
+	else
+		daddr = &((const struct sockaddr_in6 *)dst_in)->sin6_addr;
+
+	return dst_fetch_ha(dst, addr, daddr);
+}
+
static int addr_resolve(struct sockaddr *src_in,
- struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr *dst_in,
+ struct rdma_dev_addr *addr,
+ bool resolve_neigh)
{
+ struct net_device *ndev;
+ struct dst_entry *dst;
+ int ret;
+
if (src_in->sa_family == AF_INET) {
- return addr4_resolve((struct sockaddr_in *) src_in,
- (struct sockaddr_in *) dst_in, addr);
- } else
- return addr6_resolve((struct sockaddr_in6 *) src_in,
- (struct sockaddr_in6 *) dst_in, addr);
+ struct rtable *rt = NULL;
+ const struct sockaddr_in *dst_in4 =
+ (const struct sockaddr_in *)dst_in;
+
+ ret = addr4_resolve((struct sockaddr_in *)src_in,
+ dst_in4, addr, &rt);
+ if (ret)
+ return ret;
+
+ if (resolve_neigh)
+ ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
+
+ ndev = rt->dst.dev;
+ dev_hold(ndev);
+
+ ip_rt_put(rt);
+ } else {
+ const struct sockaddr_in6 *dst_in6 =
+ (const struct sockaddr_in6 *)dst_in;
+
+ ret = addr6_resolve((struct sockaddr_in6 *)src_in,
+ dst_in6, addr,
+ &dst);
+ if (ret)
+ return ret;
+
+ if (resolve_neigh)
+ ret = addr_resolve_neigh(dst, dst_in, addr);
+
+ ndev = dst->dev;
+ dev_hold(ndev);
+
+ dst_release(dst);
+ }
+
+ addr->bound_dev_if = ndev->ifindex;
+ addr->net = dev_net(ndev);
+ dev_put(ndev);
+
+ return ret;
}
static void process_req(struct work_struct *work)
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
- req->status = addr_resolve(src_in, dst_in, req->addr);
+ req->status = addr_resolve(src_in, dst_in, req->addr,
+ true);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
req->client = client;
atomic_inc(&client->refcount);
- req->status = addr_resolve(src_in, dst_in, addr);
+ req->status = addr_resolve(src_in, dst_in, addr, true);
switch (req->status) {
case 0:
req->timeout = jiffies;
}
EXPORT_SYMBOL(rdma_resolve_ip);
+/*
+ * Resolve the route towards @dst_addr into @addr without neighbour
+ * resolution (addr_resolve() is called with resolve_neigh == false).
+ *
+ * @src_addr may be NULL; when given, its address family must match that of
+ * @dst_addr. Resolution works on a zero-initialised local copy of the
+ * source address, so @src_addr itself is never written.
+ *
+ * Returns -EINVAL on an address family mismatch, otherwise the result of
+ * addr_resolve().
+ */
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+			  const struct sockaddr *dst_addr,
+			  struct rdma_dev_addr *addr)
+{
+	struct sockaddr_storage ssrc_addr = {};
+	struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
+
+	if (!src_addr) {
+		/* No source supplied: only pin the address family. */
+		src_in->sa_family = dst_addr->sa_family;
+	} else if (src_addr->sa_family != dst_addr->sa_family) {
+		return -EINVAL;
+	} else {
+		memcpy(src_in, src_addr, rdma_addr_size(src_addr));
+	}
+
+	return addr_resolve(src_in, dst_addr, addr, false);
+}
+EXPORT_SYMBOL(rdma_resolve_ip_route);
+
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
complete(&((struct resolve_cb_context *)context)->comp);
}
-int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
- u8 *dmac, u16 *vlan_id, int if_index)
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+ const union ib_gid *dgid,
+ u8 *dmac, u16 *vlan_id, int *if_index,
+ int *hoplimit)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
- dev_addr.bound_dev_if = if_index;
+ if (if_index)
+ dev_addr.bound_dev_if = *if_index;
dev_addr.net = &init_net;
ctx.addr = &dev_addr;
dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
if (!dev)
return -ENODEV;
+ if (if_index)
+ *if_index = dev_addr.bound_dev_if;
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
+ if (hoplimit)
+ *hoplimit = dev_addr.hoplimit;
dev_put(dev);
return ret;
}
-EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
GID_ATTR_FIND_MASK_GID = 1UL << 0,
GID_ATTR_FIND_MASK_NETDEV = 1UL << 1,
GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2,
+ GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3,
};
enum gid_table_entry_props {
};
struct ib_gid_table_entry {
- /* This lock protects an entry from being
- * read and written simultaneously.
- */
- rwlock_t lock;
unsigned long props;
union ib_gid gid;
struct ib_gid_attr attr;
* are locked by this lock.
**/
struct mutex lock;
+ /* This lock protects the table entries from being
+ * read and written simultaneously.
+ */
+ rwlock_t rwlock;
struct ib_gid_table_entry *data_vec;
};
+/*
+ * Dispatch an IB_EVENT_GID_CHANGE event for @port of @ib_dev.
+ * Does nothing unless rdma_cap_roce_gid_table() is true for the port.
+ */
+static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
+{
+	struct ib_event event;
+
+	if (!rdma_cap_roce_gid_table(ib_dev, port))
+		return;
+
+	event.event = IB_EVENT_GID_CHANGE;
+	event.device = ib_dev;
+	event.element.port_num = port;
+
+	ib_dispatch_event(&event);
+}
+
+/*
+ * User-visible names of the GID types, indexed by enum ib_gid_type.
+ * Used both to print a GID type (ib_cache_gid_type_str) and to parse one
+ * back from text (ib_cache_gid_parse_type_str).
+ */
+static const char * const gid_type_str[] = {
+ [IB_GID_TYPE_IB] = "IB/RoCE v1",
+ [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
+};
+
+/*
+ * Return the name of @gid_type from gid_type_str[], or the string
+ * "Invalid GID type" when the value is outside the table or has no entry.
+ */
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
+{
+	const char *name = NULL;
+
+	if (gid_type < ARRAY_SIZE(gid_type_str))
+		name = gid_type_str[gid_type];
+
+	return name ? name : "Invalid GID type";
+}
+EXPORT_SYMBOL(ib_cache_gid_type_str);
+
+/*
+ * Parse a GID type name.
+ *
+ * @buf must be a NUL-terminated string; one trailing '\n' is ignored. The
+ * remaining text has to match an entry of gid_type_str[] exactly.
+ *
+ * Returns the index of the matching entry, or -EINVAL when @buf is empty
+ * or matches no entry.
+ */
+int ib_cache_gid_parse_type_str(const char *buf)
+{
+	size_t len = strlen(buf);
+	unsigned int i;
+
+	if (!len)
+		return -EINVAL;
+
+	/* Do not let a trailing newline take part in the comparison. */
+	if (buf[len - 1] == '\n')
+		len--;
+
+	for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i) {
+		const char *name = gid_type_str[i];
+
+		if (!name || strlen(name) != len)
+			continue;
+
+		if (!strncmp(buf, name, len))
+			return i;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
+
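+/* This function expects table->rwlock to be write locked in all
+ * scenarios, and table->lock (the mutex) to be held in sleep-able
+ * (RoCE) scenarios. For RoCE GID tables the rwlock is dropped and
+ * re-acquired around the call into the device driver.
+ */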
static int write_gid(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table, int ix,
const union ib_gid *gid,
const struct ib_gid_attr *attr,
enum gid_table_write_action action,
bool default_gid)
+ __releases(&table->rwlock) __acquires(&table->rwlock)
{
int ret = 0;
struct net_device *old_net_dev;
- unsigned long flags;
/* in rdma_cap_roce_gid_table, this funciton should be protected by a
* sleep-able lock.
*/
- write_lock_irqsave(&table->data_vec[ix].lock, flags);
if (rdma_cap_roce_gid_table(ib_dev, port)) {
table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
- write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
+ write_unlock_irq(&table->rwlock);
/* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
* RoCE providers and thus only updates the cache.
*/
else if (action == GID_TABLE_WRITE_ACTION_DEL)
ret = ib_dev->del_gid(ib_dev, port, ix,
&table->data_vec[ix].context);
- write_lock_irqsave(&table->data_vec[ix].lock, flags);
+ write_lock_irq(&table->rwlock);
}
old_net_dev = table->data_vec[ix].attr.ndev;
table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
- write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
-
- if (!ret && rdma_cap_roce_gid_table(ib_dev, port)) {
- struct ib_event event;
-
- event.device = ib_dev;
- event.element.port_num = port;
- event.event = IB_EVENT_GID_CHANGE;
-
- ib_dispatch_event(&event);
- }
return ret;
}
GID_TABLE_WRITE_ACTION_DEL, default_gid);
}
+/* rwlock should be read locked */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
const struct ib_gid_attr *val, bool default_gid,
- unsigned long mask)
+ unsigned long mask, int *pempty)
{
- int i;
+ int i = 0;
+ int found = -1;
+ int empty = pempty ? -1 : 0;
- for (i = 0; i < table->sz; i++) {
- unsigned long flags;
- struct ib_gid_attr *attr = &table->data_vec[i].attr;
+ while (i < table->sz && (found < 0 || empty < 0)) {
+ struct ib_gid_table_entry *data = &table->data_vec[i];
+ struct ib_gid_attr *attr = &data->attr;
+ int curr_index = i;
- read_lock_irqsave(&table->data_vec[i].lock, flags);
+ i++;
- if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
- goto next;
+ if (data->props & GID_TABLE_ENTRY_INVALID)
+ continue;
+
+ if (empty < 0)
+ if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
+ !memcmp(attr, &zattr, sizeof(*attr)) &&
+ !data->props)
+ empty = curr_index;
+
+ if (found >= 0)
+ continue;
+
+ if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
+ attr->gid_type != val->gid_type)
+ continue;
if (mask & GID_ATTR_FIND_MASK_GID &&
- memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
- goto next;
+ memcmp(gid, &data->gid, sizeof(*gid)))
+ continue;
if (mask & GID_ATTR_FIND_MASK_NETDEV &&
attr->ndev != val->ndev)
- goto next;
+ continue;
if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
- !!(table->data_vec[i].props & GID_TABLE_ENTRY_DEFAULT) !=
+ !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
default_gid)
- goto next;
+ continue;
- read_unlock_irqrestore(&table->data_vec[i].lock, flags);
- return i;
-next:
- read_unlock_irqrestore(&table->data_vec[i].lock, flags);
+ found = curr_index;
}
- return -1;
+ if (pempty)
+ *pempty = empty;
+
+ return found;
}
static void make_default_gid(struct net_device *dev, union ib_gid *gid)
int ix;
int ret = 0;
struct net_device *idev;
+ int empty;
table = ports_table[port - rdma_start_port(ib_dev)];
}
mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_NETDEV);
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_NETDEV, &empty);
if (ix >= 0)
goto out_unlock;
- ix = find_gid(table, &zgid, NULL, false, GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_DEFAULT);
- if (ix < 0) {
+ if (empty < 0) {
ret = -ENOSPC;
goto out_unlock;
}
- add_gid(ib_dev, port, table, ix, gid, attr, false);
+ ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
+ if (!ret)
+ dispatch_gid_change_event(ib_dev, port);
out_unlock:
+ write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
return ret;
}
table = ports_table[port - rdma_start_port(ib_dev)];
mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
ix = find_gid(table, gid, attr, false,
GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE |
GID_ATTR_FIND_MASK_NETDEV |
- GID_ATTR_FIND_MASK_DEFAULT);
+ GID_ATTR_FIND_MASK_DEFAULT,
+ NULL);
if (ix < 0)
goto out_unlock;
- del_gid(ib_dev, port, table, ix, false);
+ if (!del_gid(ib_dev, port, table, ix, false))
+ dispatch_gid_change_event(ib_dev, port);
out_unlock:
+ write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
return 0;
}
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
+ bool deleted = false;
table = ports_table[port - rdma_start_port(ib_dev)];
mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
for (ix = 0; ix < table->sz; ix++)
if (table->data_vec[ix].attr.ndev == ndev)
- del_gid(ib_dev, port, table, ix, false);
+ if (!del_gid(ib_dev, port, table, ix, false))
+ deleted = true;
+ write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
+
+ if (deleted)
+ dispatch_gid_change_event(ib_dev, port);
+
return 0;
}
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- unsigned long flags;
table = ports_table[port - rdma_start_port(ib_dev)];
if (index < 0 || index >= table->sz)
return -EINVAL;
- read_lock_irqsave(&table->data_vec[index].lock, flags);
- if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) {
- read_unlock_irqrestore(&table->data_vec[index].lock, flags);
+ if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
return -EAGAIN;
- }
memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
if (attr) {
dev_hold(attr->ndev);
}
- read_unlock_irqrestore(&table->data_vec[index].lock, flags);
return 0;
}
struct ib_gid_table *table;
u8 p;
int local_index;
+ unsigned long flags;
for (p = 0; p < ib_dev->phys_port_cnt; p++) {
table = ports_table[p];
- local_index = find_gid(table, gid, val, false, mask);
+ read_lock_irqsave(&table->rwlock, flags);
+ local_index = find_gid(table, gid, val, false, mask, NULL);
if (local_index >= 0) {
if (index)
*index = local_index;
if (port)
*port = p + rdma_start_port(ib_dev);
+ read_unlock_irqrestore(&table->rwlock, flags);
return 0;
}
+ read_unlock_irqrestore(&table->rwlock, flags);
}
return -ENOENT;
static int ib_cache_gid_find(struct ib_device *ib_dev,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
struct net_device *ndev, u8 *port,
u16 *index)
{
- unsigned long mask = GID_ATTR_FIND_MASK_GID;
- struct ib_gid_attr gid_attr_val = {.ndev = ndev};
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE;
+ struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
u8 port, struct net_device *ndev,
u16 *index)
{
int local_index;
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- unsigned long mask = GID_ATTR_FIND_MASK_GID;
- struct ib_gid_attr val = {.ndev = ndev};
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE;
+ struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
+ unsigned long flags;
if (port < rdma_start_port(ib_dev) ||
port > rdma_end_port(ib_dev))
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
- local_index = find_gid(table, gid, &val, false, mask);
+ read_lock_irqsave(&table->rwlock, flags);
+ local_index = find_gid(table, gid, &val, false, mask, NULL);
if (local_index >= 0) {
if (index)
*index = local_index;
+ read_unlock_irqrestore(&table->rwlock, flags);
return 0;
}
+ read_unlock_irqrestore(&table->rwlock, flags);
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_cached_gid_by_port);
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned int i;
+ unsigned long flags;
bool found = false;
if (!ports_table)
table = ports_table[port - rdma_start_port(ib_dev)];
+ read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
struct ib_gid_attr attr;
- unsigned long flags;
- read_lock_irqsave(&table->data_vec[i].lock, flags);
if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
goto next;
found = true;
next:
- read_unlock_irqrestore(&table->data_vec[i].lock, flags);
-
if (found)
break;
}
+ read_unlock_irqrestore(&table->rwlock, flags);
if (!found)
return -ENOENT;
static struct ib_gid_table *alloc_gid_table(int sz)
{
- unsigned int i;
struct ib_gid_table *table =
kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+
if (!table)
return NULL;
mutex_init(&table->lock);
table->sz = sz;
-
- for (i = 0; i < sz; i++)
- rwlock_init(&table->data_vec[i].lock);
+ rwlock_init(&table->rwlock);
return table;
struct ib_gid_table *table)
{
int i;
+ bool deleted = false;
if (!table)
return;
+ write_lock_irq(&table->rwlock);
for (i = 0; i < table->sz; ++i) {
if (memcmp(&table->data_vec[i].gid, &zgid,
sizeof(table->data_vec[i].gid)))
- del_gid(ib_dev, port, table, i,
- table->data_vec[i].props &
- GID_ATTR_FIND_MASK_DEFAULT);
+ if (!del_gid(ib_dev, port, table, i,
+ table->data_vec[i].props &
+ GID_ATTR_FIND_MASK_DEFAULT))
+ deleted = true;
}
+ write_unlock_irq(&table->rwlock);
+
+ if (deleted)
+ dispatch_gid_change_event(ib_dev, port);
}
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
struct net_device *ndev,
+ unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
union ib_gid gid;
struct ib_gid_attr gid_attr;
+ struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
- int ix;
- union ib_gid current_gid;
- struct ib_gid_attr current_gid_attr = {};
+ unsigned int gid_type;
table = ports_table[port - rdma_start_port(ib_dev)];
memset(&gid_attr, 0, sizeof(gid_attr));
gid_attr.ndev = ndev;
- mutex_lock(&table->lock);
- ix = find_gid(table, NULL, NULL, true, GID_ATTR_FIND_MASK_DEFAULT);
-
- /* Coudn't find default GID location */
- WARN_ON(ix < 0);
-
- if (!__ib_cache_gid_get(ib_dev, port, ix,
- ¤t_gid, ¤t_gid_attr) &&
- mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
- !memcmp(&gid, ¤t_gid, sizeof(gid)) &&
- !memcmp(&gid_attr, ¤t_gid_attr, sizeof(gid_attr)))
- goto unlock;
-
- if ((memcmp(¤t_gid, &zgid, sizeof(current_gid)) ||
- memcmp(¤t_gid_attr, &zattr,
- sizeof(current_gid_attr))) &&
- del_gid(ib_dev, port, table, ix, true)) {
- pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
- ix, gid.raw);
- goto unlock;
- }
+ for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
+ int ix;
+ union ib_gid current_gid;
+ struct ib_gid_attr current_gid_attr = {};
+
+ if (1UL << gid_type & ~gid_type_mask)
+ continue;
+
+ gid_attr.gid_type = gid_type;
+
+ mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
+ ix = find_gid(table, NULL, &gid_attr, true,
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT,
+ NULL);
+
+		/* Couldn't find default GID location */
+ WARN_ON(ix < 0);
+
+ zattr_type.gid_type = gid_type;
+
+ if (!__ib_cache_gid_get(ib_dev, port, ix,
+ ¤t_gid, ¤t_gid_attr) &&
+ mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
+ !memcmp(&gid, ¤t_gid, sizeof(gid)) &&
+ !memcmp(&gid_attr, ¤t_gid_attr, sizeof(gid_attr)))
+ goto release;
+
+ if (memcmp(¤t_gid, &zgid, sizeof(current_gid)) ||
+ memcmp(¤t_gid_attr, &zattr_type,
+ sizeof(current_gid_attr))) {
+ if (del_gid(ib_dev, port, table, ix, true)) {
+ pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
+ ix, gid.raw);
+ goto release;
+ } else {
+ dispatch_gid_change_event(ib_dev, port);
+ }
+ }
- if (mode == IB_CACHE_GID_DEFAULT_MODE_SET)
- if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
- pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
- gid.raw);
+ if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
+ if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
+ pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
+ gid.raw);
+ else
+ dispatch_gid_change_event(ib_dev, port);
+ }
-unlock:
- if (current_gid_attr.ndev)
- dev_put(current_gid_attr.ndev);
- mutex_unlock(&table->lock);
+release:
+ if (current_gid_attr.ndev)
+ dev_put(current_gid_attr.ndev);
+ write_unlock_irq(&table->rwlock);
+ mutex_unlock(&table->lock);
+ }
}
static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table)
{
- if (rdma_protocol_roce(ib_dev, port)) {
- struct ib_gid_table_entry *entry = &table->data_vec[0];
+ unsigned int i;
+ unsigned long roce_gid_type_mask;
+ unsigned int num_default_gids;
+ unsigned int current_gid = 0;
+
+ roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+ num_default_gids = hweight_long(roce_gid_type_mask);
+ for (i = 0; i < num_default_gids && i < table->sz; i++) {
+ struct ib_gid_table_entry *entry =
+ &table->data_vec[i];
entry->props |= GID_TABLE_ENTRY_DEFAULT;
+ current_gid = find_next_bit(&roce_gid_type_mask,
+ BITS_PER_LONG,
+ current_gid);
+ entry->attr.gid_type = current_gid++;
}
return 0;
union ib_gid *gid,
struct ib_gid_attr *gid_attr)
{
+	int res;
+	unsigned long flags;
+	struct ib_gid_table **ports_table = device->cache.gid_cache;
+	struct ib_gid_table *table;
+
 	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
 		return -EINVAL;
-	return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+	/*
+	 * Index the per-port table array only after port_num has been
+	 * validated; doing it in the initialiser would read outside the
+	 * array for an out-of-range port.
+	 */
+	table = ports_table[port_num - rdma_start_port(device)];
+
+	/* Hold the table read lock across the lookup of the entry. */
+	read_lock_irqsave(&table->rwlock, flags);
+	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+	read_unlock_irqrestore(&table->rwlock, flags);
+
+	return res;
}
EXPORT_SYMBOL(ib_get_cached_gid);
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
struct net_device *ndev,
u8 *port_num,
u16 *index)
{
- return ib_cache_gid_find(device, gid, ndev, port_num, index);
+ return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);
device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
if (!use_roce_gid_table) {
+ write_lock(&table->rwlock);
for (i = 0; i < gid_cache->table_len; i++) {
modify_gid(device, port, table, i, gid_cache->table + i,
&zattr, false);
}
+ write_unlock(&table->rwlock);
}
device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- ndev, &p, NULL)) {
+ path->gid_type, ndev, &p, NULL)) {
port = cm_dev->port[p-1];
break;
}
wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
/* Check if the device started its remove_one */
- spin_lock_irq(&cm.lock);
+ spin_lock_irqsave(&cm.lock, flags);
if (!cm_dev->going_down)
queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
msecs_to_jiffies(wait_time));
- spin_unlock_irq(&cm.lock);
+ spin_unlock_irqrestore(&cm.lock, flags);
cm_id_priv->timewait_info = NULL;
}
struct ib_cm_id *cm_id;
struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_req_msg *req_msg;
+ union ib_gid gid;
+ struct ib_gid_attr gid_attr;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+ work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
+ ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ cm_id_priv->av.ah_attr.grh.sgid_index,
+ &gid, &gid_attr);
+ if (!ret) {
+ if (gid_attr.ndev) {
+ work->path[0].ifindex = gid_attr.ndev->ifindex;
+ work->path[0].net = dev_net(gid_attr.ndev);
+ dev_put(gid_attr.ndev);
+ }
+ work->path[0].gid_type = gid_attr.gid_type;
+ ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+ }
if (ret) {
- ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0, &work->path[0].sgid,
- NULL);
+ int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
+ work->port->port_num, 0,
+ &work->path[0].sgid,
+ &gid_attr);
+ if (!err && gid_attr.ndev) {
+ work->path[0].ifindex = gid_attr.ndev->ifindex;
+ work->path[0].net = dev_net(gid_attr.ndev);
+ dev_put(gid_attr.ndev);
+ }
+ work->path[0].gid_type = gid_attr.gid_type;
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
EXPORT_SYMBOL(ib_cm_notify);
static void cm_recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct cm_port *port = mad_agent->context;
}
EXPORT_SYMBOL(ib_cm_init_qp_attr);
-static void cm_get_ack_delay(struct cm_device *cm_dev)
-{
- struct ib_device_attr attr;
-
- if (ib_query_device(cm_dev->ib_device, &attr))
- cm_dev->ack_delay = 0; /* acks will rely on packet life time */
- else
- cm_dev->ack_delay = attr.local_ca_ack_delay;
-}
-
static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
char *buf)
{
return;
cm_dev->ib_device = ib_device;
- cm_get_ack_delay(cm_dev);
+ cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
cm_dev->going_down = 0;
cm_dev->device = device_create(&cm_class, &ib_device->dev,
MKDEV(0, 0), NULL,
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/igmp.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>
+#include "core_priv.h"
+
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");
struct completion comp;
atomic_t refcount;
struct list_head id_list;
+ enum ib_gid_type *default_gid_type;
};
struct rdma_bind_list {
CMA_OPTION_AFONLY,
};
+/* Take a reference on @cma_dev by incrementing its refcount. */
+void cma_ref_dev(struct cma_device *cma_dev)
+{
+ atomic_inc(&cma_dev->refcount);
+}
+
+/*
+ * Walk dev_list under the global lock and return the first cma_device
+ * whose ib_device is accepted by @filter (called as
+ * filter(ib_device, @cookie)).
+ *
+ * A reference is taken on the returned device via cma_ref_dev() before the
+ * lock is dropped. Returns NULL when no device matches.
+ */
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+					     void *cookie)
+{
+	struct cma_device *cma_dev;
+	struct cma_device *match = NULL;
+
+	mutex_lock(&lock);
+
+	list_for_each_entry(cma_dev, &dev_list, list) {
+		if (!filter(cma_dev->device, cookie))
+			continue;
+
+		cma_ref_dev(cma_dev);
+		match = cma_dev;
+		break;
+	}
+
+	mutex_unlock(&lock);
+	return match;
+}
+
+/*
+ * Return the default GID type stored for @port of @cma_dev, or -EINVAL
+ * when @port is outside the device's port range.
+ */
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+			     unsigned int port)
+{
+	struct ib_device *ibdev = cma_dev->device;
+
+	if (port < rdma_start_port(ibdev))
+		return -EINVAL;
+
+	if (port > rdma_end_port(ibdev))
+		return -EINVAL;
+
+	/* The per-port array is indexed relative to the first port. */
+	return cma_dev->default_gid_type[port - rdma_start_port(ibdev)];
+}
+
+/*
+ * Set the default GID type used for @port of @cma_dev.
+ *
+ * Returns 0 on success, or -EINVAL when @port is outside the device's port
+ * range or @default_gid_type is not set in the mask returned by
+ * roce_gid_type_mask_support() for that port.
+ */
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+			     unsigned int port,
+			     enum ib_gid_type default_gid_type)
+{
+	unsigned long supported_gids;
+
+	if (port < rdma_start_port(cma_dev->device) ||
+	    port > rdma_end_port(cma_dev->device))
+		return -EINVAL;
+
+	supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
+
+	/*
+	 * Build the probe bit as unsigned long so it has the same width as
+	 * the mask it is tested against.
+	 */
+	if (!(supported_gids & (1UL << default_gid_type)))
+		return -EINVAL;
+
+	/* The per-port array is indexed relative to the first port. */
+	cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
+		default_gid_type;
+
+	return 0;
+}
+
+/* Return the ib_device that @cma_dev wraps; no reference is taken here. */
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
+{
+ return cma_dev->device;
+}
+
/*
* Device removal can occur at anytime, so we need extra handling to
* serialize notifying the user of device removal with other callbacks.
u8 tos;
u8 reuseaddr;
u8 afonly;
+ enum ib_gid_type gid_type;
};
struct cma_multicast {
void *context;
struct sockaddr_storage addr;
struct kref mcref;
+ bool igmp_joined;
};
struct cma_work {
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
-static void cma_attach_to_dev(struct rdma_id_private *id_priv,
- struct cma_device *cma_dev)
+static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
- atomic_inc(&cma_dev->refcount);
+ struct in_device *in_dev = NULL;
+
+ if (ndev) {
+ rtnl_lock();
+ in_dev = __in_dev_get_rtnl(ndev);
+ if (in_dev) {
+ if (join)
+ ip_mc_inc_group(in_dev,
+ *(__be32 *)(mgid->raw + 12));
+ else
+ ip_mc_dec_group(in_dev,
+ *(__be32 *)(mgid->raw + 12));
+ }
+ rtnl_unlock();
+ }
+ return (in_dev) ? 0 : -ENODEV;
+}
+
+static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev)
+{
+ cma_ref_dev(cma_dev);
id_priv->cma_dev = cma_dev;
+ id_priv->gid_type = 0;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
}
-static inline void cma_deref_dev(struct cma_device *cma_dev)
+/*
+ * Attach @id_priv to @cma_dev via _cma_attach_to_dev(), then replace the
+ * GID type that helper set with the device's configured default for the
+ * id's port.
+ */
+static void cma_attach_to_dev(struct rdma_id_private *id_priv,
+			      struct cma_device *cma_dev)
+{
+	const enum ib_gid_type *port_defaults;
+
+	_cma_attach_to_dev(id_priv, cma_dev);
+
+	/* The per-port array is indexed relative to the first port. */
+	port_defaults = cma_dev->default_gid_type;
+	id_priv->gid_type = port_defaults[id_priv->id.port_num -
+					  rdma_start_port(cma_dev->device)];
+}
+
+void cma_deref_dev(struct cma_device *cma_dev)
{
if (atomic_dec_and_test(&cma_dev->refcount))
complete(&cma_dev->comp);
}
static inline int cma_validate_port(struct ib_device *device, u8 port,
+ enum ib_gid_type gid_type,
union ib_gid *gid, int dev_type,
int bound_if_index)
{
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
return ret;
- if (dev_type == ARPHRD_ETHER)
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
ndev = dev_get_by_index(&init_net, bound_if_index);
+ if (ndev && ndev->flags & IFF_LOOPBACK) {
+ pr_info("detected loopback device\n");
+ dev_put(ndev);
- ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
+
+ ndev = device->get_netdev(device, port);
+ if (!ndev)
+ return -ENODEV;
+ }
+ } else {
+ gid_type = IB_GID_TYPE_IB;
+ }
+
+ ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
+ ndev, NULL);
if (ndev)
dev_put(ndev);
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
- ret = cma_validate_port(cma_dev->device, port, gidp,
+ ret = cma_validate_port(cma_dev->device, port,
+ rdma_protocol_ib(cma_dev->device, port) ?
+ IB_GID_TYPE_IB :
+ listen_id_priv->gid_type, gidp,
dev_addr->dev_type,
dev_addr->bound_dev_if);
if (!ret) {
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
- ret = cma_validate_port(cma_dev->device, port, gidp,
- dev_addr->dev_type,
+ ret = cma_validate_port(cma_dev->device, port,
+ rdma_protocol_ib(cma_dev->device, port) ?
+ IB_GID_TYPE_IB :
+ cma_dev->default_gid_type[port - 1],
+ gidp, dev_addr->dev_type,
dev_addr->bound_dev_if);
if (!ret) {
id_priv->id.port_num = port;
id_priv->id.port_num)) {
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
- } else
+ } else {
+ if (mc->igmp_joined) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(&init_net,
+ dev_addr->bound_dev_if);
+ if (ndev) {
+ cma_igmp_send(ndev,
+ &mc->multicast.ib->rec.mgid,
+ false);
+ dev_put(ndev);
+ }
+ }
kref_put(&mc->mcref, release_mc);
+ }
}
}
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
int ret;
- struct ib_device_attr attr;
struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
- ret = ib_query_device(conn_id->id.device, &attr);
- if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
- }
-
memset(&event, 0, sizeof event);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
event.param.conn.private_data = iw_event->private_data;
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
- cma_attach_to_dev(dev_id_priv, cma_dev);
+ _cma_attach_to_dev(dev_id_priv, cma_dev);
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
atomic_inc(&id_priv->refcount);
dev_id_priv->internal_id = 1;
if (addr->dev_addr.bound_dev_if) {
ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+ if (!ndev)
+ return -ENODEV;
+
+ if (ndev->flags & IFF_LOOPBACK) {
+ dev_put(ndev);
+ if (!id_priv->id.device->get_netdev)
+ return -EOPNOTSUPP;
+
+ ndev = id_priv->id.device->get_netdev(id_priv->id.device,
+ id_priv->id.port_num);
+ if (!ndev)
+ return -ENODEV;
+ }
+
route->path_rec->net = &init_net;
- route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
+ route->path_rec->ifindex = ndev->ifindex;
+ route->path_rec->gid_type = id_priv->gid_type;
}
if (!ndev) {
ret = -ENODEV;
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
&route->path_rec->dgid);
- route->path_rec->hop_limit = 1;
+ /* Use the hint from IP Stack to select GID Type */
+ if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+ route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
+ /* TODO: get the hoplimit from the inet/inet6 device */
+ route->path_rec->hop_limit = addr->dev_addr.hoplimit;
+ else
+ route->path_rec->hop_limit = 1;
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
event.status = status;
event.param.ud.private_data = mc->context;
if (!status) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev =
+ dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ enum ib_gid_type gid_type =
+ id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+
event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
ib_init_ah_from_mcmember(id_priv->id.device,
id_priv->id.port_num, &multicast->rec,
+ ndev, gid_type,
&event.param.ud.ah_attr);
event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+ if (ndev)
+ dev_put(ndev);
} else
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
{
struct iboe_mcast_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- int err;
+ int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
+ enum ib_gid_type gid_type;
if (cma_zero_addr((struct sockaddr *)&mc->addr))
return -EINVAL;
mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
mc->multicast.ib->rec.hop_limit = 1;
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+
+ gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ if (addr->sa_family == AF_INET) {
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+ true);
+ if (!err) {
+ mc->igmp_joined = true;
+ mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+ }
+ } else {
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ err = -ENOTSUPP;
+ }
dev_put(ndev);
- if (!mc->multicast.ib->rec.mtu) {
- err = -EINVAL;
+ if (err || !mc->multicast.ib->rec.mtu) {
+ if (!err)
+ err = -EINVAL;
goto out2;
}
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
memcpy(&mc->addr, addr, rdma_addr_size(addr));
mc->context = context;
mc->id_priv = id_priv;
-
+ mc->igmp_joined = false;
spin_lock(&id_priv->lock);
list_add(&mc->list, &id_priv->mc_list);
spin_unlock(&id_priv->lock);
if (rdma_cap_ib_mcast(id->device, id->port_num)) {
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
- } else if (rdma_protocol_roce(id->device, id->port_num))
+ } else if (rdma_protocol_roce(id->device, id->port_num)) {
+ if (mc->igmp_joined) {
+ struct rdma_dev_addr *dev_addr =
+ &id->route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(&init_net,
+ dev_addr->bound_dev_if);
+ if (ndev) {
+ cma_igmp_send(ndev,
+ &mc->multicast.ib->rec.mgid,
+ false);
+ dev_put(ndev);
+ }
+ mc->igmp_joined = false;
+ }
kref_put(&mc->mcref, release_mc);
-
+ }
return;
}
}
{
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
+ unsigned int i;
+ unsigned long supported_gids = 0;
cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
if (!cma_dev)
return;
cma_dev->device = device;
+ cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_gid_type),
+ GFP_KERNEL);
+ if (!cma_dev->default_gid_type) {
+ kfree(cma_dev);
+ return;
+ }
+ for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+ supported_gids = roce_gid_type_mask_support(device, i);
+ WARN_ON(!supported_gids);
+ cma_dev->default_gid_type[i - rdma_start_port(device)] =
+ find_first_bit(&supported_gids, BITS_PER_LONG);
+ }
init_completion(&cma_dev->comp);
atomic_set(&cma_dev->refcount, 1);
mutex_unlock(&lock);
cma_process_remove(cma_dev);
+ kfree(cma_dev->default_gid_type);
kfree(cma_dev);
}
if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
+ cma_configfs_init();
return 0;
static void __exit cma_cleanup(void)
{
+ cma_configfs_exit();
ibnl_remove_client(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
--- /dev/null
+/*
+ * Copyright (c) 2015, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/configfs.h>
+#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
+struct cma_device;
+
+struct cma_dev_group;
+
+struct cma_dev_port_group {
+ unsigned int port_num; /* port number; make_cma_ports() assigns i + 1 */
+ struct cma_dev_group *cma_dev_group; /* back-pointer to the owning device group */
+ struct config_group group; /* configfs group named after the port number */
+};
+
+struct cma_dev_group {
+ char name[IB_DEVICE_NAME_MAX]; /* device name; compared with ib_dev->name by filter_by_name() */
+ struct config_group device_group; /* group handed back by make_cma_dev() */
+ struct config_group ports_group; /* the "ports" child group */
+ struct config_group *default_dev_group[2]; /* { &ports_group, NULL }, set in make_cma_dev() */
+ struct config_group **default_ports_group; /* NULL-terminated array built by make_cma_ports() */
+ struct cma_dev_port_group *ports; /* array with one entry per port, built by make_cma_ports() */
+};
+
+/*
+ * Map a configfs item back to the cma_dev_port_group that embeds its
+ * config_group. A NULL @item yields NULL.
+ */
+static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
+{
+	struct config_group *cg;
+
+	if (item) {
+		cg = container_of(item, struct config_group, cg_item);
+		return container_of(cg, struct cma_dev_port_group, group);
+	}
+
+	return NULL;
+}
+
+/* cma_device_filter callback: @cookie is the device name to look for. */
+static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
+{
+	const char *wanted = cookie;
+
+	return strcmp(ib_dev->name, wanted) == 0;
+}
+
+/*
+ * Resolve a port attribute's configfs @item to its port group and to the
+ * cma_device whose name matches the owning device group.
+ *
+ * Returns 0 and fills @pcma_dev / @pgroup on success, -ENODEV when @item is
+ * NULL or no matching device is found. On success the caller is expected to
+ * call cma_configfs_params_put() on the returned device.
+ */
+static int cma_configfs_params_get(struct config_item *item,
+				   struct cma_device **pcma_dev,
+				   struct cma_dev_port_group **pgroup)
+{
+	struct cma_dev_port_group *port_grp = to_dev_port_group(item);
+	struct cma_device *found = NULL;
+
+	if (port_grp)
+		found = cma_enum_devices_by_ibdev(filter_by_name,
+						  port_grp->cma_dev_group->name);
+	if (!found)
+		return -ENODEV;
+
+	*pcma_dev = found;
+	*pgroup = port_grp;
+	return 0;
+}
+
+static void cma_configfs_params_put(struct cma_device *cma_dev)
+{
+ cma_deref_dev(cma_dev); /* presumably balances a reference taken by cma_enum_devices_by_ibdev() - TODO confirm */
+}
+
+/*
+ * configfs read handler for default_roce_mode: prints the name of the
+ * port's default GID type followed by a newline.
+ *
+ * Returns the number of bytes written to @buf, or a negative errno when
+ * the device cannot be resolved or the GID type lookup fails.
+ */
+static ssize_t default_roce_mode_show(struct config_item *item,
+				      char *buf)
+{
+	struct cma_dev_port_group *port_grp;
+	struct cma_device *cma_dev;
+	ssize_t err;
+	int type;
+
+	err = cma_configfs_params_get(item, &cma_dev, &port_grp);
+	if (err)
+		return err;
+
+	type = cma_get_default_gid_type(cma_dev, port_grp->port_num);
+	cma_configfs_params_put(cma_dev);
+
+	return type < 0 ?
+	       type : sprintf(buf, "%s\n", ib_cache_gid_type_str(type));
+}
+
+/*
+ * configfs write handler for default_roce_mode: parses @buf as a GID type
+ * name and stores it as the port's default.
+ *
+ * Returns the length of the input consumed (strnlen of @buf bounded by
+ * @count) on success, -EINVAL for an unparsable type name, or the error
+ * from device lookup / cma_set_default_gid_type().
+ */
+static ssize_t default_roce_mode_store(struct config_item *item,
+				       const char *buf, size_t count)
+{
+	int type = ib_cache_gid_parse_type_str(buf);
+	struct cma_dev_port_group *port_grp;
+	struct cma_device *cma_dev;
+	ssize_t rc;
+
+	if (type < 0)
+		return -EINVAL;
+
+	rc = cma_configfs_params_get(item, &cma_dev, &port_grp);
+	if (rc)
+		return rc;
+
+	rc = cma_set_default_gid_type(cma_dev, port_grp->port_num, type);
+	cma_configfs_params_put(cma_dev);
+	if (rc)
+		return rc;
+
+	return strnlen(buf, count);
+}
+
+/* Declares attr_default_roce_mode, which the attribute table below lists. */
+CONFIGFS_ATTR(, default_roce_mode);
+
+/* Attributes exposed in every per-port directory (NULL-terminated). */
+static struct configfs_attribute *cma_configfs_attributes[] = {
+	&attr_default_roce_mode,
+	NULL,
+};
+
+/* Item type used for each per-port group created by make_cma_ports(). */
+static struct config_item_type cma_port_group_type = {
+	.ct_owner	= THIS_MODULE,
+	.ct_attrs	= cma_configfs_attributes,
+};
+
+/*
+ * Allocate and initialise one configfs group per physical port of
+ * @cma_dev and publish them through @cma_dev_group->ports and
+ * @cma_dev_group->default_ports_group (a NULL-terminated pointer array).
+ *
+ * Returns 0 on success, -ENODEV when @cma_dev has no ib_device, or
+ * -ENOMEM when an allocation fails (both group fields are then NULL).
+ */
+static int make_cma_ports(struct cma_dev_group *cma_dev_group,
+			  struct cma_device *cma_dev)
+{
+	struct ib_device *ibdev = cma_get_ib_dev(cma_dev);
+	struct cma_dev_port_group *port_arr;
+	struct config_group **group_arr;
+	unsigned int nports;
+	unsigned int idx;
+
+	if (!ibdev)
+		return -ENODEV;
+
+	nports = ibdev->phys_port_cnt;
+	port_arr = kcalloc(nports, sizeof(*cma_dev_group->ports),
+			   GFP_KERNEL);
+	/* One extra slot for the terminating NULL entry. */
+	group_arr = kcalloc(nports + 1, sizeof(*group_arr), GFP_KERNEL);
+
+	if (!port_arr || !group_arr) {
+		kfree(port_arr);
+		kfree(group_arr);
+		cma_dev_group->ports = NULL;
+		cma_dev_group->default_ports_group = NULL;
+		return -ENOMEM;
+	}
+
+	for (idx = 0; idx < nports; idx++) {
+		struct cma_dev_port_group *port = &port_arr[idx];
+		char port_str[10];
+
+		/* Directory names and port numbers are 1-based. */
+		port->port_num = idx + 1;
+		snprintf(port_str, sizeof(port_str), "%u", idx + 1);
+		port->cma_dev_group = cma_dev_group;
+		config_group_init_type_name(&port->group,
+					    port_str,
+					    &cma_port_group_type);
+		group_arr[idx] = &port->group;
+	}
+	group_arr[nports] = NULL;
+
+	cma_dev_group->default_ports_group = group_arr;
+	cma_dev_group->ports = port_arr;
+
+	return 0;
+}
+
+/* configfs ->release() for a device directory: frees its cma_dev_group. */
+static void release_cma_dev(struct config_item *item)
+{
+	struct config_group *cg;
+	struct cma_dev_group *dev_grp;
+
+	cg = container_of(item, struct config_group, cg_item);
+	dev_grp = container_of(cg, struct cma_dev_group, device_group);
+
+	kfree(dev_grp);
+}
+
+/*
+ * configfs ->release() for the "ports" directory: frees the per-port
+ * arrays built by make_cma_ports() and clears the pointers to them.
+ */
+static void release_cma_ports_group(struct config_item *item)
+{
+	struct config_group *cg;
+	struct cma_dev_group *dev_grp;
+
+	cg = container_of(item, struct config_group, cg_item);
+	dev_grp = container_of(cg, struct cma_dev_group, ports_group);
+
+	kfree(dev_grp->ports);
+	dev_grp->ports = NULL;
+
+	kfree(dev_grp->default_ports_group);
+	dev_grp->default_ports_group = NULL;
+}
+
+/* "ports" directory: its release hook frees the per-port arrays. */
+static struct configfs_item_operations cma_ports_item_ops = {
+	.release	= release_cma_ports_group,
+};
+
+static struct config_item_type cma_ports_group_type = {
+	.ct_owner	= THIS_MODULE,
+	.ct_item_ops	= &cma_ports_item_ops,
+};
+
+/* Per-device directory: its release hook frees the cma_dev_group itself. */
+static struct configfs_item_operations cma_device_item_ops = {
+	.release	= release_cma_dev,
+};
+
+static struct config_item_type cma_device_group_type = {
+	.ct_owner	= THIS_MODULE,
+	.ct_item_ops	= &cma_device_item_ops,
+};
+
+/*
+ * configfs ->make_group() callback of the rdma_cm subsystem, i.e. the
+ * handler behind "mkdir /config/rdma_cm/<name>".
+ *
+ * Looks up the cma_device whose ib_device is called @name, allocates a
+ * cma_dev_group for it and wires up the "ports" sub-directory with one
+ * child group per port.
+ *
+ * Returns the new device group on success, or an ERR_PTR():
+ *   -ENODEV  no device named @name (or it has no ib_device)
+ *   -ENOMEM  allocation failure
+ *
+ * The device obtained from cma_enum_devices_by_ibdev() is released with
+ * cma_deref_dev() on every path; the group keeps only a copy of the name
+ * and looks the device up again on each attribute access.
+ */
+static struct config_group *make_cma_dev(struct config_group *group,
+					 const char *name)
+{
+	int err = -ENODEV;
+	struct cma_device *cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
+							       (void *)name);
+	struct cma_dev_group *cma_dev_group = NULL;
+
+	if (!cma_dev)
+		goto fail;
+
+	cma_dev_group = kzalloc(sizeof(*cma_dev_group), GFP_KERNEL);
+
+	if (!cma_dev_group) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	/*
+	 * The stored name is later handed to strcmp() by filter_by_name(),
+	 * so it must be NUL-terminated. strncpy() with the full buffer size
+	 * does not guarantee that; strlcpy() does.
+	 */
+	strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+
+	err = make_cma_ports(cma_dev_group, cma_dev);
+	if (err)
+		goto fail;
+
+	cma_dev_group->ports_group.default_groups =
+		cma_dev_group->default_ports_group;
+	config_group_init_type_name(&cma_dev_group->ports_group, "ports",
+				    &cma_ports_group_type);
+
+	cma_dev_group->device_group.default_groups
+		= cma_dev_group->default_dev_group;
+	cma_dev_group->default_dev_group[0] = &cma_dev_group->ports_group;
+	cma_dev_group->default_dev_group[1] = NULL;
+
+	config_group_init_type_name(&cma_dev_group->device_group, name,
+				    &cma_device_group_type);
+
+	cma_deref_dev(cma_dev);
+	return &cma_dev_group->device_group;
+
+fail:
+	if (cma_dev)
+		cma_deref_dev(cma_dev);
+	kfree(cma_dev_group);
+	return ERR_PTR(err);
+}
+
+/* Top-level rdma_cm directory: creating a child directory runs make_cma_dev(). */
+static struct configfs_group_operations cma_subsys_group_ops = {
+	.make_group	= make_cma_dev,
+};
+
+static struct config_item_type cma_subsys_type = {
+	.ct_owner	= THIS_MODULE,
+	.ct_group_ops	= &cma_subsys_group_ops,
+};
+
+/* The configfs subsystem itself, registered under the name "rdma_cm". */
+static struct configfs_subsystem cma_subsys = {
+	.su_group.cg_item = {
+		.ci_namebuf	= "rdma_cm",
+		.ci_type	= &cma_subsys_type,
+	},
+};
+
+/*
+ * Initialise the rdma_cm configfs subsystem and register it.
+ * Returns the result of configfs_register_subsystem().
+ */
+int __init cma_configfs_init(void)
+{
+	struct config_group *root = &cma_subsys.su_group;
+
+	config_group_init(root);
+	mutex_init(&cma_subsys.su_mutex);
+
+	return configfs_register_subsystem(&cma_subsys);
+}
+
+void __exit cma_configfs_exit(void)
+{
+ configfs_unregister_subsystem(&cma_subsys); /* undoes the registration done in cma_configfs_init() */
+}
#include <rdma/ib_verbs.h>
+#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
+int cma_configfs_init(void);
+void cma_configfs_exit(void);
+#else
+static inline int cma_configfs_init(void)
+{
+ return 0;
+}
+
+static inline void cma_configfs_exit(void)
+{
+}
+#endif
+struct cma_device;
+void cma_ref_dev(struct cma_device *cma_dev);
+void cma_deref_dev(struct cma_device *cma_dev);
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+ void *cookie);
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+ unsigned int port);
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+ unsigned int port,
+ enum ib_gid_type default_gid_type);
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
+
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
IB_CACHE_GID_DEFAULT_MODE_DELETE
};
+int ib_cache_gid_parse_type_str(const char *buf);
+
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
+
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
struct net_device *ndev,
+ unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode);
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
void roce_gid_mgmt_cleanup(void);
int roce_rescan_device(struct ib_device *ib_dev);
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+/*
+ * Report whether @upper appears among the upper devices of @dev, as
+ * walked by netdev_for_each_all_upper_dev_rcu().
+ *
+ * NOTE(review): the _rcu iterator presumably requires the caller to hold
+ * rcu_read_lock() - confirm against callers.
+ */
+static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
+					 struct net_device *upper)
+{
+	struct net_device *cur = NULL;
+	struct list_head *iter;
+
+	netdev_for_each_all_upper_dev_rcu(dev, cur, iter) {
+		if (cur == upper)
+			return true;
+	}
+
+	/* No hit during the walk: fall back to the final pointer comparison. */
+	return cur == upper;
+}
+
#endif /* _CORE_PRIV_H */
--- /dev/null
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <rdma/ib_verbs.h>
+
+/* # of WCs to poll for with a single call to ib_poll_cq */
+#define IB_POLL_BATCH 16
+
+/* # of WCs to iterate over before yielding */
+#define IB_POLL_BUDGET_IRQ 256
+#define IB_POLL_BUDGET_WORKQUEUE 65536
+
+#define IB_POLL_FLAGS \
+ (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
+
+/*
+ * Poll @cq in batches of at most IB_POLL_BATCH work completions and invoke
+ * the ->done() callback of every completion that carries a wr_cqe.
+ *
+ * @budget bounds the number of completions handled in one call; -1 means
+ * unlimited. Returns the number of completions processed, which never
+ * exceeds a non-negative @budget.
+ */
+static int __ib_process_cq(struct ib_cq *cq, int budget)
+{
+	int i, n, completed = 0;
+
+	/*
+	 * Never request more entries than the remaining budget, otherwise a
+	 * budget that is not a multiple of IB_POLL_BATCH would be overshot.
+	 * For the unlimited case (budget == -1) the remainder is negative;
+	 * the unsigned min_t() turns it into a huge value so IB_POLL_BATCH
+	 * is picked.
+	 */
+	while ((n = ib_poll_cq(cq, min_t(u32, IB_POLL_BATCH,
+					 budget - completed), cq->wc)) > 0) {
+		for (i = 0; i < n; i++) {
+			struct ib_wc *wc = &cq->wc[i];
+
+			if (wc->wr_cqe)
+				wc->wr_cqe->done(cq, wc);
+			else
+				/* Only failed completions may lack a wr_cqe. */
+				WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
+		}
+
+		completed += n;
+
+		/* A short batch means the CQ is drained (or budget is used up). */
+		if (n != IB_POLL_BATCH ||
+		    (budget != -1 && completed >= budget))
+			break;
+	}
+
+	return completed;
+}
+
+/**
+ * ib_process_cq_direct - process a CQ in caller context
+ * @cq: CQ to process
+ * @budget: number of CQEs to poll for
+ *
+ * This function is used to process all outstanding CQ entries on a
+ * %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
+ * context and does not ask for completion interrupts from the HCA.
+ *
+ * Note: for compatibility reasons -1 can be passed in @budget for unlimited
+ * polling. Do not use this feature in new code, it will be removed soon.
+ *
+ * Returns the number of completions processed.
+ */
+int ib_process_cq_direct(struct ib_cq *cq, int budget)
+{
+ WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
+
+ return __ib_process_cq(cq, budget);
+}
+EXPORT_SYMBOL(ib_process_cq_direct);
+
+static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
+{
+ WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq); /* comp_handler of IB_POLL_DIRECT CQs; always warns */
+}
+
+/*
+ * irq_poll callback for IB_POLL_SOFTIRQ CQs. Processes up to @budget
+ * completions; when fewer were found, polling is marked complete and the
+ * CQ is re-armed, rescheduling right away if ib_req_notify_cq() returns a
+ * positive value. Returns the number of completions processed.
+ */
+static int ib_poll_handler(struct irq_poll *iop, int budget)
+{
+	struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
+	int done = __ib_process_cq(cq, budget);
+
+	/* Budget exhausted: stay scheduled, irq_poll will call us again. */
+	if (done >= budget)
+		return done;
+
+	irq_poll_complete(&cq->iop);
+	if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+		irq_poll_sched(&cq->iop);
+
+	return done;
+}
+
+static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
+{
+ irq_poll_sched(&cq->iop); /* comp_handler of IB_POLL_SOFTIRQ CQs: schedule ib_poll_handler() */
+}
+
+/*
+ * Work item for IB_POLL_WORKQUEUE CQs. Processes up to
+ * IB_POLL_BUDGET_WORKQUEUE completions and requeues itself when the budget
+ * was used up or when re-arming the CQ returns a positive value.
+ */
+static void ib_cq_poll_work(struct work_struct *work)
+{
+	struct ib_cq *cq = container_of(work, struct ib_cq, work);
+	bool requeue;
+
+	requeue = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE) >=
+		  IB_POLL_BUDGET_WORKQUEUE;
+	/* Only re-arm the CQ when the budget was not exhausted. */
+	if (!requeue)
+		requeue = ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0;
+
+	if (requeue)
+		queue_work(ib_comp_wq, &cq->work);
+}
+
+static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
+{
+ queue_work(ib_comp_wq, &cq->work); /* comp_handler of IB_POLL_WORKQUEUE CQs: run ib_cq_poll_work() */
+}
+
+/**
+ * ib_alloc_cq - allocate a completion queue
+ * @dev: device to allocate the CQ for
+ * @private: driver private data, accessible from cq->cq_context
+ * @nr_cqe: number of CQEs to allocate
+ * @comp_vector: HCA completion vectors for this CQ
+ * @poll_ctx: context to poll the CQ from.
+ *
+ * This is the proper interface to allocate a CQ for in-kernel users. A
+ * CQ allocated with this interface will automatically be polled from the
+ * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
+ * to use this CQ abstraction.
+ *
+ * Returns the new CQ, or an ERR_PTR(): the error from the driver's
+ * create_cq(), -ENOMEM if the WC array cannot be allocated, or -EINVAL
+ * for an unknown @poll_ctx.
+ */
+struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
+{
+ struct ib_cq_init_attr cq_attr = {
+ .cqe = nr_cqe,
+ .comp_vector = comp_vector,
+ };
+ struct ib_cq *cq;
+ int ret = -ENOMEM; /* default error: used by the kmalloc_array() failure path */
+
+ cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
+ if (IS_ERR(cq))
+ return cq;
+
+ cq->device = dev;
+ cq->uobject = NULL;
+ cq->event_handler = NULL;
+ cq->cq_context = private;
+ cq->poll_ctx = poll_ctx;
+ atomic_set(&cq->usecnt, 0);
+
+ cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL); /* scratch array filled by __ib_process_cq() */
+ if (!cq->wc)
+ goto out_destroy_cq;
+
+ switch (cq->poll_ctx) {
+ case IB_POLL_DIRECT:
+ cq->comp_handler = ib_cq_completion_direct; /* no notification is requested for direct CQs */
+ break;
+ case IB_POLL_SOFTIRQ:
+ cq->comp_handler = ib_cq_completion_softirq;
+
+ irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); /* arm the CQ once its handler is in place */
+ break;
+ case IB_POLL_WORKQUEUE:
+ cq->comp_handler = ib_cq_completion_workqueue;
+ INIT_WORK(&cq->work, ib_cq_poll_work);
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); /* arm the CQ once its handler is in place */
+ break;
+ default:
+ ret = -EINVAL;
+ goto out_free_wc;
+ }
+
+ return cq;
+
+out_free_wc:
+ kfree(cq->wc);
+out_destroy_cq:
+ cq->device->destroy_cq(cq);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_cq);
+
+/**
+ * ib_free_cq - free a completion queue
+ * @cq: completion queue to free.
+ *
+ * Stops the polling context that belongs to the CQ's poll_ctx, then frees
+ * the WC array and destroys the CQ. Warns and does nothing if the CQ is
+ * still in use (usecnt != 0).
+ */
+void ib_free_cq(struct ib_cq *cq)
+{
+	int ret;
+
+	if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
+		return;
+
+	if (cq->poll_ctx == IB_POLL_SOFTIRQ)
+		irq_poll_disable(&cq->iop);
+	else if (cq->poll_ctx == IB_POLL_WORKQUEUE)
+		flush_work(&cq->work);
+	else
+		/* IB_POLL_DIRECT needs no teardown; anything else is a bug. */
+		WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
+
+	kfree(cq->wc);
+	ret = cq->device->destroy_cq(cq);
+	WARN_ON_ONCE(ret);
+}
+EXPORT_SYMBOL(ib_free_cq);
bool going_down;
};
+struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
{
int ret;
struct ib_client *client;
+ struct ib_udata uhw = {.outlen = 0, .inlen = 0};
mutex_lock(&device_mutex);
goto out;
}
+ memset(&device->attrs, 0, sizeof(device->attrs));
+ ret = device->query_device(device, &device->attrs, &uhw);
+ if (ret) {
+ printk(KERN_WARNING "Couldn't query the device attributes\n");
+ goto out;
+ }
+
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
}
EXPORT_SYMBOL(ib_dispatch_event);
-/**
- * ib_query_device - Query IB device attributes
- * @device:Device to query
- * @device_attr:Device attributes
- *
- * ib_query_device() returns the attributes of a device through the
- * @device_attr pointer.
- */
-int ib_query_device(struct ib_device *device,
- struct ib_device_attr *device_attr)
-{
- struct ib_udata uhw = {.outlen = 0, .inlen = 0};
-
- memset(device_attr, 0, sizeof(*device_attr));
-
- return device->query_device(device, device_attr, &uhw);
-}
-EXPORT_SYMBOL(ib_query_device);
-
/**
* ib_query_port - Query IB port attributes
* @device:Device to query
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
+ * @gid_type: Type of GID.
* @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- struct net_device *ndev, u8 *port_num, u16 *index)
+ enum ib_gid_type gid_type, struct net_device *ndev,
+ u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
if (rdma_cap_roce_gid_table(device, port)) {
- if (!ib_find_cached_gid_by_port(device, gid, port,
+ if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
ndev, index)) {
*port_num = port;
return 0;
}
}
+ if (gid_type != IB_GID_TYPE_IB)
+ continue;
+
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
if (ret)
if (!ib_wq)
return -ENOMEM;
+ ib_comp_wq = alloc_workqueue("ib-comp-wq",
+ WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
+ WQ_UNBOUND_MAX_ACTIVE);
+ if (!ib_comp_wq) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
ret = class_register(&ib_class);
if (ret) {
printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
- goto err;
+ goto err_comp;
}
ret = ibnl_init();
err_sysfs:
class_unregister(&ib_class);
-
+err_comp:
+ destroy_workqueue(ib_comp_wq);
err:
destroy_workqueue(ib_wq);
return ret;
ib_cache_cleanup();
ibnl_cleanup();
class_unregister(&ib_class);
+ destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
}
{
struct ib_device *device;
struct ib_fmr_pool *pool;
- struct ib_device_attr *attr;
int i;
int ret;
int max_remaps;
return ERR_PTR(-ENOSYS);
}
- attr = kmalloc(sizeof *attr, GFP_KERNEL);
- if (!attr) {
- printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
- return ERR_PTR(-ENOMEM);
- }
-
- ret = ib_query_device(device, attr);
- if (ret) {
- printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
- kfree(attr);
- return ERR_PTR(ret);
- }
-
- if (!attr->max_map_per_fmr)
+ if (!device->attrs.max_map_per_fmr)
max_remaps = IB_FMR_MAX_REMAPS;
else
- max_remaps = attr->max_map_per_fmr;
-
- kfree(attr);
+ max_remaps = device->attrs.max_map_per_fmr;
pool = kmalloc(sizeof *pool, GFP_KERNEL);
if (!pool) {
u8 mgmt_class);
static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
struct ib_mad_agent_private *agent_priv);
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc);
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
/*
* Returns a ib_mad_port_private structure or NULL for a device/port
atomic_inc(&mad_snoop_priv->refcount);
spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
+ mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
mad_recv_wc);
deref_snoop_agent(mad_snoop_priv);
spin_lock_irqsave(&qp_info->snoop_lock, flags);
spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
}
-static void build_smp_wc(struct ib_qp *qp,
- u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
- struct ib_wc *wc)
+static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
+ u16 pkey_index, u8 port_num, struct ib_wc *wc)
{
memset(wc, 0, sizeof *wc);
- wc->wr_id = wr_id;
+ wc->wr_cqe = cqe;
wc->status = IB_WC_SUCCESS;
wc->opcode = IB_WC_RECV;
wc->pkey_index = pkey_index;
}
build_smp_wc(mad_agent_priv->agent.qp,
- send_wr->wr.wr_id, drslid,
+ send_wr->wr.wr_cqe, drslid,
send_wr->pkey_index,
send_wr->port_num, &mad_wc);
mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
- mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
+ mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+
+ mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
mad_send_wr->send_wr.wr.num_sge = 2;
mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
/* Set WR ID to find mad_send_wr upon completion */
qp_info = mad_send_wr->mad_agent_priv->qp_info;
- mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
+ mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+ mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
mad_agent = mad_send_wr->send_buf.mad_agent;
sge = mad_send_wr->sg_list;
/* user rmpp is in effect
* and this is an active RMPP MAD
*/
- mad_recv_wc->wc->wr_id = 0;
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
- mad_recv_wc);
+ mad_agent_priv->agent.recv_handler(
+ &mad_agent_priv->agent, NULL,
+ mad_recv_wc);
atomic_dec(&mad_agent_priv->refcount);
} else {
/* not user rmpp, revert to normal behavior and
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Defined behavior is to complete response before request */
- mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
- mad_recv_wc);
+ mad_agent_priv->agent.recv_handler(
+ &mad_agent_priv->agent,
+ &mad_send_wr->send_buf,
+ mad_recv_wc);
atomic_dec(&mad_agent_priv->refcount);
mad_send_wc.status = IB_WC_SUCCESS;
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
}
} else {
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+ mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
mad_recv_wc);
deref_mad_agent(mad_agent_priv);
}
return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
}
-static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
- struct ib_wc *wc)
+static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct ib_mad_port_private *port_priv = cq->cq_context;
+ struct ib_mad_list_head *mad_list =
+ container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
struct ib_mad_qp_info *qp_info;
struct ib_mad_private_header *mad_priv_hdr;
struct ib_mad_private *recv, *response = NULL;
- struct ib_mad_list_head *mad_list;
struct ib_mad_agent_private *mad_agent;
int port_num;
int ret = IB_MAD_RESULT_SUCCESS;
u16 resp_mad_pkey_index = 0;
bool opa;
- mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ if (list_empty_careful(&port_priv->port_list))
+ return;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ /*
+ * Receive errors indicate that the QP has entered the error
+ * state - error handling/shutdown code will cleanup
+ */
+ return;
+ }
+
qp_info = mad_list->mad_queue->qp_info;
dequeue_mad(mad_list);
response = alloc_mad_private(mad_size, GFP_KERNEL);
if (!response) {
dev_err(&port_priv->device->dev,
- "ib_mad_recv_done_handler no memory for response buffer\n");
+ "%s: no memory for response buffer\n", __func__);
goto out;
}
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}
-static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
- struct ib_wc *wc)
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct ib_mad_port_private *port_priv = cq->cq_context;
+ struct ib_mad_list_head *mad_list =
+ container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr;
- struct ib_mad_list_head *mad_list;
struct ib_mad_qp_info *qp_info;
struct ib_mad_queue *send_queue;
struct ib_send_wr *bad_send_wr;
unsigned long flags;
int ret;
- mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ if (list_empty_careful(&port_priv->port_list))
+ return;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ if (!ib_mad_send_error(port_priv, wc))
+ return;
+ }
+
mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
mad_list);
send_queue = mad_list->mad_queue;
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
}
-static void mad_error_handler(struct ib_mad_port_private *port_priv,
- struct ib_wc *wc)
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc)
{
- struct ib_mad_list_head *mad_list;
- struct ib_mad_qp_info *qp_info;
+ struct ib_mad_list_head *mad_list =
+ container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
+ struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
struct ib_mad_send_wr_private *mad_send_wr;
int ret;
- /* Determine if failure was a send or receive */
- mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
- qp_info = mad_list->mad_queue->qp_info;
- if (mad_list->mad_queue == &qp_info->recv_queue)
- /*
- * Receive errors indicate that the QP has entered the error
- * state - error handling/shutdown code will cleanup
- */
- return;
-
/*
* Send errors will transition the QP to SQE - move
* QP to RTS and repost flushed work requests
mad_send_wr->retry = 0;
ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
&bad_send_wr);
- if (ret)
- ib_mad_send_done_handler(port_priv, wc);
- } else
- ib_mad_send_done_handler(port_priv, wc);
+ if (!ret)
+ return false;
+ }
} else {
struct ib_qp_attr *attr;
kfree(attr);
if (ret)
dev_err(&port_priv->device->dev,
- "mad_error_handler - ib_modify_qp to RTS : %d\n",
- ret);
+ "%s - ib_modify_qp to RTS: %d\n",
+ __func__, ret);
else
mark_sends_for_retry(qp_info);
}
- ib_mad_send_done_handler(port_priv, wc);
}
-}
-/*
- * IB MAD completion callback
- */
-static void ib_mad_completion_handler(struct work_struct *work)
-{
- struct ib_mad_port_private *port_priv;
- struct ib_wc wc;
-
- port_priv = container_of(work, struct ib_mad_port_private, work);
- ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
-
- while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
- if (wc.status == IB_WC_SUCCESS) {
- switch (wc.opcode) {
- case IB_WC_SEND:
- ib_mad_send_done_handler(port_priv, &wc);
- break;
- case IB_WC_RECV:
- ib_mad_recv_done_handler(port_priv, &wc);
- break;
- default:
- BUG_ON(1);
- break;
- }
- } else
- mad_error_handler(port_priv, &wc);
- }
+ return true;
}
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
* before request
*/
build_smp_wc(recv_mad_agent->agent.qp,
- (unsigned long) local->mad_send_wr,
+ local->mad_send_wr->send_wr.wr.wr_cqe,
be16_to_cpu(IB_LID_PERMISSIVE),
local->mad_send_wr->send_wr.pkey_index,
recv_mad_agent->agent.port_num, &wc);
IB_MAD_SNOOP_RECVS);
recv_mad_agent->agent.recv_handler(
&recv_mad_agent->agent,
+ &local->mad_send_wr->send_buf,
&local->mad_priv->header.recv_wc);
spin_lock_irqsave(&recv_mad_agent->lock, flags);
atomic_dec(&recv_mad_agent->refcount);
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}
-static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
-{
- struct ib_mad_port_private *port_priv = cq->cq_context;
- unsigned long flags;
-
- spin_lock_irqsave(&ib_mad_port_list_lock, flags);
- if (!list_empty(&port_priv->port_list))
- queue_work(port_priv->wq, &port_priv->work);
- spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-}
-
/*
* Allocate receive MADs and post receive WRs for them
*/
break;
}
mad_priv->header.mapping = sg_list.addr;
- recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
mad_priv->header.mad_list.mad_queue = recv_queue;
+ mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
+ recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
/* Post receive WR */
spin_lock_irqsave(&recv_queue->lock, flags);
unsigned long flags;
char name[sizeof "ib_mad123"];
int has_smi;
- struct ib_cq_init_attr cq_attr = {};
if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
return -EFAULT;
if (has_smi)
cq_size *= 2;
- cq_attr.cqe = cq_size;
- port_priv->cq = ib_create_cq(port_priv->device,
- ib_mad_thread_completion_handler,
- NULL, port_priv, &cq_attr);
+ port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
+ IB_POLL_WORKQUEUE);
if (IS_ERR(port_priv->cq)) {
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
ret = -ENOMEM;
goto error8;
}
- INIT_WORK(&port_priv->work, ib_mad_completion_handler);
spin_lock_irqsave(&ib_mad_port_list_lock, flags);
list_add_tail(&port_priv->port_list, &ib_mad_port_list);
error6:
ib_dealloc_pd(port_priv->pd);
error4:
- ib_destroy_cq(port_priv->cq);
+ ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
error3:
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
ib_dealloc_pd(port_priv->pd);
- ib_destroy_cq(port_priv->cq);
+ ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
/* XXX: Handle deallocation of MAD registration tables */
struct ib_mad_list_head {
struct list_head list;
+ struct ib_cqe cqe;
struct ib_mad_queue *mad_queue;
};
struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
struct list_head agent_list;
struct workqueue_struct *wq;
- struct work_struct work;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
};
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
+ struct net_device *ndev,
+ enum ib_gid_type gid_type,
struct ib_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
u8 p;
- ret = ib_find_cached_gid(device, &rec->port_gid,
- NULL, &p, &gid_index);
+ if (rdma_protocol_roce(device, port_num)) {
+ ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+ gid_type, port_num,
+ ndev,
+ &gid_index);
+ } else if (rdma_protocol_ib(device, port_num)) {
+ ret = ib_find_cached_gid(device, &rec->port_gid,
+ IB_GID_TYPE_IB, NULL, &p,
+ &gid_index);
+ } else {
+ ret = -EINVAL;
+ }
+
if (ret)
return ret;
struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ];
};
+/*
+ * Maps a per-port capability predicate to the GID type that the port can
+ * carry when the predicate holds. Walked by roce_gid_type_mask_support().
+ */
+static const struct {
+	bool (*is_supported)(const struct ib_device *device, u8 port_num);
+	enum ib_gid_type gid_type;
+} PORT_CAP_TO_GID_TYPE[] = {
+	{
+		.is_supported = rdma_protocol_roce_eth_encap,
+		.gid_type = IB_GID_TYPE_ROCE,
+	},
+	{
+		.is_supported = rdma_protocol_roce_udp_encap,
+		.gid_type = IB_GID_TYPE_ROCE_UDP_ENCAP,
+	},
+};
+
+#define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
+
+/*
+ * roce_gid_type_mask_support - bitmask of the GID types usable on a port
+ * @ib_dev: device that owns the port
+ * @port: port number to inspect
+ *
+ * Returns a mask in which bit (1UL << gid_type) is set for every supported
+ * GID type. A port for which rdma_protocol_roce() is false reports only
+ * IB_GID_TYPE_IB; otherwise every PORT_CAP_TO_GID_TYPE entry whose
+ * is_supported() callback accepts the port contributes its gid_type bit.
+ */
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
+{
+	unsigned int i;
+	/* Same width as the return type and as the 1UL shifts OR-ed into it. */
+	unsigned long ret_flags = 0;
+
+	if (!rdma_protocol_roce(ib_dev, port))
+		return 1UL << IB_GID_TYPE_IB;
+
+	for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
+		if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
+			ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
+
+	return ret_flags;
+}
+EXPORT_SYMBOL(roce_gid_type_mask_support);
+
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
u8 port, union ib_gid *gid,
struct ib_gid_attr *gid_attr)
{
- switch (gid_op) {
- case GID_ADD:
- ib_cache_gid_add(ib_dev, port, gid, gid_attr);
- break;
- case GID_DEL:
- ib_cache_gid_del(ib_dev, port, gid, gid_attr);
- break;
+ int i;
+ unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+ for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
+ if ((1UL << i) & gid_type_mask) {
+ gid_attr->gid_type = i;
+ switch (gid_op) {
+ case GID_ADD:
+ ib_cache_gid_add(ib_dev, port,
+ gid, gid_attr);
+ break;
+ case GID_DEL:
+ ib_cache_gid_del(ib_dev, port,
+ gid, gid_attr);
+ break;
+ }
+ }
}
}
return BONDING_SLAVE_STATE_NA;
}
-static bool is_upper_dev_rcu(struct net_device *dev, struct net_device *upper)
-{
- struct net_device *_upper = NULL;
- struct list_head *iter;
-
- netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
- if (_upper == upper)
- break;
-
- return _upper == upper;
-}
-
#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \
BONDING_SLAVE_STATE_NA)
static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
if (!real_dev)
real_dev = event_ndev;
- res = ((is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
(is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
REQUIRED_BOND_STATES)) ||
real_dev == rdma_ndev);
return 1;
rcu_read_lock();
- res = is_upper_dev_rcu(rdma_ndev, event_ndev);
+ res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
rcu_read_unlock();
return res;
u8 port, struct net_device *event_ndev,
struct net_device *rdma_ndev)
{
+ unsigned long gid_type_mask;
+
rcu_read_lock();
if (!rdma_ndev ||
((rdma_ndev != event_ndev &&
- !is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
+ !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
is_eth_active_slave_of_bonding_rcu(rdma_ndev,
netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
BONDING_SLAVE_STATE_INACTIVE)) {
}
rcu_read_unlock();
- ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+ ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask,
IB_CACHE_GID_DEFAULT_MODE_SET);
}
rcu_read_lock();
- if (is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
BONDING_SLAVE_STATE_INACTIVE) {
+ unsigned long gid_type_mask;
+
rcu_read_unlock();
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+ gid_type_mask,
IB_CACHE_GID_DEFAULT_MODE_DELETE);
} else {
rcu_read_unlock();
#include <net/netlink.h>
#include <uapi/rdma/ib_user_sa.h>
#include <rdma/ib_marshall.h>
+#include <rdma/ib_addr.h>
#include "sa.h"
+#include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
struct nlattr *tb[LS_NLA_TYPE_MAX];
int ret;
- if (!netlink_capable(skb, CAP_NET_ADMIN))
+ if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
+ !(NETLINK_CB(skb).sk) ||
+ !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
int found = 0;
int ret;
- if (!netlink_capable(skb, CAP_NET_ADMIN))
+ if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
+ !(NETLINK_CB(skb).sk) ||
+ !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
{
int ret;
u16 gid_index;
- int force_grh;
+ int use_roce;
+ struct net_device *ndev = NULL;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(rec->dlid);
ah_attr->port_num = port_num;
ah_attr->static_rate = rec->rate;
- force_grh = rdma_cap_eth_ah(device, port_num);
+ use_roce = rdma_cap_eth_ah(device, port_num);
+
+ if (use_roce) {
+ struct net_device *idev;
+ struct net_device *resolved_dev;
+ struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
+ .net = rec->net ? rec->net :
+ &init_net};
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
+
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
+
+ rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
+ rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
+
+ /* validate the route */
+ ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
+ &dgid_addr._sockaddr, &dev_addr);
+ if (ret)
+ return ret;
- if (rec->hop_limit > 1 || force_grh) {
- struct net_device *ndev = ib_get_ndev_from_path(rec);
+ if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+ dev_addr.network == RDMA_NETWORK_IPV6) &&
+ rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return -EINVAL;
+
+ idev = device->get_netdev(device, port_num);
+ if (!idev)
+ return -ENODEV;
+
+ resolved_dev = dev_get_by_index(dev_addr.net,
+ dev_addr.bound_dev_if);
+ if (resolved_dev->flags & IFF_LOOPBACK) {
+ dev_put(resolved_dev);
+ resolved_dev = idev;
+ dev_hold(resolved_dev);
+ }
+ ndev = ib_get_ndev_from_path(rec);
+ rcu_read_lock();
+ if ((ndev && ndev != resolved_dev) ||
+ (resolved_dev != idev &&
+ !rdma_is_upper_dev_rcu(idev, resolved_dev)))
+ ret = -EHOSTUNREACH;
+ rcu_read_unlock();
+ dev_put(idev);
+ dev_put(resolved_dev);
+ if (ret) {
+ if (ndev)
+ dev_put(ndev);
+ return ret;
+ }
+ }
+ if (rec->hop_limit > 1 || use_roce) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->dgid;
- ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num,
- &gid_index);
+ ret = ib_find_cached_gid_by_port(device, &rec->sgid,
+ rec->gid_type, port_num, ndev,
+ &gid_index);
if (ret) {
if (ndev)
dev_put(ndev);
if (ndev)
dev_put(ndev);
}
- if (force_grh) {
+
+ if (use_roce)
memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
- }
+
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
mad->data, &rec);
rec.net = NULL;
rec.ifindex = 0;
+ rec.gid_type = IB_GID_TYPE_IB;
memset(rec.dmac, 0, ETH_ALEN);
query->callback(status, &rec, query->context);
} else
}
static void recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_sa_query *query;
- struct ib_mad_send_buf *mad_buf;
- mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
- query = mad_buf->context[0];
+ if (!send_buf)
+ return;
+ query = send_buf->context[0];
if (query->callback) {
if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
query->callback(query,
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
+#include <linux/netdevice.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
+struct ib_port;
+
+struct gid_attr_group {
+ struct ib_port *port;
+ struct kobject kobj;
+ struct attribute_group ndev;
+ struct attribute_group type;
+};
struct ib_port {
struct kobject kobj;
struct ib_device *ibdev;
+ struct gid_attr_group *gid_attr_group;
struct attribute_group gid_group;
struct attribute_group pkey_group;
u8 port_num;
+ struct attribute_group *pma_table;
};
struct port_attribute {
struct port_attribute attr;
char name[8];
int index;
+ __be16 attr_id;
};
static ssize_t port_attr_show(struct kobject *kobj,
.show = port_attr_show
};
+/*
+ * sysfs show() entry point for attributes that live under a port's
+ * gid_attr_group kobject. Resolves the owning ib_port from the kobject and
+ * hands off to the port_attribute's own show() callback; an attribute
+ * without a callback yields -EIO.
+ */
+static ssize_t gid_attr_show(struct kobject *kobj,
+			     struct attribute *attr, char *buf)
+{
+	struct gid_attr_group *group =
+		container_of(kobj, struct gid_attr_group, kobj);
+	struct port_attribute *pattr =
+		container_of(attr, struct port_attribute, attr);
+
+	if (pattr->show)
+		return pattr->show(group->port, pattr, buf);
+
+	return -EIO;
+}
+
+static const struct sysfs_ops gid_attr_sysfs_ops = {
+ .show = gid_attr_show
+};
+
static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
NULL
};
+/*
+ * Write the name of the net device attached to @gid_attr into @buf.
+ * Returns the number of bytes written, or -EINVAL when the GID entry has no
+ * net device. The return type is signed so the error code is not pushed
+ * through an unsigned size_t.
+ */
+static ssize_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
+{
+	if (!gid_attr->ndev)
+		return -EINVAL;
+
+	return sprintf(buf, "%s\n", gid_attr->ndev->name);
+}
+
+/*
+ * Write the string form of @gid_attr's GID type into @buf and return the
+ * number of bytes written.
+ */
+static ssize_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
+{
+	return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
+}
+
+/*
+ * Shared body of the per-GID-index sysfs show() handlers.
+ * @p: port whose GID table is queried
+ * @attr: attribute embedded in a port_table_attribute; its index selects
+ *        the GID table entry
+ * @buf: sysfs output buffer
+ * @print: formatter that renders one field of the queried ib_gid_attr
+ *
+ * Queries the GID entry with ib_query_gid() and lets @print format it.
+ * Returns what @print returns, or the ib_query_gid() error. A non-NULL
+ * gid_attr.ndev is released with dev_put() on every path (the reference is
+ * presumably taken by ib_query_gid() - confirm against its contract).
+ */
+static ssize_t _show_port_gid_attr(struct ib_port *p,
+				   struct port_attribute *attr,
+				   char *buf,
+				   ssize_t (*print)(struct ib_gid_attr *gid_attr,
+						    char *buf))
+{
+	struct port_table_attribute *tab_attr =
+		container_of(attr, struct port_table_attribute, attr);
+	union ib_gid gid;
+	struct ib_gid_attr gid_attr = {};
+	ssize_t ret;
+
+	ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
+			   &gid_attr);
+	if (ret)
+		goto err;
+
+	ret = print(&gid_attr, buf);
+
+err:
+	if (gid_attr.ndev)
+		dev_put(gid_attr.ndev);
+	return ret;
+}
+
static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
return sprintf(buf, "%pI6\n", gid.raw);
}
+/*
+ * show() handler for the per-index "ndevs" attributes: formats the selected
+ * GID entry with print_ndev via the shared _show_port_gid_attr() helper.
+ */
+static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
+				       struct port_attribute *attr, char *buf)
+{
+	ssize_t len = _show_port_gid_attr(p, attr, buf, print_ndev);
+
+	return len;
+}
+
+/*
+ * show() handler for the per-index "types" attributes: formats the selected
+ * GID entry with print_gid_type via the shared _show_port_gid_attr() helper.
+ */
+static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
+					   struct port_attribute *attr,
+					   char *buf)
+{
+	ssize_t len = _show_port_gid_attr(p, attr, buf, print_gid_type);
+
+	return len;
+}
+
static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
struct port_table_attribute port_pma_attr_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
- .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24), \
+ .attr_id = IB_PMA_PORT_COUNTERS , \
}
-static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+#define PORT_PMA_ATTR_EXT(_name, _width, _offset) \
+struct port_table_attribute port_pma_attr_ext_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
+ .index = (_offset) | ((_width) << 16), \
+ .attr_id = IB_PMA_PORT_COUNTERS_EXT , \
+}
+
+/*
+ * Get a Perfmgmt MAD block of data.
+ * Returns error code or the number of bytes retrieved.
+ */
+static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
+ void *data, int offset, size_t size)
{
- struct port_table_attribute *tab_attr =
- container_of(attr, struct port_table_attribute, attr);
- int offset = tab_attr->index & 0xffff;
- int width = (tab_attr->index >> 16) & 0xff;
- struct ib_mad *in_mad = NULL;
- struct ib_mad *out_mad = NULL;
+ struct ib_mad *in_mad;
+ struct ib_mad *out_mad;
size_t mad_size = sizeof(*out_mad);
u16 out_mad_pkey_index = 0;
ssize_t ret;
- if (!p->ibdev->process_mad)
- return sprintf(buf, "N/A (no PMA)\n");
+ if (!dev->process_mad)
+ return -ENOSYS;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
in_mad->mad_hdr.class_version = 1;
in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
- in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */
+ in_mad->mad_hdr.attr_id = attr;
- in_mad->data[41] = p->port_num; /* PortSelect field */
+ if (attr != IB_PMA_CLASS_PORT_INFO)
+ in_mad->data[41] = port_num; /* PortSelect field */
- if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
- p->port_num, NULL, NULL,
+ if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
+ port_num, NULL, NULL,
(const struct ib_mad_hdr *)in_mad, mad_size,
(struct ib_mad_hdr *)out_mad, &mad_size,
&out_mad_pkey_index) &
ret = -EINVAL;
goto out;
}
+ memcpy(data, out_mad->data + offset, size);
+ ret = size;
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return ret;
+}
+
+static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ int offset = tab_attr->index & 0xffff;
+ int width = (tab_attr->index >> 16) & 0xff;
+ ssize_t ret;
+ u8 data[8];
+
+ ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
+ 40 + offset / 8, sizeof(data));
+ if (ret < 0)
+ return sprintf(buf, "N/A (no PMA)\n");
switch (width) {
case 4:
- ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
+ ret = sprintf(buf, "%u\n", (*data >>
(4 - (offset % 8))) & 0xf);
break;
case 8:
- ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
+ ret = sprintf(buf, "%u\n", *data);
break;
case 16:
ret = sprintf(buf, "%u\n",
- be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
+ be16_to_cpup((__be16 *)data));
break;
case 32:
ret = sprintf(buf, "%u\n",
- be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
+ be32_to_cpup((__be32 *)data));
+ break;
+ case 64:
+ ret = sprintf(buf, "%llu\n",
+ be64_to_cpup((__be64 *)data));
break;
+
default:
ret = 0;
}
-out:
- kfree(in_mad);
- kfree(out_mad);
-
return ret;
}
static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
+/*
+ * Counters added by extended set
+ */
+static PORT_PMA_ATTR_EXT(port_xmit_data , 64, 64);
+static PORT_PMA_ATTR_EXT(port_rcv_data , 64, 128);
+static PORT_PMA_ATTR_EXT(port_xmit_packets , 64, 192);
+static PORT_PMA_ATTR_EXT(port_rcv_packets , 64, 256);
+static PORT_PMA_ATTR_EXT(unicast_xmit_packets , 64, 320);
+static PORT_PMA_ATTR_EXT(unicast_rcv_packets , 64, 384);
+static PORT_PMA_ATTR_EXT(multicast_xmit_packets , 64, 448);
+static PORT_PMA_ATTR_EXT(multicast_rcv_packets , 64, 512);
+
static struct attribute *pma_attrs[] = {
&port_pma_attr_symbol_error.attr.attr,
&port_pma_attr_link_error_recovery.attr.attr,
NULL
};
+static struct attribute *pma_attrs_ext[] = {
+ &port_pma_attr_symbol_error.attr.attr,
+ &port_pma_attr_link_error_recovery.attr.attr,
+ &port_pma_attr_link_downed.attr.attr,
+ &port_pma_attr_port_rcv_errors.attr.attr,
+ &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+ &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+ &port_pma_attr_port_xmit_discards.attr.attr,
+ &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+ &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+ &port_pma_attr_local_link_integrity_errors.attr.attr,
+ &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+ &port_pma_attr_VL15_dropped.attr.attr,
+ &port_pma_attr_ext_port_xmit_data.attr.attr,
+ &port_pma_attr_ext_port_rcv_data.attr.attr,
+ &port_pma_attr_ext_port_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_rcv_packets.attr.attr,
+ &port_pma_attr_ext_unicast_rcv_packets.attr.attr,
+ &port_pma_attr_ext_unicast_xmit_packets.attr.attr,
+ &port_pma_attr_ext_multicast_rcv_packets.attr.attr,
+ &port_pma_attr_ext_multicast_xmit_packets.attr.attr,
+ NULL
+};
+
+static struct attribute *pma_attrs_noietf[] = {
+ &port_pma_attr_symbol_error.attr.attr,
+ &port_pma_attr_link_error_recovery.attr.attr,
+ &port_pma_attr_link_downed.attr.attr,
+ &port_pma_attr_port_rcv_errors.attr.attr,
+ &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+ &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+ &port_pma_attr_port_xmit_discards.attr.attr,
+ &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+ &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+ &port_pma_attr_local_link_integrity_errors.attr.attr,
+ &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+ &port_pma_attr_VL15_dropped.attr.attr,
+ &port_pma_attr_ext_port_xmit_data.attr.attr,
+ &port_pma_attr_ext_port_rcv_data.attr.attr,
+ &port_pma_attr_ext_port_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_rcv_packets.attr.attr,
+ NULL
+};
+
static struct attribute_group pma_group = {
.name = "counters",
.attrs = pma_attrs
};
+static struct attribute_group pma_group_ext = {
+ .name = "counters",
+ .attrs = pma_attrs_ext
+};
+
+static struct attribute_group pma_group_noietf = {
+ .name = "counters",
+ .attrs = pma_attrs_noietf
+};
+
static void ib_port_release(struct kobject *kobj)
{
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
kfree(p);
}
+/*
+ * kobject release callback for a gid_attr_group. Frees each attribute in the
+ * NULL-terminated "ndev" and "type" attribute arrays, then the arrays
+ * themselves, and finally the group structure.
+ */
+static void ib_port_gid_attr_release(struct kobject *kobj)
+{
+	struct gid_attr_group *group =
+		container_of(kobj, struct gid_attr_group, kobj);
+	struct attribute **cur;
+
+	if (group->ndev.attrs) {
+		for (cur = group->ndev.attrs; *cur; cur++)
+			kfree(*cur);
+
+		kfree(group->ndev.attrs);
+	}
+
+	if (group->type.attrs) {
+		for (cur = group->type.attrs; *cur; cur++)
+			kfree(*cur);
+
+		kfree(group->type.attrs);
+	}
+
+	kfree(group);
+}
+
static struct kobj_type port_type = {
.release = ib_port_release,
.sysfs_ops = &port_sysfs_ops,
.default_attrs = port_default_attrs
};
+static struct kobj_type gid_attr_type = {
+ .sysfs_ops = &gid_attr_sysfs_ops,
+ .release = ib_port_gid_attr_release
+};
+
static struct attribute **
alloc_group_attrs(ssize_t (*show)(struct ib_port *,
struct port_attribute *, char *buf),
return NULL;
}
+/*
+ * Figure out which counter table to use depending on
+ * the device capabilities.
+ *
+ * Fetches the PerfMgt ClassPortInfo through get_perf_mad() and tests its
+ * capability_mask: IB_PMA_CLASS_CAP_EXT_WIDTH selects pma_group_ext and
+ * IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF selects pma_group_noietf. When the
+ * query fails or neither bit is set, the basic pma_group is returned.
+ */
+static struct attribute_group *get_counter_table(struct ib_device *dev,
+						 int port_num)
+{
+	struct ib_class_port_info cpi;
+
+	if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
+			 &cpi, 40, sizeof(cpi)) >= 0) {
+		/*
+		 * These must be bitwise tests: a logical && is true for any
+		 * non-zero mask, which would always pick the extended table
+		 * and make the NOIETF branch unreachable.
+		 */
+		if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH)
+			/* We have extended counters */
+			return &pma_group_ext;
+
+		if (cpi.capability_mask & IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF)
+			/* But not the IETF ones */
+			return &pma_group_noietf;
+	}
+
+	/* Fall back to normal counters */
+	return &pma_group;
+}
+
static int add_port(struct ib_device *device, int port_num,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
return ret;
}
- ret = sysfs_create_group(&p->kobj, &pma_group);
- if (ret)
+ p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
+ if (!p->gid_attr_group) {
+ ret = -ENOMEM;
goto err_put;
+ }
+
+ p->gid_attr_group->port = p;
+ ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
+ &p->kobj, "gid_attrs");
+ if (ret) {
+ kfree(p->gid_attr_group);
+ goto err_put;
+ }
+
+ p->pma_table = get_counter_table(device, port_num);
+ ret = sysfs_create_group(&p->kobj, p->pma_table);
+ if (ret)
+ goto err_put_gid_attrs;
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
if (ret)
goto err_free_gid;
+ p->gid_attr_group->ndev.name = "ndevs";
+ p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev,
+ attr.gid_tbl_len);
+ if (!p->gid_attr_group->ndev.attrs) {
+ ret = -ENOMEM;
+ goto err_remove_gid;
+ }
+
+ ret = sysfs_create_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->ndev);
+ if (ret)
+ goto err_free_gid_ndev;
+
+ p->gid_attr_group->type.name = "types";
+ p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type,
+ attr.gid_tbl_len);
+ if (!p->gid_attr_group->type.attrs) {
+ ret = -ENOMEM;
+ goto err_remove_gid_ndev;
+ }
+
+ ret = sysfs_create_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->type);
+ if (ret)
+ goto err_free_gid_type;
+
p->pkey_group.name = "pkeys";
p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
attr.pkey_tbl_len);
if (!p->pkey_group.attrs) {
ret = -ENOMEM;
- goto err_remove_gid;
+ goto err_remove_gid_type;
}
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
kfree(p->pkey_group.attrs);
p->pkey_group.attrs = NULL;
+err_remove_gid_type:
+ sysfs_remove_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->type);
+
+err_free_gid_type:
+ for (i = 0; i < attr.gid_tbl_len; ++i)
+ kfree(p->gid_attr_group->type.attrs[i]);
+
+ kfree(p->gid_attr_group->type.attrs);
+ p->gid_attr_group->type.attrs = NULL;
+
+err_remove_gid_ndev:
+ sysfs_remove_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->ndev);
+
+err_free_gid_ndev:
+ for (i = 0; i < attr.gid_tbl_len; ++i)
+ kfree(p->gid_attr_group->ndev.attrs[i]);
+
+ kfree(p->gid_attr_group->ndev.attrs);
+ p->gid_attr_group->ndev.attrs = NULL;
+
err_remove_gid:
sysfs_remove_group(&p->kobj, &p->gid_group);
p->gid_group.attrs = NULL;
err_remove_pma:
- sysfs_remove_group(&p->kobj, &pma_group);
+ sysfs_remove_group(&p->kobj, p->pma_table);
+
+err_put_gid_attrs:
+ kobject_put(&p->gid_attr_group->kobj);
err_put:
kobject_put(&p->kobj);
struct device_attribute *dev_attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
- struct ib_device_attr attr;
- ssize_t ret;
-
- ret = ib_query_device(dev, &attr);
- if (ret)
- return ret;
return sprintf(buf, "%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]),
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]),
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]),
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3]));
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]),
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
}
static ssize_t show_node_guid(struct device *device,
list_for_each_entry_safe(p, t, &device->port_list, entry) {
struct ib_port *port = container_of(p, struct ib_port, kobj);
list_del(&p->entry);
- sysfs_remove_group(p, &pma_group);
+ sysfs_remove_group(p, port->pma_table);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
+ sysfs_remove_group(&port->gid_attr_group->kobj,
+ &port->gid_attr_group->ndev);
+ sysfs_remove_group(&port->gid_attr_group->kobj,
+ &port->gid_attr_group->type);
+ kobject_put(&port->gid_attr_group->kobj);
kobject_put(p);
}
#include <linux/string.h>
#include <linux/export.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <rdma/ib_pack.h>
.size_bits = 16 }
};
+/*
+ * Field layout of the "ip4" header member as consumed by ib_pack(): each
+ * entry gives a field's 32-bit word index, its bit offset inside that word
+ * and its width in bits.
+ */
+static const struct ib_field ip4_table[] = {
+	{ STRUCT_FIELD(ip4, ver),
+	  .offset_words = 0, .offset_bits = 0, .size_bits = 4 },
+	{ STRUCT_FIELD(ip4, hdr_len),
+	  .offset_words = 0, .offset_bits = 4, .size_bits = 4 },
+	{ STRUCT_FIELD(ip4, tos),
+	  .offset_words = 0, .offset_bits = 8, .size_bits = 8 },
+	{ STRUCT_FIELD(ip4, tot_len),
+	  .offset_words = 0, .offset_bits = 16, .size_bits = 16 },
+	{ STRUCT_FIELD(ip4, id),
+	  .offset_words = 1, .offset_bits = 0, .size_bits = 16 },
+	{ STRUCT_FIELD(ip4, frag_off),
+	  .offset_words = 1, .offset_bits = 16, .size_bits = 16 },
+	{ STRUCT_FIELD(ip4, ttl),
+	  .offset_words = 2, .offset_bits = 0, .size_bits = 8 },
+	{ STRUCT_FIELD(ip4, protocol),
+	  .offset_words = 2, .offset_bits = 8, .size_bits = 8 },
+	{ STRUCT_FIELD(ip4, check),
+	  .offset_words = 2, .offset_bits = 16, .size_bits = 16 },
+	{ STRUCT_FIELD(ip4, saddr),
+	  .offset_words = 3, .offset_bits = 0, .size_bits = 32 },
+	{ STRUCT_FIELD(ip4, daddr),
+	  .offset_words = 4, .offset_bits = 0, .size_bits = 32 }
+};
+
+/*
+ * Field layout of the "udp" header member as consumed by ib_pack(): word
+ * index, bit offset within the word and bit width of each field.
+ */
+static const struct ib_field udp_table[] = {
+	{ STRUCT_FIELD(udp, sport),
+	  .offset_words = 0, .offset_bits = 0, .size_bits = 16 },
+	{ STRUCT_FIELD(udp, dport),
+	  .offset_words = 0, .offset_bits = 16, .size_bits = 16 },
+	{ STRUCT_FIELD(udp, length),
+	  .offset_words = 1, .offset_bits = 0, .size_bits = 16 },
+	{ STRUCT_FIELD(udp, csum),
+	  .offset_words = 1, .offset_bits = 16, .size_bits = 16 }
+};
+
static const struct ib_field grh_table[] = {
{ STRUCT_FIELD(grh, ip_version),
.offset_words = 0,
.size_bits = 24 }
};
+/*
+ * ib_ud_ip4_csum - compute the IPv4 header checksum for a UD header
+ * @header: UD header whose ip4 member supplies the field values
+ *
+ * Builds a temporary struct iphdr (version 4, ihl 5, check field zero) from
+ * header->ip4 and returns the result of ip_fast_csum() over it.
+ */
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header)
+{
+	struct iphdr iph = {
+		.ihl		= 5,
+		.version	= 4,
+		.tos		= header->ip4.tos,
+		.tot_len	= header->ip4.tot_len,
+		.id		= header->ip4.id,
+		.frag_off	= header->ip4.frag_off,
+		.ttl		= header->ip4.ttl,
+		.protocol	= header->ip4.protocol,
+		.check		= 0,
+		.saddr		= header->ip4.saddr,
+		.daddr		= header->ip4.daddr,
+	};
+
+	return ip_fast_csum((u8 *)&iph, iph.ihl);
+}
+EXPORT_SYMBOL(ib_ud_ip4_csum);
+
/**
* ib_ud_header_init - Initialize UD header structure
* @payload_bytes:Length of packet payload
* @lrh_present: specify if LRH is present
* @eth_present: specify if Eth header is present
* @vlan_present: packet is tagged vlan
- * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @grh_present: GRH flag (if non-zero, GRH will be included)
+ * @ip_version: if non-zero, IP header, V4 or V6, will be included
+ * @udp_present: if non-zero, UDP header will be included
* @immediate_present: specify if immediate data is present
* @header:Structure to initialize
*/
-void ib_ud_header_init(int payload_bytes,
- int lrh_present,
- int eth_present,
- int vlan_present,
- int grh_present,
- int immediate_present,
- struct ib_ud_header *header)
+int ib_ud_header_init(int payload_bytes,
+ int lrh_present,
+ int eth_present,
+ int vlan_present,
+ int grh_present,
+ int ip_version,
+ int udp_present,
+ int immediate_present,
+ struct ib_ud_header *header)
{
+ grh_present = grh_present && !ip_version;
memset(header, 0, sizeof *header);
+ /*
+ * UDP header without IP header doesn't make sense
+ */
+ if (udp_present && ip_version != 4 && ip_version != 6)
+ return -EINVAL;
+
if (lrh_present) {
u16 packet_length;
if (vlan_present)
header->eth.type = cpu_to_be16(ETH_P_8021Q);
- if (grh_present) {
+ if (ip_version == 6 || grh_present) {
header->grh.ip_version = 6;
header->grh.payload_length =
cpu_to_be16((IB_BTH_BYTES +
payload_bytes +
4 + /* ICRC */
3) & ~3); /* round up */
- header->grh.next_header = 0x1b;
+ header->grh.next_header = udp_present ? IPPROTO_UDP : 0x1b;
+ }
+
+ if (ip_version == 4) {
+ int udp_bytes = udp_present ? IB_UDP_BYTES : 0;
+
+ header->ip4.ver = 4; /* version 4 */
+ header->ip4.hdr_len = 5; /* 5 words */
+ header->ip4.tot_len =
+ cpu_to_be16(IB_IP4_BYTES +
+ udp_bytes +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4); /* ICRC */
+ header->ip4.protocol = IPPROTO_UDP;
}
+ if (udp_present && ip_version)
+ header->udp.length =
+ cpu_to_be16(IB_UDP_BYTES +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4); /* ICRC */
if (immediate_present)
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
header->lrh_present = lrh_present;
header->eth_present = eth_present;
header->vlan_present = vlan_present;
- header->grh_present = grh_present;
+ header->grh_present = grh_present || (ip_version == 6);
+ header->ipv4_present = ip_version == 4;
+ header->udp_present = udp_present;
header->immediate_present = immediate_present;
+ return 0;
}
EXPORT_SYMBOL(ib_ud_header_init);
&header->grh, buf + len);
len += IB_GRH_BYTES;
}
+ if (header->ipv4_present) {
+ ib_pack(ip4_table, ARRAY_SIZE(ip4_table),
+ &header->ip4, buf + len);
+ len += IB_IP4_BYTES;
+ }
+ if (header->udp_present) {
+ ib_pack(udp_table, ARRAY_SIZE(udp_table),
+ &header->udp, buf + len);
+ len += IB_UDP_BYTES;
+ }
ib_pack(bth_table, ARRAY_SIZE(bth_table),
&header->bth, buf + len);
ib_ucontext_notifier_end_account(context);
}
-static struct mmu_notifier_ops ib_umem_notifiers = {
+static const struct mmu_notifier_ops ib_umem_notifiers = {
.release = ib_umem_notifier_release,
.invalidate_page = ib_umem_notifier_invalidate_page,
.invalidate_range_start = ib_umem_notifier_invalidate_range_start,
}
static void recv_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_umad_file *file = agent->context;
struct ib_event *event);
void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+int uverbs_dealloc_mw(struct ib_mw *mw);
+
struct ib_uverbs_flow_spec {
union {
union {
struct ib_uverbs_get_context cmd;
struct ib_uverbs_get_context_resp resp;
struct ib_udata udata;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- struct ib_device_attr dev_attr;
-#endif
struct ib_ucontext *ucontext;
struct file *filp;
int ret;
ucontext->odp_mrs_count = 0;
INIT_LIST_HEAD(&ucontext->no_private_counters);
- ret = ib_query_device(ib_dev, &dev_attr);
- if (ret)
- goto err_free;
- if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+ if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
ucontext->invalidate_range = NULL;
#endif
{
struct ib_uverbs_query_device cmd;
struct ib_uverbs_query_device_resp resp;
- struct ib_device_attr attr;
- int ret;
if (out_len < sizeof resp)
return -ENOSPC;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- ret = ib_query_device(ib_dev, &attr);
- if (ret)
- return ret;
-
memset(&resp, 0, sizeof resp);
- copy_query_dev_fields(file, ib_dev, &resp, &attr);
+ copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
}
if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
- struct ib_device_attr attr;
-
- ret = ib_query_device(pd->device, &attr);
- if (ret || !(attr.device_cap_flags &
- IB_DEVICE_ON_DEMAND_PAGING)) {
+ if (!(pd->device->attrs.device_cap_flags &
+ IB_DEVICE_ON_DEMAND_PAGING)) {
pr_debug("ODP support not available\n");
ret = -EINVAL;
goto err_put;
mr->pd = pd;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
- atomic_set(&mr->usecnt, 0);
uobj->object = mr;
ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
}
}
- if (atomic_read(&mr->usecnt)) {
- ret = -EBUSY;
- goto put_uobj_pd;
- }
-
old_pd = mr->pd;
ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
cmd.length, cmd.hca_va,
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
err_unalloc:
- ib_dealloc_mw(mw);
+ uverbs_dealloc_mw(mw);
err_put:
put_pd_read(pd);
mw = uobj->object;
- ret = ib_dealloc_mw(mw);
+ ret = uverbs_dealloc_mw(mw);
if (!ret)
uobj->live = 0;
sizeof(cmd->create_flags))
attr.create_flags = cmd->create_flags;
- if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+ if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+ IB_QP_CREATE_CROSS_CHANNEL |
+ IB_QP_CREATE_MANAGED_SEND |
+ IB_QP_CREATE_MANAGED_RECV)) {
ret = -EINVAL;
goto err_put;
}
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
+/*
+ * Hand a memory window back to the driver that owns it.
+ *
+ * The PD pointer is fetched before the driver's dealloc_mw callback is
+ * invoked (presumably because the callback may free @mw - confirm
+ * against the drivers).  The PD use count is dropped only when the
+ * callback returns 0; any non-zero status is returned unchanged.
+ */
+int uverbs_dealloc_mw(struct ib_mw *mw)
+{
+	struct ib_pd *owner = mw->pd;
+	int rc = mw->device->dealloc_mw(mw);
+
+	if (rc)
+		return rc;
+
+	atomic_dec(&owner->usecnt);
+	return 0;
+}
+
static void ib_uverbs_release_dev(struct kobject *kobj)
{
struct ib_uverbs_device *dev =
struct ib_mw *mw = uobj->object;
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
- ib_dealloc_mw(mw);
+ uverbs_dealloc_mw(mw);
kfree(uobj);
}
memset(dst->dmac, 0, sizeof(dst->dmac));
dst->net = NULL;
dst->ifindex = 0;
+ dst->gid_type = IB_GID_TYPE_IB;
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
struct ib_pd *pd;
- struct ib_device_attr devattr;
- int rc;
-
- rc = ib_query_device(device, &devattr);
- if (rc)
- return ERR_PTR(rc);
pd = device->alloc_pd(device, NULL, NULL);
if (IS_ERR(pd))
pd->local_mr = NULL;
atomic_set(&pd->usecnt, 0);
- if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+ if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else {
struct ib_mr *mr;
}
EXPORT_SYMBOL(ib_create_ah);
+/*
+ * Guess the IP version of a received network header.
+ *
+ * The same buffer is examined both as an IPv6 header (ibgrh view) and as
+ * an IPv4 header (roce4grh view).  Returns 4 or 6, or 0 when the IPv6
+ * view does not say 6 and the IPv4 view does not say 4.
+ */
+static int ib_get_header_version(const union rdma_network_hdr *hdr)
+{
+	const struct ipv6hdr *v6 = (struct ipv6hdr *)&hdr->ibgrh;
+	const struct iphdr *v4 = (struct iphdr *)&hdr->roce4grh;
+	struct iphdr scratch;
+
+	/* A genuine IPv6 header always carries version 6.  If it does not,
+	 * the first 20 bytes (those ahead of the IPv4 header) are garbled
+	 * and only the IPv4 view can still be meaningful.
+	 */
+	if (v6->version != 6) {
+		if (v4->version == 4)
+			return 4;
+		return 0;
+	}
+
+	/* Garbled leading bytes may merely look like version 6, so IPv4 is
+	 * still possible here.  RoCE v2 allows no IP options, hence a real
+	 * IPv4 header must be exactly 5 words long.
+	 */
+	if (v4->ihl != 5)
+		return 6;
+
+	/* Recompute the checksum on a private copy: scattered receive
+	 * buffers cannot be written to.
+	 */
+	memcpy(&scratch, v4, sizeof(scratch));
+	scratch.check = 0;
+	scratch.check = ip_fast_csum((u8 *)&scratch, 5);
+
+	/* A matching IPv4 header checksum is accepted as proof of IPv4. */
+	return (v4->check == scratch.check) ? 4 : 6;
+}
+
+/*
+ * Classify the network header found in front of a received packet.
+ *
+ * Ports for which rdma_protocol_ib() is true always yield
+ * RDMA_NETWORK_IB.  Otherwise a header detected as version 4 yields
+ * RDMA_NETWORK_IPV4, a header whose next_hdr is IPPROTO_UDP yields
+ * RDMA_NETWORK_IPV6, and everything else yields RDMA_NETWORK_ROCE_V1.
+ */
+static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
+						     u8 port_num,
+						     const struct ib_grh *grh)
+{
+	if (rdma_protocol_ib(device, port_num))
+		return RDMA_NETWORK_IB;
+
+	if (ib_get_header_version((union rdma_network_hdr *)grh) == 4)
+		return RDMA_NETWORK_IPV4;
+
+	return (grh->next_hdr == IPPROTO_UDP) ? RDMA_NETWORK_IPV6 :
+						RDMA_NETWORK_ROCE_V1;
+}
+
struct find_gid_index_context {
u16 vlan_id;
+ enum ib_gid_type gid_type;
};
static bool find_gid_index(const union ib_gid *gid,
struct find_gid_index_context *ctx =
(struct find_gid_index_context *)context;
+ if (ctx->gid_type != gid_attr->gid_type)
+ return false;
+
if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
(is_vlan_dev(gid_attr->ndev) &&
vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
u16 vlan_id, const union ib_gid *sgid,
+ enum ib_gid_type gid_type,
u16 *gid_index)
{
- struct find_gid_index_context context = {.vlan_id = vlan_id};
+ struct find_gid_index_context context = {.vlan_id = vlan_id,
+ .gid_type = gid_type};
return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
&context, gid_index);
}
+/*
+ * Extract the source and destination GIDs from a received network header.
+ *
+ * RDMA_NETWORK_IPV4: the two 4-byte addresses of the roce4grh view are
+ * expanded into @sgid/@dgid with ipv6_addr_set_v4mapped().
+ * RDMA_NETWORK_IPV6 / RDMA_NETWORK_IB: the GIDs of the ibgrh view are
+ * copied as they are.
+ *
+ * Returns 0 on success, -EINVAL when @sgid or @dgid is NULL or when
+ * @net_type is none of the above.
+ */
+static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
+				  enum rdma_network_type net_type,
+				  union ib_gid *sgid, union ib_gid *dgid)
+{
+	__be32 v4_src, v4_dst;
+
+	if (!sgid || !dgid)
+		return -EINVAL;
+
+	switch (net_type) {
+	case RDMA_NETWORK_IPV4:
+		/* Byte-wise copy of the address fields into locals. */
+		memcpy(&v4_src, &hdr->roce4grh.saddr, 4);
+		memcpy(&v4_dst, &hdr->roce4grh.daddr, 4);
+		ipv6_addr_set_v4mapped(v4_src, (struct in6_addr *)sgid);
+		ipv6_addr_set_v4mapped(v4_dst, (struct in6_addr *)dgid);
+		return 0;
+	case RDMA_NETWORK_IPV6:
+	case RDMA_NETWORK_IB:
+		*dgid = hdr->ibgrh.dgid;
+		*sgid = hdr->ibgrh.sgid;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
struct ib_ah_attr *ah_attr)
u32 flow_class;
u16 gid_index;
int ret;
+ enum rdma_network_type net_type = RDMA_NETWORK_IB;
+ enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ int hoplimit = 0xff;
+ union ib_gid dgid;
+ union ib_gid sgid;
memset(ah_attr, 0, sizeof *ah_attr);
if (rdma_cap_eth_ah(device, port_num)) {
+ if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
+ net_type = wc->network_hdr_type;
+ else
+ net_type = ib_get_net_type_by_grh(device, port_num, grh);
+ gid_type = ib_network_to_gid_type(net_type);
+ }
+ ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+ &sgid, &dgid);
+ if (ret)
+ return ret;
+
+ if (rdma_protocol_roce(device, port_num)) {
+ int if_index = 0;
u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
wc->vlan_id : 0xffff;
+ struct net_device *idev;
+ struct net_device *resolved_dev;
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
- !(wc->wc_flags & IB_WC_WITH_VLAN)) {
- ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
- ah_attr->dmac,
- wc->wc_flags & IB_WC_WITH_VLAN ?
- NULL : &vlan_id,
- 0);
- if (ret)
- return ret;
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
+
+ idev = device->get_netdev(device, port_num);
+ if (!idev)
+ return -ENODEV;
+
+ ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
+ ah_attr->dmac,
+ wc->wc_flags & IB_WC_WITH_VLAN ?
+ NULL : &vlan_id,
+ &if_index, &hoplimit);
+ if (ret) {
+ dev_put(idev);
+ return ret;
}
- ret = get_sgid_index_from_eth(device, port_num, vlan_id,
- &grh->dgid, &gid_index);
+ resolved_dev = dev_get_by_index(&init_net, if_index);
+ if (resolved_dev->flags & IFF_LOOPBACK) {
+ dev_put(resolved_dev);
+ resolved_dev = idev;
+ dev_hold(resolved_dev);
+ }
+ rcu_read_lock();
+ if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
+ resolved_dev))
+ ret = -EHOSTUNREACH;
+ rcu_read_unlock();
+ dev_put(idev);
+ dev_put(resolved_dev);
if (ret)
return ret;
- if (wc->wc_flags & IB_WC_WITH_SMAC)
- memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
+ ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+ &dgid, gid_type, &gid_index);
+ if (ret)
+ return ret;
}
ah_attr->dlid = wc->slid;
if (wc->wc_flags & IB_WC_GRH) {
ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = grh->sgid;
+ ah_attr->grh.dgid = sgid;
if (!rdma_cap_eth_ah(device, port_num)) {
- ret = ib_find_cached_gid_by_port(device, &grh->dgid,
+ ret = ib_find_cached_gid_by_port(device, &dgid,
+ IB_GID_TYPE_IB,
port_num, NULL,
&gid_index);
if (ret)
ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
ah_attr->grh.flow_label = flow_class & 0xFFFFF;
- ah_attr->grh.hop_limit = 0xFF;
+ ah_attr->grh.hop_limit = hoplimit;
ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
}
return 0;
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
int ifindex;
+ int hop_limit;
ret = ib_query_gid(qp->device,
qp_attr->ah_attr.port_num,
ifindex = sgid_attr.ndev->ifindex;
- ret = rdma_addr_find_dmac_by_grh(&sgid,
- &qp_attr->ah_attr.grh.dgid,
- qp_attr->ah_attr.dmac,
- NULL, ifindex);
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid,
+ &qp_attr->ah_attr.grh.dgid,
+ qp_attr->ah_attr.dmac,
+ NULL, &ifindex, &hop_limit);
dev_put(sgid_attr.ndev);
+
+ qp_attr->ah_attr.grh.hop_limit = hop_limit;
}
}
out:
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
- atomic_set(&mr->usecnt, 0);
}
return mr;
}
EXPORT_SYMBOL(ib_get_dma_mr);
-int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
- return mr->device->query_mr ?
- mr->device->query_mr(mr, mr_attr) : -ENOSYS;
-}
-EXPORT_SYMBOL(ib_query_mr);
-
int ib_dereg_mr(struct ib_mr *mr)
{
- struct ib_pd *pd;
+ struct ib_pd *pd = mr->pd;
int ret;
- if (atomic_read(&mr->usecnt))
- return -EBUSY;
-
- pd = mr->pd;
ret = mr->device->dereg_mr(mr);
if (!ret)
atomic_dec(&pd->usecnt);
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
- atomic_set(&mr->usecnt, 0);
}
return mr;
}
EXPORT_SYMBOL(ib_alloc_mr);
-/* Memory windows */
-
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
-{
- struct ib_mw *mw;
-
- if (!pd->device->alloc_mw)
- return ERR_PTR(-ENOSYS);
-
- mw = pd->device->alloc_mw(pd, type);
- if (!IS_ERR(mw)) {
- mw->device = pd->device;
- mw->pd = pd;
- mw->uobject = NULL;
- mw->type = type;
- atomic_inc(&pd->usecnt);
- }
-
- return mw;
-}
-EXPORT_SYMBOL(ib_alloc_mw);
-
-int ib_dealloc_mw(struct ib_mw *mw)
-{
- struct ib_pd *pd;
- int ret;
-
- pd = mw->pd;
- ret = mw->device->dealloc_mw(mw);
- if (!ret)
- atomic_dec(&pd->usecnt);
-
- return ret;
-}
-EXPORT_SYMBOL(ib_dealloc_mw);
-
/* "Fast" memory regions */
struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
int (*set_page)(struct ib_mr *, u64))
{
struct scatterlist *sg;
- u64 last_end_dma_addr = 0, last_page_addr = 0;
+ u64 last_end_dma_addr = 0;
unsigned int last_page_off = 0;
u64 page_mask = ~((u64)mr->page_size - 1);
int i, ret;
mr->length += dma_len;
last_end_dma_addr = end_dma_addr;
- last_page_addr = end_dma_addr & page_mask;
last_page_off = end_dma_addr & ~page_mask;
}
error = l2t_send(tdev, skb, l2e);
if (error < 0)
kfree_skb(skb);
- return error;
+ return error < 0 ? error : 0;
}
int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
error = cxgb3_ofld_send(tdev, skb);
if (error < 0)
kfree_skb(skb);
- return error;
+ return error < 0 ? error : 0;
}
static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
case T3_SEND_WITH_SE_INV:
wc->opcode = IB_WC_SEND;
break;
- case T3_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
-
case T3_LOCAL_INV:
wc->opcode = IB_WC_LOCAL_INV;
break;
return ret;
}
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- int npages)
-{
- u32 stag;
- int ret;
-
- /* We could support this... */
- if (npages > mhp->attr.pbl_size)
- return -ENOMEM;
-
- stag = mhp->attr.stag;
- if (cxio_reregister_phys_mem(&rhp->rdev,
- &stag, mhp->attr.pdid,
- mhp->attr.perms,
- mhp->attr.zbva,
- mhp->attr.va_fbo,
- mhp->attr.len,
- shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr))
- return -ENOMEM;
-
- ret = iwch_finish_mem_reg(mhp, stag);
- if (ret)
- cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
-
- return ret;
-}
-
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
{
mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
return cxio_write_pbl(&mhp->rhp->rdev, pages,
mhp->attr.pbl_addr + (offset << 3), npages);
}
-
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- u64 *iova_start,
- u64 *total_size,
- int *npages,
- int *shift,
- __be64 **page_list)
-{
- u64 mask;
- int i, j, n;
-
- mask = 0;
- *total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
- return -EINVAL;
- if (i != 0 && i != num_phys_buf - 1 &&
- (buffer_list[i].size & ~PAGE_MASK))
- return -EINVAL;
- *total_size += buffer_list[i].size;
- if (i > 0)
- mask |= buffer_list[i].addr;
- else
- mask |= buffer_list[i].addr & PAGE_MASK;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
- else
- mask |= (buffer_list[i].addr + buffer_list[i].size +
- PAGE_SIZE - 1) & PAGE_MASK;
- }
-
- if (*total_size > 0xFFFFFFFFULL)
- return -ENOMEM;
-
- /* Find largest page shift we can use to cover buffers */
- for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
- if ((1ULL << *shift) & mask)
- break;
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
- buffer_list[0].addr &= ~0ull << *shift;
-
- *npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- *npages += (buffer_list[i].size +
- (1ULL << *shift) - 1) >> *shift;
-
- if (!*npages)
- return -EINVAL;
-
- *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
- if (!*page_list)
- return -ENOMEM;
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
- ++j)
- (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
- ((u64) j << *shift));
-
- PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, (unsigned long long) *iova_start,
- (unsigned long long) mask, *shift, (unsigned long long) *total_size,
- *npages);
-
- return 0;
-
-}
u32 mmid;
PDBG("%s ib_mr %p\n", __func__, ib_mr);
- /* There can be no memory windows */
- if (atomic_read(&ib_mr->usecnt))
- return -EINVAL;
mhp = to_iwch_mr(ib_mr);
kfree(mhp->pages);
return 0;
}
-static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start)
+static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
{
- __be64 *page_list;
- int shift;
- u64 total_size;
- int npages;
- struct iwch_dev *rhp;
- struct iwch_pd *php;
+ const u64 total_size = 0xffffffff;
+ const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
+ struct iwch_pd *php = to_iwch_pd(pd);
+ struct iwch_dev *rhp = php->rhp;
struct iwch_mr *mhp;
- int ret;
+ __be64 *page_list;
+ int shift = 26, npages, ret, i;
PDBG("%s ib_pd %p\n", __func__, pd);
- php = to_iwch_pd(pd);
- rhp = php->rhp;
+
+ /*
+ * T3 only supports 32 bits of size.
+ */
+ if (sizeof(phys_addr_t) > 4) {
+ pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
+ return ERR_PTR(-ENOTSUPP);
+ }
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
mhp->rhp = rhp;
- /* First check that we have enough alignment */
- if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+ npages = (total_size + (1ULL << shift) - 1) >> shift;
+ if (!npages) {
ret = -EINVAL;
goto err;
}
- if (num_phys_buf > 1 &&
- ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
- ret = -EINVAL;
+ page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
+ if (!page_list) {
+ ret = -ENOMEM;
goto err;
}
- ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
- &total_size, &npages, &shift, &page_list);
- if (ret)
- goto err;
+ for (i = 0; i < npages; i++)
+ page_list[i] = cpu_to_be64((u64)i << shift);
+
+ PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
+ __func__, mask, shift, total_size, npages);
ret = iwch_alloc_pbl(mhp, npages);
if (ret) {
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = *iova_start;
+ mhp->attr.va_fbo = 0;
mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) total_size;
err:
kfree(mhp);
return ERR_PTR(ret);
-
-}
-
-static int iwch_reregister_phys_mem(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc, u64 * iova_start)
-{
-
- struct iwch_mr mh, *mhp;
- struct iwch_pd *php;
- struct iwch_dev *rhp;
- __be64 *page_list = NULL;
- int shift = 0;
- u64 total_size;
- int npages = 0;
- int ret;
-
- PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
- /* There can be no memory windows */
- if (atomic_read(&mr->usecnt))
- return -EINVAL;
-
- mhp = to_iwch_mr(mr);
- rhp = mhp->rhp;
- php = to_iwch_pd(mr->pd);
-
- /* make sure we are on the same adapter */
- if (rhp != php->rhp)
- return -EINVAL;
-
- memcpy(&mh, mhp, sizeof *mhp);
-
- if (mr_rereg_mask & IB_MR_REREG_PD)
- php = to_iwch_pd(pd);
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mh.attr.perms = iwch_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- ret = build_phys_page_list(buffer_list, num_phys_buf,
- iova_start,
- &total_size, &npages,
- &shift, &page_list);
- if (ret)
- return ret;
- }
-
- ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
- kfree(page_list);
- if (ret) {
- return ret;
- }
- if (mr_rereg_mask & IB_MR_REREG_PD)
- mhp->attr.pdid = php->pdid;
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- mhp->attr.zbva = 0;
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- }
-
- return 0;
}
-
static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
return ERR_PTR(err);
}
-static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
-{
- struct ib_phys_buf bl;
- u64 kva;
- struct ib_mr *ibmr;
-
- PDBG("%s ib_pd %p\n", __func__, pd);
-
- /*
- * T3 only supports 32 bits of size.
- */
- if (sizeof(phys_addr_t) > 4) {
- pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
- return ERR_PTR(-ENOTSUPP);
- }
- bl.size = 0xffffffff;
- bl.addr = 0;
- kva = 0;
- ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
- return ibmr;
-}
-
static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct iwch_dev *rhp;
dev->ibdev.resize_cq = iwch_resize_cq;
dev->ibdev.poll_cq = iwch_poll_cq;
dev->ibdev.get_dma_mr = iwch_get_dma_mr;
- dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
- dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
dev->ibdev.reg_user_mr = iwch_reg_user_mr;
dev->ibdev.dereg_mr = iwch_dereg_mr;
dev->ibdev.alloc_mw = iwch_alloc_mw;
- dev->ibdev.bind_mw = iwch_bind_mw;
dev->ibdev.dealloc_mw = iwch_dealloc_mw;
dev->ibdev.alloc_mr = iwch_alloc_mr;
dev->ibdev.map_mr_sg = iwch_map_mr_sg;
struct ib_send_wr **bad_wr);
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int iwch_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
int iwch_post_zb_read(struct iwch_ep *ep);
void stop_read_rep_timer(struct iwch_qp *qhp);
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp, int shift);
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- int npages);
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
void iwch_free_pbl(struct iwch_mr *mhp);
int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- u64 *iova_start,
- u64 *total_size,
- int *npages,
- int *shift,
- __be64 **page_list);
-
#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
return err;
}
-int iwch_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- struct iwch_dev *rhp;
- struct iwch_mw *mhp;
- struct iwch_qp *qhp;
- union t3_wr *wqe;
- u32 pbl_addr;
- u8 page_size;
- u32 num_wrs;
- unsigned long flag;
- struct ib_sge sgl;
- int err=0;
- enum t3_wr_flags t3_wr_flags;
- u32 idx;
- struct t3_swsq *sqp;
-
- qhp = to_iwch_qp(qp);
- mhp = to_iwch_mw(mw);
- rhp = qhp->rhp;
-
- spin_lock_irqsave(&qhp->lock, flag);
- if (qhp->attr.state > IWCH_QP_STATE_RTS) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
- }
- num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
- qhp->wq.sq_size_log2);
- if (num_wrs == 0) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return -ENOMEM;
- }
- idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
- PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx,
- mw, mw_bind);
- wqe = (union t3_wr *) (qhp->wq.queue + idx);
-
- t3_wr_flags = 0;
- if (mw_bind->send_flags & IB_SEND_SIGNALED)
- t3_wr_flags = T3_COMPLETION_FLAG;
-
- sgl.addr = mw_bind->bind_info.addr;
- sgl.lkey = mw_bind->bind_info.mr->lkey;
- sgl.length = mw_bind->bind_info.length;
- wqe->bind.reserved = 0;
- wqe->bind.type = TPT_VATO;
-
- /* TBD: check perms */
- wqe->bind.perms = iwch_ib_to_tpt_bind_access(
- mw_bind->bind_info.mw_access_flags);
- wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
- wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
- wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
- wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
- err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
- if (err) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return err;
- }
- wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
- sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
- sqp->wr_id = mw_bind->wr_id;
- sqp->opcode = T3_BIND_MW;
- sqp->sq_wptr = qhp->wq.sq_wptr;
- sqp->complete = 0;
- sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
- wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
- wqe->bind.mr_pagesz = page_size;
- build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
- Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
- sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
- ++(qhp->wq.wptr);
- ++(qhp->wq.sq_wptr);
- spin_unlock_irqrestore(&qhp->lock, flag);
-
- if (cxio_wq_db_enabled(&qhp->wq))
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
- return err;
-}
-
static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
u8 *layer_type, u8 *ecode)
{
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
&ep->com.mapped_local_addr;
+ if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
+ err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
+ (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+ if (err)
+ return err;
+ }
c4iw_init_wr_wait(&ep->com.wr_wait);
err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
ep->stid, &sin6->sin6_addr,
0, 0, __func__);
else if (err > 0)
err = net_xmit_errno(err);
- if (err)
+ if (err) {
+ cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
+ (const u32 *)&sin6->sin6_addr.s6_addr, 1);
pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
err, ep->stid,
sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
- else
- cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
- (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+ }
return err;
}
case FW_RI_SEND_WITH_SE:
wc->opcode = IB_WC_SEND;
break;
- case FW_RI_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case FW_RI_LOCAL_INV:
wc->opcode = IB_WC_LOCAL_INV;
static int qp_open(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *qpd;
- int ret = 0;
int count = 1;
qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
- if (!qpd) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!qpd)
+ return -ENOMEM;
+
qpd->devp = inode->i_private;
qpd->pos = 0;
qpd->bufsize = count * 128;
qpd->buf = vmalloc(qpd->bufsize);
if (!qpd->buf) {
- ret = -ENOMEM;
- goto err1;
+ kfree(qpd);
+ return -ENOMEM;
}
spin_lock_irq(&qpd->devp->lock);
qpd->buf[qpd->pos++] = 0;
file->private_data = qpd;
- goto out;
-err1:
- kfree(qpd);
-out:
- return ret;
+ return 0;
}
static const struct file_operations qp_debugfs_fops = {
pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
rdev->lldi.ucq_density);
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
rdev->lldi.vr->cq.size);
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
rdev->qpmask = rdev->lldi.udb_density - 1;
rdev->lldi.db_reg, rdev->lldi.gts_reg,
rdev->qpmask, rdev->cqmask);
- if (c4iw_num_stags(rdev) == 0) {
- err = -EINVAL;
- goto err1;
- }
+ if (c4iw_num_stags(rdev) == 0)
+ return -EINVAL;
rdev->stats.pd.total = T4_MAX_NUM_PD;
rdev->stats.stag.total = rdev->lldi.vr->stag.size;
err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
if (err) {
printk(KERN_ERR MOD "error %d initializing resources\n", err);
- goto err1;
+ return err;
}
err = c4iw_pblpool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
- goto err2;
+ goto destroy_resource;
}
err = c4iw_rqtpool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
- goto err3;
+ goto destroy_pblpool;
}
err = c4iw_ocqp_pool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
- goto err4;
+ goto destroy_rqtpool;
}
rdev->status_page = (struct t4_dev_status_page *)
__get_free_page(GFP_KERNEL);
- if (!rdev->status_page) {
- pr_err(MOD "error allocating status page\n");
- goto err4;
- }
+ if (!rdev->status_page)
+ goto destroy_ocqp_pool;
+ rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
+ rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
+ rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
+ rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
if (c4iw_wr_log) {
rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
rdev->status_page->db_off = 0;
return 0;
-err4:
+destroy_ocqp_pool:
+ c4iw_ocqp_pool_destroy(rdev);
+destroy_rqtpool:
c4iw_rqtpool_destroy(rdev);
-err3:
+destroy_pblpool:
c4iw_pblpool_destroy(rdev);
-err2:
+destroy_resource:
c4iw_destroy_resource(&rdev->resource);
-err1:
return err;
}
struct ib_send_wr **bad_wr);
int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
int c4iw_destroy_listen(struct iw_cm_id *cm_id);
u64 length, u64 virt, int acc,
struct ib_udata *udata);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start);
-int c4iw_reregister_phys_mem(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc, u64 *iova_start);
int c4iw_dereg_mr(struct ib_mr *ib_mr);
int c4iw_destroy_cq(struct ib_cq *ib_cq);
struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
return ret;
}
-static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
- struct c4iw_mr *mhp, int shift, int npages)
-{
- u32 stag;
- int ret;
-
- if (npages > mhp->attr.pbl_size)
- return -ENOMEM;
-
- stag = mhp->attr.stag;
- ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
- FW_RI_STAG_NSMR, mhp->attr.perms,
- mhp->attr.mw_bind_enable, mhp->attr.zbva,
- mhp->attr.va_fbo, mhp->attr.len, shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr);
- if (ret)
- return ret;
-
- ret = finish_mem_reg(mhp, stag);
- if (ret)
- dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
-
- return ret;
-}
-
static int alloc_pbl(struct c4iw_mr *mhp, int npages)
{
mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev,
return 0;
}
-static int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf, u64 *iova_start,
- u64 *total_size, int *npages,
- int *shift, __be64 **page_list)
-{
- u64 mask;
- int i, j, n;
-
- mask = 0;
- *total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
- return -EINVAL;
- if (i != 0 && i != num_phys_buf - 1 &&
- (buffer_list[i].size & ~PAGE_MASK))
- return -EINVAL;
- *total_size += buffer_list[i].size;
- if (i > 0)
- mask |= buffer_list[i].addr;
- else
- mask |= buffer_list[i].addr & PAGE_MASK;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
- else
- mask |= (buffer_list[i].addr + buffer_list[i].size +
- PAGE_SIZE - 1) & PAGE_MASK;
- }
-
- if (*total_size > 0xFFFFFFFFULL)
- return -ENOMEM;
-
- /* Find largest page shift we can use to cover buffers */
- for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
- if ((1ULL << *shift) & mask)
- break;
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
- buffer_list[0].addr &= ~0ull << *shift;
-
- *npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- *npages += (buffer_list[i].size +
- (1ULL << *shift) - 1) >> *shift;
-
- if (!*npages)
- return -EINVAL;
-
- *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
- if (!*page_list)
- return -ENOMEM;
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
- ++j)
- (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
- ((u64) j << *shift));
-
- PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, (unsigned long long)*iova_start,
- (unsigned long long)mask, *shift, (unsigned long long)*total_size,
- *npages);
-
- return 0;
-
-}
-
-int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
- struct ib_pd *pd, struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
-
- struct c4iw_mr mh, *mhp;
- struct c4iw_pd *php;
- struct c4iw_dev *rhp;
- __be64 *page_list = NULL;
- int shift = 0;
- u64 total_size;
- int npages;
- int ret;
-
- PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
- /* There can be no memory windows */
- if (atomic_read(&mr->usecnt))
- return -EINVAL;
-
- mhp = to_c4iw_mr(mr);
- rhp = mhp->rhp;
- php = to_c4iw_pd(mr->pd);
-
- /* make sure we are on the same adapter */
- if (rhp != php->rhp)
- return -EINVAL;
-
- memcpy(&mh, mhp, sizeof *mhp);
-
- if (mr_rereg_mask & IB_MR_REREG_PD)
- php = to_c4iw_pd(pd);
- if (mr_rereg_mask & IB_MR_REREG_ACCESS) {
- mh.attr.perms = c4iw_ib_to_tpt_access(acc);
- mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) ==
- IB_ACCESS_MW_BIND;
- }
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- ret = build_phys_page_list(buffer_list, num_phys_buf,
- iova_start,
- &total_size, &npages,
- &shift, &page_list);
- if (ret)
- return ret;
- }
-
- if (mr_exceeds_hw_limits(rhp, total_size)) {
- kfree(page_list);
- return -EINVAL;
- }
-
- ret = reregister_mem(rhp, php, &mh, shift, npages);
- kfree(page_list);
- if (ret)
- return ret;
- if (mr_rereg_mask & IB_MR_REREG_PD)
- mhp->attr.pdid = php->pdid;
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- mhp->attr.zbva = 0;
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- }
-
- return 0;
-}
-
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- __be64 *page_list;
- int shift;
- u64 total_size;
- int npages;
- struct c4iw_dev *rhp;
- struct c4iw_pd *php;
- struct c4iw_mr *mhp;
- int ret;
-
- PDBG("%s ib_pd %p\n", __func__, pd);
- php = to_c4iw_pd(pd);
- rhp = php->rhp;
-
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->rhp = rhp;
-
- /* First check that we have enough alignment */
- if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
- ret = -EINVAL;
- goto err;
- }
-
- if (num_phys_buf > 1 &&
- ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
- ret = -EINVAL;
- goto err;
- }
-
- ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
- &total_size, &npages, &shift,
- &page_list);
- if (ret)
- goto err;
-
- if (mr_exceeds_hw_limits(rhp, total_size)) {
- kfree(page_list);
- ret = -EINVAL;
- goto err;
- }
-
- ret = alloc_pbl(mhp, npages);
- if (ret) {
- kfree(page_list);
- goto err;
- }
-
- ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
- npages);
- kfree(page_list);
- if (ret)
- goto err_pbl;
-
- mhp->attr.pdid = php->pdid;
- mhp->attr.zbva = 0;
-
- mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
-
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- ret = register_mem(rhp, php, mhp, shift);
- if (ret)
- goto err_pbl;
-
- return &mhp->ibmr;
-
-err_pbl:
- c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
- mhp->attr.pbl_size << 3);
-
-err:
- kfree(mhp);
- return ERR_PTR(ret);
-
-}
-
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
{
struct c4iw_dev *rhp;
u32 mmid;
PDBG("%s ib_mr %p\n", __func__, ib_mr);
- /* There can be no memory windows */
- if (atomic_read(&ib_mr->usecnt))
- return -EINVAL;
mhp = to_c4iw_mr(ib_mr);
rhp = mhp->rhp;
dev->ibdev.resize_cq = c4iw_resize_cq;
dev->ibdev.poll_cq = c4iw_poll_cq;
dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
- dev->ibdev.reg_phys_mr = c4iw_register_phys_mem;
- dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem;
dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
dev->ibdev.dereg_mr = c4iw_dereg_mr;
dev->ibdev.alloc_mw = c4iw_alloc_mw;
- dev->ibdev.bind_mw = c4iw_bind_mw;
dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
dev->ibdev.alloc_mr = c4iw_alloc_mr;
dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
return err;
}
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind)
-{
- return -ENOSYS;
-}
-
static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
u8 *ecode)
{
struct t4_dev_status_page {
u8 db_off;
+ u8 pad1;
+ u16 pad2;
+ u32 pad3;
+ u64 qp_start;
+ u64 qp_size;
+ u64 cq_start;
+ u64 cq_size;
};
#ifndef __C4IW_USER_H__
#define __C4IW_USER_H__
-#define C4IW_UVERBS_ABI_VERSION 2
+#define C4IW_UVERBS_ABI_VERSION 3
/*
* Make sure that all structs defined in this file remain laid out so
ah_attr->grh.sgid_index, &sgid, &gid_attr);
if (ret)
return ERR_PTR(ret);
- memset(ah->av.eth.s_mac, 0, ETH_ALEN);
+ eth_zero_addr(ah->av.eth.s_mac);
if (gid_attr.ndev) {
if (is_vlan_dev(gid_attr.ndev))
vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
+ ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
if (ah_attr->static_rate) {
ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case MLX4_OPCODE_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case MLX4_OPCODE_LSO:
wc->opcode = IB_WC_LSO;
break;
return dev;
}
-static int mlx4_ib_update_gids(struct gid_entry *gids,
- struct mlx4_ib_dev *ibdev,
- u8 port_num)
+static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
return err;
}
+/*
+ * Push the port GID table to a device that supports both RoCE v1 and v2.
+ *
+ * @gids:     MLX4_MAX_PORT_GIDS entries (GID plus GID type) to program.
+ * @ibdev:    mlx4 IB device whose underlying mlx4_dev receives the command.
+ * @port_num: port to program; when the device is bonded the same table is
+ *            additionally written to port 2.
+ *
+ * Each table entry carries the raw GID.  For RoCE v2 entries
+ * (IB_GID_TYPE_ROCE_UDP_ENCAP) the version field is set to 2; a GID that is
+ * not an IPv4-mapped address gets type 1, otherwise the first 12 bytes of the
+ * GID are cleared so only the trailing IPv4 address remains.
+ *
+ * Returns 0 on success or a negative errno.  If both SET_PORT commands are
+ * issued, the first failure is the one reported.
+ */
+static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
+				     struct mlx4_ib_dev *ibdev,
+				     u8 port_num)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	int err;
+	struct mlx4_dev *dev = ibdev->dev;
+	int i;
+	struct {
+		union ib_gid	gid;
+		__be32		rsrvd1[2];
+		__be16		rsrvd2;
+		u8		type;
+		u8		version;
+		__be32		rsrvd3;
+	} *gid_tbl;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox); /* propagate the allocator's own error */
+
+	gid_tbl = mailbox->buf;
+	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+		memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
+		if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+			gid_tbl[i].version = 2;
+			if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
+				gid_tbl[i].type = 1;
+			else
+				/* keep only the IPv4 address in the last 4 bytes */
+				memset(&gid_tbl[i].gid, 0, 12);
+		}
+	}
+
+	err = mlx4_cmd(dev, mailbox->dma,
+		       MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
+		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+		       MLX4_CMD_WRAPPED);
+	if (mlx4_is_bonded(dev)) {
+		/*
+		 * The second port is still programmed even if the first
+		 * command failed, but the two errno values are never added
+		 * together: a sum of errnos is not a valid error code.
+		 */
+		int bond_err = mlx4_cmd(dev, mailbox->dma,
+					MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
+					1, MLX4_CMD_SET_PORT,
+					MLX4_CMD_TIME_CLASS_B,
+					MLX4_CMD_WRAPPED);
+
+		if (!err)
+			err = bond_err;
+	}
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
+/*
+ * Program the port GID table, choosing the table layout from the device
+ * capabilities: the combined RoCE v1/v2 layout when
+ * MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 is set, the v1-only layout otherwise.
+ * Returns whatever the selected helper returns.
+ */
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+			       struct mlx4_ib_dev *ibdev,
+			       u8 port_num)
+{
+	if (!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
+		return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
+
+	return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
+}
+
static int mlx4_ib_add_gid(struct ib_device *device,
u8 port_num,
unsigned int index,
port_gid_table = &iboe->gids[port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
- if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+ if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
+ (port_gid_table->gids[i].gid_type == attr->gid_type)) {
found = i;
break;
}
} else {
*context = port_gid_table->gids[free].ctx;
memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+ port_gid_table->gids[free].gid_type = attr->gid_type;
port_gid_table->gids[free].ctx->real_index = free;
port_gid_table->gids[free].ctx->refcount = 1;
hw_update = 1;
if (!gids) {
ret = -ENOMEM;
} else {
- for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+ gids[i].gid_type = port_gid_table->gids[i].gid_type;
+ }
}
}
spin_unlock_bh(&iboe->lock);
int i;
int ret;
unsigned long flags;
+ struct ib_gid_attr attr;
if (port_num > MLX4_MAX_PORTS)
return -EINVAL;
if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
return index;
- ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
+ ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
if (ret)
return ret;
+ if (attr.ndev)
+ dev_put(attr.ndev);
+
if (!memcmp(&gid, &zgid, sizeof(gid)))
return -EINVAL;
port_gid_table = &iboe->gids[port_num - 1];
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
- if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) {
+ if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
+ attr.gid_type == port_gid_table->gids[i].gid_type) {
ctx = port_gid_table->gids[i].ctx;
break;
}
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
err = mlx4_ib_query_port(ibdev, port_num, &attr);
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND)
+ if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
- else
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ } else {
+ if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ }
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
- ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
ibdev->ib_dev.uverbs_cmd_mask |=
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
if (!iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
goto err_notif;
}
}
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+ if (err) {
+ goto err_notif;
+ }
+ }
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
unsigned tail;
};
+/*
+ * Driver-private QP creation flag, placed at IB_QP_CREATE_RESERVED_START.
+ * mlx4_ib_create_qp() sets it on init_attr->create_flags while creating the
+ * companion GSI QP used for RoCE v2 and clears it again afterwards.
+ */
+enum {
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START
+};
+
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+
+ /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
+ MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
struct gid_entry {
union ib_gid gid;
+ enum ib_gid_type gid_type;
struct gid_cache_context *ctx;
};
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
return ERR_PTR(err);
}
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- struct ib_bind_mw_wr wr;
- struct ib_send_wr *bad_wr;
- int ret;
-
- memset(&wr, 0, sizeof(wr));
- wr.wr.opcode = IB_WR_BIND_MW;
- wr.wr.wr_id = mw_bind->wr_id;
- wr.wr.send_flags = mw_bind->send_flags;
- wr.mw = mw;
- wr.bind_info = mw_bind->bind_info;
- wr.rkey = ib_inc_rkey(mw->rkey);
-
- ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr);
- if (!ret)
- mw->rkey = wr.rkey;
-
- return ret;
-}
-
int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
struct mlx4_ib_mw *mw = to_mmw(ibmw);
*/
#include <linux/log2.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
u32 send_psn;
struct ib_ud_header ud_header;
u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+ struct ib_qp *roce_v2_gsi;
};
enum {
[IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
- [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
};
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
}
}
}
- return proxy_sqp;
+ if (proxy_sqp)
+ return 1;
+
+ return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
}
/* used for INIT/CLOSE port logic */
if (err)
goto err_mtt;
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+ qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
+ gfp | __GFP_NOWARN);
if (!qp->sq.wrid)
qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
gfp, PAGE_KERNEL);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+ qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
+ gfp | __GFP_NOWARN);
if (!qp->rq.wrid)
qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
gfp, PAGE_KERNEL);
return dev->dev->caps.qp1_proxy[attr->port_num - 1];
}
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct mlx4_ib_qp *qp = NULL;
int err;
MLX4_IB_SRIOV_TUNNEL_QP |
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI |
MLX4_IB_QP_CREATE_USE_GFP_NOIO))
return ERR_PTR(-EINVAL);
return ERR_PTR(-EINVAL);
}
- if (init_attr->create_flags &&
- ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
- ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
- MLX4_IB_QP_CREATE_USE_GFP_NOIO |
- MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) &&
- init_attr->qp_type != IB_QPT_UD) ||
- ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
- init_attr->qp_type > IB_QPT_GSI)))
- return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags) {
+ if (udata && init_attr->create_flags & ~(sup_u_create_flags))
+ return ERR_PTR(-EINVAL);
+
+ if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO |
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI |
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
+ init_attr->qp_type != IB_QPT_UD) ||
+ (init_attr->create_flags & MLX4_IB_SRIOV_SQP &&
+ init_attr->qp_type > IB_QPT_GSI) ||
+ (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
+ init_attr->qp_type != IB_QPT_GSI))
+ return ERR_PTR(-EINVAL);
+ }
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
case IB_QPT_SMI:
case IB_QPT_GSI:
{
+ int sqpn;
+
/* Userspace is not allowed to create special QPs: */
if (udata)
return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
+ int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
+
+ if (res)
+ return ERR_PTR(res);
+ } else {
+ sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
+ }
err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
- get_sqp_num(to_mdev(pd->device), init_attr),
+ sqpn,
&qp, gfp);
if (err)
return ERR_PTR(err);
qp->port = init_attr->port_num;
- qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
-
+ qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
+ init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
break;
}
default:
return &qp->ibqp;
}
-int mlx4_ib_destroy_qp(struct ib_qp *qp)
+/*
+ * Create a QP and, for a regular GSI QP on an Ethernet port of a device
+ * with MLX4_DEV_CAP_FLAG2_ROCE_V1_V2, also create a companion GSI QP for
+ * RoCE v2.
+ *
+ * The companion is created through ib_create_qp() with
+ * MLX4_IB_QP_CREATE_ROCE_V2_GSI temporarily set in init_attr->create_flags.
+ * Failure to create it is only logged; the primary QP is still returned and
+ * its roce_v2_gsi pointer is left NULL.
+ *
+ * Returns the primary QP, or the ERR_PTR produced by _mlx4_ib_create_qp().
+ */
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+				struct ib_qp_init_attr *init_attr,
+				struct ib_udata *udata)
+{
+	struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
+	struct mlx4_ib_dev *dev = to_mdev(device);
+	struct ib_qp *ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+	struct mlx4_ib_sqp *primary;
+	struct ib_qp *v2_qp;
+
+	if (IS_ERR(ibqp))
+		return ibqp;
+
+	/* Only a plain GSI QP gets a RoCE v2 companion. */
+	if (init_attr->qp_type != IB_QPT_GSI ||
+	    (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI))
+		return ibqp;
+
+	if (!rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num) ||
+	    !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
+		return ibqp;
+
+	primary = to_msqp(to_mqp(ibqp));
+
+	/* Borrow the caller's init_attr; restore the flags right after. */
+	init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+	v2_qp = ib_create_qp(pd, init_attr);
+	init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+
+	if (IS_ERR(v2_qp)) {
+		pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(v2_qp));
+		primary->roce_v2_gsi = NULL;
+	} else {
+		primary->roce_v2_gsi = v2_qp;
+		to_msqp(to_mqp(v2_qp))->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+	}
+
+	return ibqp;
+}
+
+static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
return 0;
}
+/*
+ * Destroy a QP.  A GSI QP that owns a companion RoCE v2 GSI QP has that
+ * companion destroyed first (its return value is not checked), then the QP
+ * itself is torn down.  Returns the result of _mlx4_ib_destroy_qp().
+ */
+int mlx4_ib_destroy_qp(struct ib_qp *qp)
+{
+	struct mlx4_ib_qp *mqp = to_mqp(qp);
+	struct ib_qp *v2_qp = NULL;
+
+	if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI)
+		v2_qp = to_msqp(mqp)->roce_v2_gsi;
+
+	if (v2_qp)
+		ib_destroy_qp(v2_qp);
+
+	return _mlx4_ib_destroy_qp(qp);
+}
+
static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
{
switch (type) {
return 0;
}
+/*
+ * RoCE mode values for the QP context.  Callers in this file shift the value
+ * left by 6 before OR-ing it into context->rlkey_roce_mode.
+ * MLX4_QPC_ROCE_MODE_UNDEFINED is what gid_type_to_qpc() returns for a GID
+ * type it does not know; it is never written to the context.
+ */
+enum {
+ MLX4_QPC_ROCE_MODE_1 = 0,
+ MLX4_QPC_ROCE_MODE_2 = 2,
+ MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff
+};
+
+/*
+ * Translate an IB core GID type into the QP-context RoCE mode:
+ * IB_GID_TYPE_ROCE -> MLX4_QPC_ROCE_MODE_1,
+ * IB_GID_TYPE_ROCE_UDP_ENCAP -> MLX4_QPC_ROCE_MODE_2,
+ * anything else -> MLX4_QPC_ROCE_MODE_UNDEFINED.
+ */
+static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
+{
+	if (gid_type == IB_GID_TYPE_ROCE)
+		return MLX4_QPC_ROCE_MODE_1;
+
+	if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+		return MLX4_QPC_ROCE_MODE_2;
+
+	return MLX4_QPC_ROCE_MODE_UNDEFINED;
+}
+
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
mlx4_ib_steer_qp_reg(dev, qp, 1);
steer_qp = 1;
}
+
+ if (ibqp->qp_type == IB_QPT_GSI) {
+ enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
+ IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
+
+ context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+ }
}
if (attr_mask & IB_QP_PKEY_INDEX) {
u16 vlan = 0xffff;
u8 smac[ETH_ALEN];
int status = 0;
+ int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+ attr->ah_attr.ah_flags & IB_AH_GRH;
- if (rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
- attr->ah_attr.ah_flags & IB_AH_GRH) {
+ if (is_eth) {
int index = attr->ah_attr.grh.sgid_index;
status = ib_get_cached_gid(ibqp->device, port_num,
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
MLX4_QP_OPTPAR_SCHED_QUEUE);
+
+ if (is_eth &&
+ (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+
+ if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
+ err = -EINVAL;
+ goto out;
+ }
+ context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+ }
+
}
if (attr_mask & IB_QP_TIMEOUT) {
sqd_event = 0;
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
- context->rlkey |= (1 << 4);
+ context->rlkey_roce_mode |= (1 << 4);
/*
* Before passing a kernel QP to the HW, make sure that the
return err;
}
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
+static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
return err;
}
+/*
+ * Modify a QP and mirror the same attribute change onto the companion
+ * RoCE v2 GSI QP when the QP is a GSI QP that has one.
+ *
+ * The companion is modified regardless of whether the primary modification
+ * succeeded; a failure on the companion is only logged.  The return value is
+ * always the result of modifying the primary QP.
+ */
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		      int attr_mask, struct ib_udata *udata)
+{
+	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+	int ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
+	struct mlx4_ib_sqp *sqp;
+	int v2_err;
+
+	if (mqp->mlx4_ib_qp_type != MLX4_IB_QPT_GSI)
+		return ret;
+
+	sqp = to_msqp(mqp);
+	if (!sqp->roce_v2_gsi)
+		return ret;
+
+	v2_err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask);
+	if (v2_err)
+		pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n", v2_err);
+
+	return ret;
+}
+
static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
{
int i;
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
send_size += sizeof (struct mlx4_ib_tunnel_header);
- ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
+ ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
sqp->ud_header.lrh.service_level =
return 0;
}
-static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
-{
- int i;
-
- for (i = ETH_ALEN; i; i--) {
- dst_mac[i - 1] = src_mac & 0xff;
- src_mac >>= 8;
- }
-}
-
+/* UDP source port written into the UD header by build_mlx_header() for RoCE v2 sends */
+#define MLX4_ROCEV2_QP1_SPORT 0xC000
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
bool is_eth;
bool is_vlan = false;
bool is_grh;
+ bool is_udp = false;
+ int ip_version = 0;
send_size = 0;
for (i = 0; i < wr->wr.num_sge; ++i)
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
+ struct ib_gid_attr gid_attr;
+
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, &sgid,
- NULL);
- if (!err && !memcmp(&sgid, &zgid, sizeof(sgid)))
- err = -ENOENT;
- if (err)
+ &gid_attr);
+ if (!err) {
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ if (!memcmp(&sgid, &zgid, sizeof(sgid)))
+ err = -ENOENT;
+ }
+ if (!err) {
+ is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ if (is_udp) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
+ ip_version = 4;
+ else
+ ip_version = 6;
+ is_grh = false;
+ }
+ } else {
return err;
+ }
}
-
if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
is_vlan = 1;
}
}
- ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
+ err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
+ ip_version, is_udp, 0, &sqp->ud_header);
+ if (err)
+ return err;
if (!is_eth) {
sqp->ud_header.lrh.service_level =
sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
}
- if (is_grh) {
+ if (is_grh || (ip_version == 6)) {
sqp->ud_header.grh.traffic_class =
(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
sqp->ud_header.grh.flow_label =
ah->av.ib.dgid, 16);
}
+ if (ip_version == 4) {
+ sqp->ud_header.ip4.tos =
+ (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
+ sqp->ud_header.ip4.id = 0;
+ sqp->ud_header.ip4.frag_off = htons(IP_DF);
+ sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit;
+
+ memcpy(&sqp->ud_header.ip4.saddr,
+ sgid.raw + 12, 4);
+ memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4);
+ sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header);
+ }
+
+ if (is_udp) {
+ sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT);
+ sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT);
+ sqp->ud_header.udp.csum = 0;
+ }
+
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
if (!is_eth) {
if (is_eth) {
struct in6_addr in6;
-
+ u16 ether_type;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
+ ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
+ (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
+
mlx->sched_prio = cpu_to_be16(pcp);
+ ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
- /* FIXME: cache smac value? */
memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
memcpy(&in6, sgid.raw, sizeof(in6));
- if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
- u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
- u8 smac[ETH_ALEN];
-
- mlx4_u64_to_smac(smac, mac);
- memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
- } else {
- /* use the src mac of the tunnel */
- memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
- }
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
if (!is_vlan) {
- sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ sqp->ud_header.eth.type = cpu_to_be16(ether_type);
} else {
- sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ sqp->ud_header.vlan.type = cpu_to_be16(ether_type);
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
fseg->reserved[1] = 0;
}
-static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
- struct ib_bind_mw_wr *wr)
-{
- bseg->flags1 =
- convert_access(wr->bind_info.mw_access_flags) &
- cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
- MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
- MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
- bseg->flags2 = 0;
- if (wr->mw->type == IB_MW_TYPE_2)
- bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
- if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
- bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
- bseg->new_rkey = cpu_to_be32(wr->rkey);
- bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
- bseg->addr = cpu_to_be64(wr->bind_info.addr);
- bseg->length = cpu_to_be64(wr->bind_info.length);
-}
-
static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
{
memset(iseg, 0, sizeof(*iseg));
int i;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(qp);
+
+ if (sqp->roce_v2_gsi) {
+ struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
+ struct ib_gid_attr gid_attr;
+ union ib_gid gid;
+
+ if (!ib_get_cached_gid(ibqp->device,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &gid,
+ &gid_attr)) {
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+ to_mqp(sqp->roce_v2_gsi) : qp;
+ } else {
+ pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
+ ah->av.ib.gid_index);
+ }
+ }
+ }
+
spin_lock_irqsave(&qp->sq.lock, flags);
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
break;
- case IB_WR_BIND_MW:
- ctrl->srcrb_flags |=
- cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
- set_bind_seg(wqe, bind_mw_wr(wr));
- wqe += sizeof(struct mlx4_wqe_bind_seg);
- size += sizeof(struct mlx4_wqe_bind_seg) / 16;
- break;
default:
/* No extra segments required for sends */
break;
if (err)
goto err_mtt;
- srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
+ srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
+ GFP_KERNEL | __GFP_NOWARN);
if (!srq->wrid) {
srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
GFP_KERNEL, PAGE_KERNEL);
#include "mlx5_ib.h"
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
- struct mlx5_ib_ah *ah)
+static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ah *ah,
+ struct ib_ah_attr *ah_attr,
+ enum rdma_link_layer ll)
{
if (ah_attr->ah_flags & IB_AH_GRH) {
memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
ah->av.tclass = ah_attr->grh.traffic_class;
}
- ah->av.rlid = cpu_to_be16(ah_attr->dlid);
- ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
- ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
+ ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
+ ah->av.udp_sport =
+ mlx5_get_roce_udp_sport(dev,
+ ah_attr->port_num,
+ ah_attr->grh.sgid_index);
+ ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
+ } else {
+ ah->av.rlid = cpu_to_be16(ah_attr->dlid);
+ ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
+ ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
+ }
return &ah->ibah;
}
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
struct mlx5_ib_ah *ah;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ enum rdma_link_layer ll;
+
+ ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+
+ if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
+ return ERR_PTR(-EINVAL);
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
- return create_ib_ah(ah_attr, ah); /* never fails */
+ return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case MLX5_OPCODE_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case MLX5_OPCODE_UMR:
wc->opcode = get_umr_comp(wq, idx);
break;
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
struct mlx5_ib_qp *qp)
{
+ enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
struct mlx5_ib_srq *srq;
struct mlx5_ib_wq *wq;
} else {
wc->pkey_index = 0;
}
+
+ if (ll != IB_LINK_LAYER_ETHERNET)
+ return;
+
+ switch (wc->sl & 0x3) {
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
+ wc->network_hdr_type = RDMA_NETWORK_IB;
+ break;
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+ break;
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ break;
+ }
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
int eqn;
int err;
- if (attr->flags)
- return ERR_PTR(-EINVAL);
-
if (entries < 0)
return ERR_PTR(-EINVAL);
+ if (check_cq_create_flags(attr->flags))
+ return ERR_PTR(-EOPNOTSUPP);
+
entries = roundup_pow_of_two(entries + 1);
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
return ERR_PTR(-EINVAL);
spin_lock_init(&cq->lock);
cq->resize_buf = NULL;
cq->resize_umem = NULL;
+ cq->create_flags = attr->flags;
if (context) {
err = create_cq_user(dev, udata, context, cq, entries,
cq->cqe_size = cqe_size;
cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+
+ if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+ cqb->ctx.cqe_sz_flags |= (1 << 1);
+
cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
if (err)
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
#include <linux/mlx5/vport.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
+/* Bit tested against the atomic_size_qp capability in get_atomic_caps().
+ * NOTE(review): 1 << 3 presumably flags support for 8-byte atomic operands,
+ * as the name suggests - confirm against the device capability layout.
+ */
+enum {
+ MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
+};
+
static enum rdma_link_layer
-mlx5_ib_port_link_layer(struct ib_device *device)
+mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
-
- switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
+ switch (port_type_cap) {
case MLX5_CAP_PORT_TYPE_IB:
return IB_LINK_LAYER_INFINIBAND;
case MLX5_CAP_PORT_TYPE_ETH:
}
}
+/*
+ * Report the RDMA link layer of the device by translating its general
+ * port_type capability.  @port_num is accepted for the callback signature
+ * but not consulted: the capability is read per device.
+ */
+static enum rdma_link_layer
+mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
+{
+	struct mlx5_core_dev *mdev = to_mdev(device)->mdev;
+
+	return mlx5_port_type_cap_to_rdma_ll(MLX5_CAP_GEN(mdev, port_type));
+}
+
+/*
+ * Netdevice notifier callback: track the net_device that belongs to this
+ * mlx5 device.
+ *
+ * Only NETDEV_REGISTER and NETDEV_UNREGISTER are of interest.  When the
+ * reported netdev's parent is this device's PCI device, roce.netdev is
+ * updated under the write side of roce.netdev_lock: cleared on unregister,
+ * set to the netdev on register.  Always returns NOTIFY_DONE.
+ */
+static int mlx5_netdev_event(struct notifier_block *this,
+			     unsigned long event, void *ptr)
+{
+	struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
+						 roce.nb);
+	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+
+	switch (event) {
+	case NETDEV_REGISTER:
+	case NETDEV_UNREGISTER:
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	write_lock(&ibdev->roce.netdev_lock);
+	if (ndev->dev.parent == &ibdev->mdev->pdev->dev) {
+		if (event == NETDEV_UNREGISTER)
+			ibdev->roce.netdev = NULL;
+		else
+			ibdev->roce.netdev = ndev;
+	}
+	write_unlock(&ibdev->roce.netdev_lock);
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Return the net_device currently recorded for this device, or NULL when
+ * none is recorded.  A non-NULL result has had dev_hold() called on it, so
+ * the caller is expected to balance it with dev_put().  @port_num is not
+ * consulted.
+ */
+static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
+					     u8 port_num)
+{
+	struct mlx5_roce *roce = &to_mdev(device)->roce;
+	struct net_device *ndev;
+
+	/* Holding the read lock keeps mlx5_netdev_event() from clearing the
+	 * pointer between the load and the dev_hold().
+	 */
+	read_lock(&roce->netdev_lock);
+	ndev = roce->netdev;
+	if (ndev)
+		dev_hold(ndev);
+	read_unlock(&roce->netdev_lock);
+
+	return ndev;
+}
+
+/*
+ * Fill @props for a port whose attributes come from the associated
+ * net_device (used for the MLX5_VPORT_ACCESS_METHOD_NIC case of the port
+ * query).
+ *
+ * @props is zeroed first.  Static fields (capability flags, GID/P_Key table
+ * lengths, max MTU, max message size) are taken from device capabilities.
+ * The port is reported down unless a netdev is recorded for the device and
+ * it is both running and has carrier.  When no netdev is recorded the
+ * function returns early, leaving active_mtu/active_width/active_speed at
+ * zero.
+ *
+ * Always returns 0.
+ */
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+				struct ib_port_attr *props)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct net_device *ndev;
+	enum ib_mtu ndev_ib_mtu;
+	/* The query below is called without checking its result, so start
+	 * from a defined value rather than reporting uninitialized stack
+	 * contents should it return without writing the counter.
+	 */
+	u16 qkey_viol_cntr = 0;
+
+	memset(props, 0, sizeof(*props));
+
+	props->port_cap_flags |= IB_PORT_CM_SUP;
+	props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
+
+	props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
+					   roce_address_table_size);
+	props->max_mtu = IB_MTU_4096;
+	props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
+	props->pkey_tbl_len = 1;
+	props->state = IB_PORT_DOWN;
+	/* NOTE(review): 3 is presumably the "Disabled" physical port state
+	 * and 5 (below) "LinkUp" - confirm against the IB PortInfo encoding.
+	 */
+	props->phys_state = 3;
+
+	mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
+	props->qkey_viol_cntr = qkey_viol_cntr;
+
+	/* Takes a reference on the netdev; released by dev_put() below. */
+	ndev = mlx5_ib_get_netdev(device, port_num);
+	if (!ndev)
+		return 0;
+
+	if (netif_running(ndev) && netif_carrier_ok(ndev)) {
+		props->state = IB_PORT_ACTIVE;
+		props->phys_state = 5;
+	}
+
+	ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
+
+	dev_put(ndev);
+
+	/* Never advertise more than the device maximum set above. */
+	props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
+
+	props->active_width = IB_WIDTH_4X; /* TODO */
+	props->active_speed = IB_SPEED_QDR; /* TODO */
+
+	return 0;
+}
+
+/*
+ * Encode @gid and its attributes into a roce_addr_layout at @mlx5_addr.
+ *
+ * A NULL @gid is a no-op: @mlx5_addr is left exactly as the caller prepared
+ * it and @attr is not dereferenced.  Otherwise @attr and @attr->ndev must be
+ * valid; the netdev supplies the source MAC and, for VLAN devices, the VLAN
+ * id.
+ */
+static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void *mlx5_addr)
+{
+/* Shorthand for setting a roce_addr_layout field; note it is not #undef'd. */
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+ char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+ source_l3_address);
+ void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+ source_mac_47_32);
+
+ if (!gid)
+ return;
+
+ ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+
+ if (is_vlan_dev(attr->ndev)) {
+ MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
+ MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+ }
+
+ /* Map the GID type onto the device RoCE version field; any other type
+ * only triggers a warning and leaves the field untouched.
+ */
+ switch (attr->gid_type) {
+ case IB_GID_TYPE_IB:
+ MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+ break;
+ case IB_GID_TYPE_ROCE_UDP_ENCAP:
+ MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+ break;
+
+ default:
+ WARN_ON(true);
+ }
+
+ /* The L3 type is only written for non-IB GID types, chosen by whether
+ * the GID is an IPv4-mapped IPv6 address.
+ */
+ if (attr->gid_type != IB_GID_TYPE_IB) {
+ if (ipv6_addr_v4mapped((void *)gid))
+ MLX5_SET_RA(mlx5_addr, roce_l3_type,
+ MLX5_ROCE_L3_TYPE_IPV4);
+ else
+ MLX5_SET_RA(mlx5_addr, roce_l3_type,
+ MLX5_ROCE_L3_TYPE_IPV6);
+ }
+
+ /* IB-type and non-v4-mapped GIDs are copied whole; for a v4-mapped GID
+ * of another type only the last 4 bytes (offset 12) are copied, to the
+ * same offset in the L3 address field.
+ */
+ if ((attr->gid_type == IB_GID_TYPE_IB) ||
+ !ipv6_addr_v4mapped((void *)gid))
+ memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
+ else
+ memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
+}
+
+/*
+ * Issue a SET_ROCE_ADDRESS command with roce_address_index set to @index
+ * and the address encoded from @gid/@attr.
+ *
+ * With a NULL @gid the address part of the command stays all-zero (the
+ * input buffer is zeroed and ib_gid_to_mlx5_roce_addr() returns early),
+ * which is how mlx5_ib_del_gid() uses it.
+ *
+ * Returns -EINVAL unless the port link layer is Ethernet, otherwise the
+ * result of mlx5_cmd_exec().
+ */
+static int set_roce_addr(struct ib_device *device, u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)];
+ u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+ void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
+
+ if (ll != IB_LINK_LAYER_ETHERNET)
+ return -EINVAL;
+
+ /* Zero first: the encoder below only sets the fields it needs. */
+ memset(in, 0, sizeof(in));
+
+ ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
+
+ MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+ MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+
+ memset(out, 0, sizeof(out));
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+/*
+ * add_gid callback: program @gid/@attr at @index via set_roce_addr() and
+ * return its result.  @context is unused.
+ */
+static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ __always_unused void **context)
+{
+ return set_roce_addr(device, port_num, index, gid, attr);
+}
+
+/*
+ * del_gid callback: call set_roce_addr() for @index with a NULL GID and
+ * NULL attributes, and return its result.  @context is unused.
+ */
+static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, __always_unused void **context)
+{
+ return set_roce_addr(device, port_num, index, NULL, NULL);
+}
+
+/*
+ * Return the UDP source port (big endian) to use for the GID at @index of
+ * @port_num.
+ *
+ * The result is the device's r_roce_min_src_udp_port capability when the
+ * cached GID is of type IB_GID_TYPE_ROCE_UDP_ENCAP.  It is 0 when the cache
+ * lookup fails, when the entry has no net_device, or for any other GID
+ * type.
+ */
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+			       int index)
+{
+	union ib_gid gid;
+	struct ib_gid_attr attr;
+
+	if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr) ||
+	    !attr.ndev)
+		return 0;
+
+	/* Only the GID type is needed from here on; drop the netdev
+	 * reference (presumably taken by the cache lookup) right away.
+	 */
+	dev_put(attr.ndev);
+
+	if (attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+		return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev,
+						 r_roce_min_src_udp_port));
+
+	return 0;
+}
+
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
return !dev->mdev->issi;
if (mlx5_use_mad_ifc(to_mdev(ibdev)))
return MLX5_VPORT_ACCESS_METHOD_MAD;
- if (mlx5_ib_port_link_layer(ibdev) ==
+ if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET)
return MLX5_VPORT_ACCESS_METHOD_NIC;
return MLX5_VPORT_ACCESS_METHOD_HCA;
}
+/*
+ * Set props->atomic_cap from the device atomic capabilities.
+ *
+ * IB_ATOMIC_HCA is reported only when all of the following hold:
+ *  - both compare-and-swap and fetch-and-add are in atomic_operations,
+ *  - atomic_size_qp has the MLX5_ATOMIC_SIZE_QP_8BYTES bit set,
+ *  - atomic_req_8B_endianess_mode is non-zero (the device can respond in
+ *    host endianness).
+ * Otherwise IB_ATOMIC_NONE is reported.
+ */
+static void get_atomic_caps(struct mlx5_ib_dev *dev,
+			    struct ib_device_attr *props)
+{
+	u8 required_ops = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
+	u8 supported_ops = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+	u8 size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+	u8 endianness_mode =
+		MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+	bool hca_atomics;
+
+	hca_atomics = ((supported_ops & required_ops) == required_ops) &&
+		      (size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
+		      endianness_mode;
+
+	props->atomic_cap = hca_atomics ? IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+}
+
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
- if (!err)
- *sys_image_guid = cpu_to_be64(tmp);
- return err;
+ break;
+
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+ break;
default:
return -EINVAL;
}
+
+ if (!err)
+ *sys_image_guid = cpu_to_be64(tmp);
+
+ return err;
+
}
static int mlx5_query_max_pkeys(struct ib_device *ibdev,
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
- if (!err)
- *node_guid = cpu_to_be64(tmp);
- return err;
+ break;
+
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
+ break;
default:
return -EINVAL;
}
+
+ if (!err)
+ *node_guid = cpu_to_be64(tmp);
+
+ return err;
}
struct mlx5_reg_node_desc {
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ (MLX5_CAP_ETH(dev->mdev, csum_cap)))
+ props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
props->vendor_part_id = mdev->pdev->device;
props->hw_ver = mdev->pdev->revision;
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = props->max_sge;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
- props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1;
+ props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len = (unsigned int)-1;
- props->atomic_cap = IB_ATOMIC_NONE;
+ get_atomic_caps(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+ props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
+ props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (MLX5_CAP_GEN(mdev, pg))
props->odp_caps = dev->odp_caps;
#endif
+ if (MLX5_CAP_GEN(mdev, cd))
+ props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+
return 0;
}
case MLX5_VPORT_ACCESS_METHOD_HCA:
return mlx5_query_hca_port(ibdev, port, props);
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ return mlx5_query_port_roce(ibdev, port, props);
+
default:
return -EINVAL;
}
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_alloc_ucontext_req_v2 req;
- struct mlx5_ib_alloc_ucontext_resp resp;
+ struct mlx5_ib_alloc_ucontext_req_v2 req = {};
+ struct mlx5_ib_alloc_ucontext_resp resp = {};
struct mlx5_ib_ucontext *context;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- memset(&req, 0, sizeof(req));
+ if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
+ return ERR_PTR(-EINVAL);
+
reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
ver = 0;
- else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+ else if (reqlen >= sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
ver = 2;
else
return ERR_PTR(-EINVAL);
- err = ib_copy_from_udata(&req, udata, reqlen);
+ err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
if (err)
return ERR_PTR(err);
- if (req.flags || req.reserved)
+ if (req.flags)
return ERR_PTR(-EINVAL);
if (req.total_num_uuars > MLX5_MAX_UUARS)
if (req.total_num_uuars == 0)
return ERR_PTR(-EINVAL);
+ if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (reqlen > sizeof(req) &&
+ !ib_is_udata_cleared(udata, sizeof(req),
+ reqlen - sizeof(req)))
+ return ERR_PTR(-EOPNOTSUPP);
+
req.total_num_uuars = ALIGN(req.total_num_uuars,
MLX5_NON_FP_BF_REGS_PER_PAGE);
if (req.num_low_latency_uuars > req.total_num_uuars - 1)
resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ resp.cqe_version = min_t(__u8,
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+ req.max_cqe_version);
+ resp.response_length = min(offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length), udata->outlen);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
+ err = mlx5_core_alloc_transport_domain(dev->mdev,
+ &context->tdn);
+ if (err)
+ goto out_uars;
+ }
+
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
resp.tot_uuars = req.total_num_uuars;
resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
- err = ib_copy_to_udata(udata, &resp,
- sizeof(resp) - sizeof(resp.reserved));
+
+ if (field_avail(typeof(resp), cqe_version, udata->outlen))
+ resp.response_length += sizeof(resp.cqe_version);
+
+ if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+ resp.comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp.hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg, internal_timer_h) %
+ PAGE_SIZE;
+ resp.response_length += sizeof(resp.hca_core_clock_offset) +
+ sizeof(resp.reserved2) +
+ sizeof(resp.reserved3);
+ }
+
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
- goto out_uars;
+ goto out_td;
uuari->ver = ver;
uuari->num_low_latency_uuars = req.num_low_latency_uuars;
uuari->uars = uars;
uuari->num_uars = num_uars;
+ context->cqe_version = resp.cqe_version;
+
return &context->ibucontext;
+out_td:
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
out_uars:
for (i--; i >= 0; i--)
mlx5_cmd_free_uar(dev->mdev, uars[i].index);
struct mlx5_uuar_info *uuari = &context->uuari;
int i;
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
for (i = 0; i < uuari->num_uars; i++) {
if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
return -ENOSYS;
+ case MLX5_IB_MMAP_CORE_CLOCK:
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ return -EPERM;
+
+ /* Don't expose to user-space information it shouldn't have */
+ if (PAGE_SIZE > 4096)
+ return -EOPNOTSUPP;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ pfn = (dev->mdev->iseg_base +
+ offsetof(struct mlx5_init_seg, internal_timer_h)) >>
+ PAGE_SHIFT;
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
+ vma->vm_start,
+ (unsigned long long)pfn << PAGE_SHIFT);
+ break;
+
default:
return -EINVAL;
}
mlx5_ib_dealloc_pd(devr->p0);
}
+/*
+ * Compute the RDMA core capability flags for the device's port.
+ *
+ * An InfiniBand link layer yields RDMA_CORE_PORT_IBA_IB.  Otherwise the
+ * device must advertise both the IPv4 and IPv6 L3 type capabilities, or 0
+ * is returned; given both, one flag is set per supported RoCE version
+ * (v1 -> RDMA_CORE_PORT_IBA_ROCE, v2 -> RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP).
+ */
+static u32 get_core_cap_flags(struct ib_device *ibdev)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	enum rdma_link_layer link = mlx5_ib_port_link_layer(ibdev, 1);
+	u8 l3_caps = MLX5_CAP_ROCE(dev->mdev, l3_type);
+	u8 version_caps = MLX5_CAP_ROCE(dev->mdev, roce_version);
+	u32 core_caps = 0;
+
+	if (link == IB_LINK_LAYER_INFINIBAND)
+		return RDMA_CORE_PORT_IBA_IB;
+
+	if (!(l3_caps & MLX5_ROCE_L3_TYPE_IPV4_CAP) ||
+	    !(l3_caps & MLX5_ROCE_L3_TYPE_IPV6_CAP))
+		return 0;
+
+	if (version_caps & MLX5_ROCE_VERSION_1_CAP)
+		core_caps |= RDMA_CORE_PORT_IBA_ROCE;
+
+	if (version_caps & MLX5_ROCE_VERSION_2_CAP)
+		core_caps |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+	return core_caps;
+}
+
static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ immutable->core_cap_flags = get_core_cap_flags(ibdev);
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
+/*
+ * Enable RoCE for @dev: register the netdevice notifier that tracks the
+ * device's net_device, then enable RoCE on the NIC vport.
+ *
+ * Returns 0 on success.  If enabling RoCE on the vport fails, the notifier
+ * is unregistered again and that error is returned; a notifier
+ * registration failure is returned as-is.
+ */
+static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+{
+	int err;
+
+	dev->roce.nb.notifier_call = mlx5_netdev_event;
+	err = register_netdevice_notifier(&dev->roce.nb);
+	if (err)
+		return err;
+
+	err = mlx5_nic_vport_enable_roce(dev->mdev);
+	if (err)
+		unregister_netdevice_notifier(&dev->roce.nb);
+
+	return err;
+}
+
+/*
+ * Undo mlx5_enable_roce() in reverse order: disable RoCE on the NIC vport,
+ * then unregister the netdevice notifier.
+ */
+static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+{
+ mlx5_nic_vport_disable_roce(dev->mdev);
+ unregister_netdevice_notifier(&dev->roce.nb);
+}
+
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
int err;
int i;
- /* don't create IB instance over Eth ports, no RoCE yet! */
- if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
return NULL;
printk_once(KERN_INFO "%s", mlx5_version);
dev->mdev = mdev;
+ rwlock_init(&dev->roce.netdev_lock);
err = get_port_caps(dev);
if (err)
goto err_dealloc;
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
+ dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
+ dev->ib_dev.add_gid = mlx5_ib_add_gid;
+ dev->ib_dev.del_gid = mlx5_ib_del_gid;
dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
dev->ib_dev.modify_device = mlx5_ib_modify_device;
dev->ib_dev.modify_port = mlx5_ib_modify_port;
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
- if (mlx5_ib_port_link_layer(&dev->ib_dev) ==
+ if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ err = mlx5_enable_roce(dev);
+ if (err)
+ goto err_dealloc;
+ }
+
err = create_dev_resources(&dev->devr);
if (err)
- goto err_dealloc;
+ goto err_disable_roce;
err = mlx5_ib_odp_init_one(dev);
if (err)
err_rsrc:
destroy_dev_resources(&dev->devr);
+err_disable_roce:
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ mlx5_disable_roce(dev);
+
err_dealloc:
ib_dealloc_device((struct ib_device *)dev);
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
struct mlx5_ib_dev *dev = context;
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
ib_unregister_device(&dev->ib_dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ mlx5_disable_roce(dev);
ib_dealloc_device(&dev->ib_dev);
}
#include <linux/mlx5/qp.h>
#include <linux/mlx5/srq.h>
#include <linux/types.h>
+#include <linux/mlx5/transobj.h>
#define mlx5_ib_dbg(dev, format, arg...) \
pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
__LINE__, current->pid, ##arg)
+/* True when member @fld of @type lies entirely within the first @sz bytes
+ * of the structure, i.e. offsetof(fld) + sizeof(fld) <= sz.
+ */
+#define field_avail(type, fld, sz) (offsetof(type, fld) + \
+ sizeof(((type *)0)->fld) <= (sz))
+/* User index assigned by verify_assign_uidx() when the CQE version is 0;
+ * user space may not pass this value itself when the CQE version is non-zero.
+ */
+#define MLX5_IB_DEFAULT_UIDX 0xffffff
+/* Mask of the bits a user-supplied index may occupy (the qpc user_index
+ * field); verify_assign_uidx() rejects values with bits outside it.
+ */
+#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
+
enum {
MLX5_IB_MMAP_CMD_SHIFT = 8,
MLX5_IB_MMAP_CMD_MASK = 0xff,
enum mlx5_ib_mmap_cmd {
MLX5_IB_MMAP_REGULAR_PAGE = 0,
- MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */
+ MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
+ /* 5 is chosen in order to be compatible with old versions of libmlx5 */
+ MLX5_IB_MMAP_CORE_CLOCK = 5,
};
enum {
MLX5_MAD_IFC_NET_VIEW = 4,
};
+/* UUAR index used for QPs created with MLX5_IB_QP_CROSS_CHANNEL instead of
+ * allocating one; create_user_qp() notes that CQ and QP must share a UAR.
+ */
+enum {
+ MLX5_CROSS_CHANNEL_UUAR = 0,
+};
+
+/* CQE format versions (V0 == 0, V1 == 1). */
+enum {
+ MLX5_CQE_VERSION_V0,
+ MLX5_CQE_VERSION_V1,
+};
+
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
struct list_head db_page_list;
*/
struct mutex db_page_mutex;
struct mlx5_uuar_info uuari;
+ u8 cqe_version;
+ /* Transport Domain number */
+ u32 tdn;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
struct mlx5_pagefault mpfault;
};
+/* Description of a work-queue buffer supplied by user space. */
+struct mlx5_ib_ubuffer {
+ /* umem obtained for the buffer, or NULL when there is none */
+ struct ib_umem *umem;
+ /* buffer size in bytes, computed from the WQE counts and shifts */
+ int buf_size;
+ /* user virtual address of the buffer (from the create-QP command) */
+ u64 buf_addr;
+};
+
+/* State shared by the QP flavours embedded in struct mlx5_ib_qp. */
+struct mlx5_ib_qp_base {
+ /* back-pointer to the owning mlx5_ib_qp; this is what to_mibqp()
+ * returns after locating the base from its mqp member
+ */
+ struct mlx5_ib_qp *container_mibqp;
+ struct mlx5_core_qp mqp;
+ struct mlx5_ib_ubuffer ubuffer;
+};
+
+/* Per-QP state for transport QPs.  It shares a union in struct mlx5_ib_qp
+ * with the raw packet QP state, so only one of the two is valid for a QP.
+ */
+struct mlx5_ib_qp_trans {
+ struct mlx5_ib_qp_base base;
+ u16 xrcdn;
+ /* copied into the QP's port on a MLX5_EVENT_TYPE_PATH_MIG event */
+ u8 alt_port;
+ u8 atomic_rd_en;
+ u8 resp_depth;
+};
+
struct mlx5_ib_rq {
+ struct mlx5_ib_qp_base base;
+ struct mlx5_ib_wq *rq;
+ struct mlx5_ib_ubuffer ubuffer;
+ struct mlx5_db *doorbell;
u32 tirn;
+ u8 state;
+};
+
+struct mlx5_ib_sq {
+ struct mlx5_ib_qp_base base;
+ struct mlx5_ib_wq *sq;
+ struct mlx5_ib_ubuffer ubuffer;
+ struct mlx5_db *doorbell;
+ u32 tisn;
+ u8 state;
};
struct mlx5_ib_raw_packet_qp {
+ struct mlx5_ib_sq sq;
struct mlx5_ib_rq rq;
};
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
- struct mlx5_core_qp mqp;
- struct mlx5_ib_raw_packet_qp raw_packet_qp;
+ struct mlx5_ib_qp_trans trans_qp;
+ struct mlx5_ib_raw_packet_qp raw_packet_qp;
};
-
struct mlx5_buf buf;
struct mlx5_db db;
struct mlx5_ib_wq rq;
- u32 doorbell_qpn;
u8 sq_signal_bits;
u8 fm_cache;
- int sq_max_wqes_per_wr;
- int sq_spare_wqes;
struct mlx5_ib_wq sq;
- struct ib_umem *umem;
- int buf_size;
-
/* serialize qp state modifications
*/
struct mutex mutex;
- u16 xrcdn;
u32 flags;
u8 port;
- u8 alt_port;
- u8 atomic_rd_en;
- u8 resp_depth;
u8 state;
- int mlx_type;
int wq_sig;
int scat_cqe;
int max_inline_data;
enum mlx5_ib_qp_flags {
MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0,
MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1,
+ MLX5_IB_QP_CROSS_CHANNEL = 1 << 2,
+ MLX5_IB_QP_MANAGED_SEND = 1 << 3,
+ MLX5_IB_QP_MANAGED_RECV = 1 << 4,
};
struct mlx5_umr_wr {
struct mlx5_ib_cq_buf *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
+ u32 create_flags;
};
struct mlx5_ib_srq {
struct ib_srq *s1;
};
+/* RoCE bookkeeping embedded in struct mlx5_ib_dev. */
+struct mlx5_roce {
+ /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
+ * netdev pointer
+ */
+ rwlock_t netdev_lock;
+ /* net_device whose parent is this device's PCI device; set and cleared
+ * by mlx5_netdev_event() on NETDEV_REGISTER / NETDEV_UNREGISTER
+ */
+ struct net_device *netdev;
+ /* netdevice notifier registered by mlx5_enable_roce() */
+ struct notifier_block nb;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
+ struct mlx5_roce roce;
MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
int num_ports;
/* serialize update of capability mask
static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
{
- return container_of(mqp, struct mlx5_ib_qp, mqp);
+ return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
}
static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
- struct mlx5_ib_ah *ah);
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int mlx5_ib_destroy_ah(struct ib_ah *ah);
struct ib_recv_wr **bad_wr);
void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length);
+ void *buffer, u32 length,
+ struct mlx5_ib_qp_base *base);
struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+ int index);
+
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+/*
+ * Validate CQ creation flags.
+ *
+ * Returns the subset of @flags that is not supported, so the result is
+ * zero exactly when every requested flag is one of
+ * IB_CQ_FLAGS_IGNORE_OVERRUN or IB_CQ_FLAGS_TIMESTAMP_COMPLETION.
+ */
+static inline u32 check_cq_create_flags(u32 flags)
+{
+	const u32 supported = IB_CQ_FLAGS_IGNORE_OVERRUN |
+			      IB_CQ_FLAGS_TIMESTAMP_COMPLETION;
+
+	return flags & ~supported;
+}
+
+/*
+ * Validate a user-supplied user index and store the value to use.
+ *
+ * With @cqe_version 0 the command value is ignored and *@user_index is set
+ * to MLX5_IB_DEFAULT_UIDX.  With a non-zero version @cmd_uidx must differ
+ * from MLX5_IB_DEFAULT_UIDX and fit within MLX5_USER_ASSIGNED_UIDX_MASK;
+ * it is then stored in *@user_index.
+ *
+ * Returns 0 on success, -EINVAL for a rejected @cmd_uidx (in which case
+ * *@user_index is not written).
+ */
+static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
+				      u32 *user_index)
+{
+	if (!cqe_version) {
+		*user_index = MLX5_IB_DEFAULT_UIDX;
+		return 0;
+	}
+
+	if (cmd_uidx == MLX5_IB_DEFAULT_UIDX)
+		return -EINVAL;
+
+	if (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK)
+		return -EINVAL;
+
+	*user_index = cmd_uidx;
+	return 0;
+}
#endif /* MLX5_IB_H */
static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
struct mlx5_ib_pfault *pfault,
- int error) {
+ int error)
+{
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
- int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
+ int ret = mlx5_core_page_fault_resume(dev->mdev,
+ qpn,
pfault->mpfault.flags,
error);
if (ret)
- pr_err("Failed to resolve the page fault on QP 0x%x\n",
- qp->mqp.qpn);
+ pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
}
/*
#if defined(DEBUG)
u32 ctrl_wqe_index, ctrl_qpn;
#endif
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
if (ds == 0) {
mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
- wqe_index, qp->mqp.qpn);
+ wqe_index, qpn);
return -EFAULT;
}
MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
if (wqe_index != ctrl_wqe_index) {
mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
- wqe_index, qp->mqp.qpn,
+ wqe_index, qpn,
ctrl_wqe_index);
return -EFAULT;
}
ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
MLX5_WQE_CTRL_QPN_SHIFT;
- if (qp->mqp.qpn != ctrl_qpn) {
+ if (qpn != ctrl_qpn) {
mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
- wqe_index, qp->mqp.qpn,
+ wqe_index, qpn,
ctrl_qpn);
return -EFAULT;
}
int resume_with_error = 0;
u16 wqe_index = pfault->mpfault.wqe.wqe_index;
int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer) {
}
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
- PAGE_SIZE);
+ PAGE_SIZE, &qp->trans_qp.base);
if (ret < 0) {
mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
- -ret, wqe_index, qp->mqp.qpn);
+ -ret, wqe_index, qpn);
resume_with_error = 1;
goto resolve_page_fault;
}
resolve_page_fault:
mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
- qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+ qpn, resume_with_error,
+ pfault->mpfault.flags);
free_page((unsigned long)buffer);
}
qp->disable_page_faults = 1;
spin_lock_init(&qp->disable_page_faults_lock);
- qp->mqp.pfault_handler = mlx5_ib_pfault_handler;
+ qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
#include <linux/module.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
#include "user.h"
* Return: the number of bytes copied, or an error code.
*/
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length)
+ void *buffer, u32 length,
+ struct mlx5_ib_qp_base *base)
{
struct ib_device *ibdev = qp->ibqp.device;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
size_t offset;
size_t wq_end;
- struct ib_umem *umem = qp->umem;
+ struct ib_umem *umem = base->ubuffer.umem;
u32 first_copy_length;
int wqe_length;
int ret;
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
struct ib_event event;
- if (type == MLX5_EVENT_TYPE_PATH_MIG)
- to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+ if (type == MLX5_EVENT_TYPE_PATH_MIG) {
+ /* This event is only valid for trans_qps */
+ to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
+ }
if (ibqp->event_handler) {
event.device = ibqp->device;
static int set_user_buf_size(struct mlx5_ib_dev *dev,
struct mlx5_ib_qp *qp,
- struct mlx5_ib_create_qp *ucmd)
+ struct mlx5_ib_create_qp *ucmd,
+ struct mlx5_ib_qp_base *base,
+ struct ib_qp_init_attr *attr)
{
int desc_sz = 1 << qp->sq.wqe_shift;
return -EINVAL;
}
- qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
- (qp->sq.wqe_cnt << 6);
+ if (attr->qp_type == IB_QPT_RAW_PACKET) {
+ base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+ qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
+ } else {
+ base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << 6);
+ }
return 0;
}
case IB_QPT_SMI: return MLX5_QP_ST_QP0;
case IB_QPT_GSI: return MLX5_QP_ST_QP1;
case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
- case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
case IB_QPT_RAW_PACKET:
+ case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
case IB_QPT_MAX:
default: return -EINVAL;
}
return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
}
+/*
+ * Obtain a umem for the user buffer at @addr of @size bytes and report its
+ * layout.
+ *
+ * On success returns 0 with *@umem set, *@npages, *@page_shift and *@ncont
+ * filled in by mlx5_ib_cont_pages(), and *@offset by
+ * mlx5_ib_get_buf_offset().
+ *
+ * On failure the error is returned.  Note that the two failure paths leave
+ * *@umem differently: when ib_umem_get() fails, *@umem still holds the
+ * ERR_PTR value; when the offset is bad, the umem is released and *@umem is
+ * reset to NULL.
+ */
+static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
+ struct ib_pd *pd,
+ unsigned long addr, size_t size,
+ struct ib_umem **umem,
+ int *npages, int *page_shift, int *ncont,
+ u32 *offset)
+{
+ int err;
+
+ *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
+ if (IS_ERR(*umem)) {
+ mlx5_ib_dbg(dev, "umem_get failed\n");
+ return PTR_ERR(*umem);
+ }
+
+ mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
+
+ err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
+ if (err) {
+ mlx5_ib_warn(dev, "bad offset\n");
+ goto err_umem;
+ }
+
+ mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
+ addr, size, *npages, *page_shift, *ncont, *offset);
+
+ return 0;
+
+err_umem:
+ ib_umem_release(*umem);
+ *umem = NULL;
+
+ return err;
+}
+
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
+ struct ib_qp_init_attr *attr,
struct mlx5_create_qp_mbox_in **in,
- struct mlx5_ib_create_qp_resp *resp, int *inlen)
+ struct mlx5_ib_create_qp_resp *resp, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
struct mlx5_ib_create_qp ucmd;
+ struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
int page_shift = 0;
int uar_index;
int npages;
/*
* TBD: should come from the verbs when we have the API
*/
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to medium latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ /* In CROSS_CHANNEL CQ and QP must use the same UAR */
+ uuarn = MLX5_CROSS_CHANNEL_UUAR;
+ else {
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to high latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to medium latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
if (uuarn < 0) {
- mlx5_ib_warn(dev, "uuar allocation failed\n");
- return uuarn;
+ mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to high latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ if (uuarn < 0) {
+ mlx5_ib_warn(dev, "uuar allocation failed\n");
+ return uuarn;
+ }
}
}
}
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- err = set_user_buf_size(dev, qp, &ucmd);
+ err = set_user_buf_size(dev, qp, &ucmd, base, attr);
if (err)
goto err_uuar;
- if (ucmd.buf_addr && qp->buf_size) {
- qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
- if (IS_ERR(qp->umem)) {
- mlx5_ib_dbg(dev, "umem_get failed\n");
- err = PTR_ERR(qp->umem);
+ if (ucmd.buf_addr && ubuffer->buf_size) {
+ ubuffer->buf_addr = ucmd.buf_addr;
+ err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
+ ubuffer->buf_size,
+ &ubuffer->umem, &npages, &page_shift,
+ &ncont, &offset);
+ if (err)
goto err_uuar;
- }
} else {
- qp->umem = NULL;
- }
-
- if (qp->umem) {
- mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
- &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
- mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
- ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+ ubuffer->umem = NULL;
}
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
err = -ENOMEM;
goto err_umem;
}
- if (qp->umem)
- mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+ if (ubuffer->umem)
+ mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
+ (*in)->pas, 0);
(*in)->ctx.log_pg_sz_remote_qpn =
cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
(*in)->ctx.params2 = cpu_to_be32(offset << 6);
kvfree(*in);
err_umem:
- if (qp->umem)
- ib_umem_release(qp->umem);
+ if (ubuffer->umem)
+ ib_umem_release(ubuffer->umem);
err_uuar:
free_uuar(&context->uuari, uuarn);
return err;
}
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
+ struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &qp->db);
- if (qp->umem)
- ib_umem_release(qp->umem);
+ if (base->ubuffer.umem)
+ ib_umem_release(base->ubuffer.umem);
free_uuar(&context->uuari, qp->uuarn);
}
static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
- struct mlx5_create_qp_mbox_in **in, int *inlen)
+ struct mlx5_create_qp_mbox_in **in, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
struct mlx5_uuar_info *uuari;
qp->rq.offset = 0;
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+ base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
- err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf);
+ err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
goto err_uuar;
return 0;
}
+/*
+ * Create the TIS object used by the send queue of a Raw Packet QP.
+ *
+ * A zeroed create_tis_in command is built on the stack; the only context
+ * field set is transport_domain, taken from @tdn. &sq->tisn is passed to
+ * mlx5_core_create_tis() as the output location for the new TIS number.
+ *
+ * Returns whatever mlx5_core_create_tis() returns.
+ */
+static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq, u32 tdn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(tisc, tisc, transport_domain, tdn);
+
+ return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
+}
+
+/*
+ * Destroy the TIS identified by sq->tisn (counterpart of
+ * create_raw_packet_qp_tis()).
+ */
+static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+}
+
+/*
+ * Create the send queue (SQ) object of a user-space Raw Packet QP.
+ *
+ * @dev: mlx5 IB device
+ * @sq: send-queue state; sq->ubuffer.buf_addr and buf_size describe the
+ * user buffer, and sq->tisn is written into the SQ context
+ * @qpin: create_qp_in mailbox; its qpc supplies user_index, cqn_snd, pd,
+ * uar_page, dbr_addr and log_sq_size
+ * @pd: protection domain, passed through to mlx5_ib_umem_get()
+ *
+ * Returns 0 on success or an error code. On any failure after the umem
+ * was obtained, the umem is released and sq->ubuffer.umem is set to NULL.
+ */
+static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq, void *qpin,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
+ __be64 *pas;
+ void *in;
+ void *sqc;
+ void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+ void *wq;
+ int inlen;
+ int err;
+ int page_shift = 0;
+ int npages;
+ int ncont = 0;
+ u32 offset = 0;
+
+ /* Obtain the umem for the user SQ buffer plus its paging parameters. */
+ err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
+ &sq->ubuffer.umem, &npages, &page_shift,
+ &ncont, &offset);
+ if (err)
+ return err;
+
+ /* The command is followed by one 64-bit page address per ncont entry. */
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_umem;
+ }
+
+ /* SQ context: starts in RST state and inherits identifiers from the qpc. */
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
+ MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
+ MLX5_SET(sqc, sqc, tis_lst_sz, 1);
+ MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
+
+ /* Work-queue section: geometry comes from the qpc and the umem layout. */
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+ MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
+ MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
+ MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(wq, wq, page_offset, offset);
+
+ /* Fill the page-address array from the umem. */
+ pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+
+ err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
+
+ /* The command buffer is freed whether or not creation succeeded. */
+ kvfree(in);
+
+ if (err)
+ goto err_umem;
+
+ return 0;
+
+err_umem:
+ ib_umem_release(sq->ubuffer.umem);
+ sq->ubuffer.umem = NULL;
+
+ return err;
+}
+
+/*
+ * Tear down the SQ created by create_raw_packet_qp_sq(): destroy the
+ * tracked SQ object first, then release the umem of its user buffer.
+ */
+static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+ ib_umem_release(sq->ubuffer.umem);
+}
+
+/*
+ * Compute the size in bytes of the page-address (PAS) array that covers
+ * the receive queue described by @qpc.
+ *
+ * The RQ size in bytes is 1 << (log_rq_size + 4 + log_rq_stride); the
+ * starting offset inside the first page is page_offset multiplied by
+ * 1/64 of the page size. The number of pages is the sum of the two,
+ * rounded up to whole pages, and each page takes one u64 entry.
+ *
+ * NOTE(review): the "+ 12" presumably converts log_page_size from
+ * 4 KiB-page units to a byte shift - confirm against the device spec.
+ */
+static int get_rq_pas_size(void *qpc)
+{
+ u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
+ u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
+ u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size);
+ u32 page_offset = MLX5_GET(qpc, qpc, page_offset);
+ u32 po_quanta = 1 << (log_page_size - 6);
+ u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride);
+ u32 page_size = 1 << log_page_size;
+ u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
+ u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
+
+ return rq_num_pas * sizeof(u64);
+}
+
+/*
+ * Create the receive queue (RQ) object of a Raw Packet QP.
+ *
+ * @dev: mlx5 IB device
+ * @rq: receive-queue state; &rq->base.mqp is handed to
+ * mlx5_core_create_rq_tracked()
+ * @qpin: create_qp_in mailbox; its qpc supplies user_index, cqn_rcv,
+ * end_padding_mode, page_offset, pd, dbr_addr, log_rq_stride,
+ * log_page_size and log_rq_size, and its pas array supplies the
+ * page addresses of the RQ
+ *
+ * Returns 0 on success, -ENOMEM if the command buffer cannot be
+ * allocated, or the error returned by mlx5_core_create_rq_tracked().
+ */
+static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq, void *qpin)
+{
+ __be64 *pas;
+ __be64 *qp_pas;
+ void *in;
+ void *rqc;
+ void *wq;
+ void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+ int inlen;
+ int err;
+ u32 rq_pas_size = get_rq_pas_size(qpc);
+
+ /* The command is followed by the RQ's share of the page-address array. */
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ /* RQ context: starts in RST state and inherits identifiers from the qpc. */
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, vsd, 1);
+ MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+ MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
+ MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
+
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ /*
+ * end_padding_mode is a narrow bit-field, so it must be read with
+ * MLX5_GET; MLX5_GET64 is only for full 64-bit fields such as
+ * dbr_addr and would fetch the whole surrounding 64-bit word.
+ */
+ MLX5_SET(wq, wq, end_padding_mode,
+ MLX5_GET(qpc, qpc, end_padding_mode));
+ MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+ MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+ MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+ /* The wq stride is the qpc stride plus 4, matching get_rq_pas_size(). */
+ MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
+ MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+ MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
+
+ /* Reuse the page addresses already populated in the QP mailbox. */
+ pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
+ memcpy(pas, qp_pas, rq_pas_size);
+
+ err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
+
+ /* The command buffer is freed whether or not creation succeeded. */
+ kvfree(in);
+
+ return err;
+}
+
+/*
+ * Destroy the tracked RQ object created by create_raw_packet_qp_rq().
+ */
+static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq)
+{
+ mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
+}
+
+/*
+ * Create the TIR object that refers to the RQ of a Raw Packet QP.
+ *
+ * The TIR context is set to direct dispatch, with inline_rqn taken from
+ * rq->base.mqp.qpn and transport_domain taken from @tdn. &rq->tirn is
+ * passed to mlx5_core_create_tir() as the output location.
+ *
+ * Returns 0 on success, -ENOMEM if the command buffer cannot be
+ * allocated, or the error returned by mlx5_core_create_tir().
+ */
+static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq, u32 tdn)
+{
+ u32 *in;
+ void *tirc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+
+ err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
+
+ /* The command buffer is freed whether or not creation succeeded. */
+ kvfree(in);
+
+ return err;
+}
+
+/*
+ * Destroy the TIR identified by rq->tirn (counterpart of
+ * create_raw_packet_qp_tir()).
+ */
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq)
+{
+ mlx5_core_destroy_tir(dev->mdev, rq->tirn);
+}
+
+/*
+ * Create the hardware objects that make up a Raw Packet QP.
+ *
+ * If the QP has send WQEs, a TIS and then an SQ are created; if it has
+ * receive WQEs, an RQ and then a TIR are created. The transport domain
+ * for TIS and TIR comes from the user context reached through
+ * pd->uobject, so @pd must belong to a user context.
+ *
+ * On success the QP number reported in qp->trans_qp.base.mqp.qpn is the
+ * SQ number when an SQ exists, otherwise the RQ number.
+ *
+ * Returns 0 on success or an error code; on failure every object created
+ * by this call is destroyed again, in reverse order.
+ */
+static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_mbox_in *in,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct ib_uobject *uobj = pd->uobject;
+ struct ib_ucontext *ucontext = uobj->context;
+ struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+ int err;
+ u32 tdn = mucontext->tdn;
+
+ /* Send side: the TIS must exist before the SQ that references it. */
+ if (qp->sq.wqe_cnt) {
+ err = create_raw_packet_qp_tis(dev, sq, tdn);
+ if (err)
+ return err;
+
+ err = create_raw_packet_qp_sq(dev, sq, in, pd);
+ if (err)
+ goto err_destroy_tis;
+
+ sq->base.container_mibqp = qp;
+ }
+
+ /* Receive side: the RQ must exist before the TIR that references it. */
+ if (qp->rq.wqe_cnt) {
+ err = create_raw_packet_qp_rq(dev, rq, in);
+ if (err)
+ goto err_destroy_sq;
+
+ rq->base.container_mibqp = qp;
+
+ err = create_raw_packet_qp_tir(dev, rq, tdn);
+ if (err)
+ goto err_destroy_rq;
+ }
+
+ qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
+ rq->base.mqp.qpn;
+
+ return 0;
+
+err_destroy_rq:
+ destroy_raw_packet_qp_rq(dev, rq);
+err_destroy_sq:
+ /* Without send WQEs no SQ/TIS was created, so nothing is left to undo. */
+ if (!qp->sq.wqe_cnt)
+ return err;
+ destroy_raw_packet_qp_sq(dev, sq);
+err_destroy_tis:
+ destroy_raw_packet_qp_tis(dev, sq);
+
+ return err;
+}
+
+/*
+ * Destroy the hardware objects of a Raw Packet QP, in the reverse of the
+ * order used by create_raw_packet_qp(): TIR then RQ on the receive side,
+ * SQ then TIS on the send side. Each side is only touched when the QP has
+ * WQEs of that kind.
+ */
+static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+ if (qp->rq.wqe_cnt) {
+ destroy_raw_packet_qp_tir(dev, rq);
+ destroy_raw_packet_qp_rq(dev, rq);
+ }
+
+ if (qp->sq.wqe_cnt) {
+ destroy_raw_packet_qp_sq(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq);
+ }
+}
+
+/*
+ * Point the SQ and RQ sub-objects of @raw_packet_qp at the work queues of
+ * the parent @qp; both share the parent's doorbell record (&qp->db).
+ */
+static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp)
+{
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+ sq->sq = &qp->sq;
+ rq->rq = &qp->rq;
+ sq->doorbell = &qp->db;
+ rq->doorbell = &qp->db;
+}
+
static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_resources *devr = &dev->devr;
struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_qp_base *base;
struct mlx5_ib_create_qp_resp resp;
struct mlx5_create_qp_mbox_in *in;
struct mlx5_ib_create_qp ucmd;
int inlen = sizeof(*in);
int err;
+ u32 uidx = MLX5_IB_DEFAULT_UIDX;
+ void *qpc;
+
+ base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
- mlx5_ib_odp_create_qp(qp);
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ mlx5_ib_odp_create_qp(qp);
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
}
}
+ if (init_attr->create_flags &
+ (IB_QP_CREATE_CROSS_CHANNEL |
+ IB_QP_CREATE_MANAGED_SEND |
+ IB_QP_CREATE_MANAGED_RECV)) {
+ if (!MLX5_CAP_GEN(mdev, cd)) {
+ mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
+ return -EINVAL;
+ }
+ if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
+ qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
+ qp->flags |= MLX5_IB_QP_MANAGED_SEND;
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
+ qp->flags |= MLX5_IB_QP_MANAGED_RECV;
+ }
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
return -EFAULT;
}
+ err = get_qp_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+
qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
} else {
ucmd.sq_wqe_count, max_wqes);
return -EINVAL;
}
- err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
+ err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
+ &resp, &inlen, base);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
} else {
- err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
+ err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
+ base);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
}
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
+ if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
+ if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
+
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
int rcqe_sz;
int scqe_sz;
in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
- err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen);
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ /* 0xffffff means we ask to work with cqe version 0 */
+ MLX5_SET(qpc, qpc, user_index, uidx);
+ }
+
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
+ raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
+ err = create_raw_packet_qp(dev, qp, in, pd);
+ } else {
+ err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
+ }
+
if (err) {
mlx5_ib_dbg(dev, "create qp failed\n");
goto err_create;
}
kvfree(in);
- /* Hardware wants QPN written in big-endian order (after
- * shifting) for send doorbell. Precompute this value to save
- * a little bit when posting sends.
- */
- qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
- qp->mqp.event = mlx5_ib_qp_event;
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
return 0;
err_create:
if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(pd, qp);
+ destroy_qp_user(pd, qp, base);
else if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
case IB_QPT_UD:
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
+ case IB_QPT_RAW_PACKET:
*send_cq = to_mcq(qp->ibqp.send_cq);
*recv_cq = to_mcq(qp->ibqp.recv_cq);
break;
- case IB_QPT_RAW_PACKET:
case IB_QPT_MAX:
default:
*send_cq = NULL;
}
}
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ u16 operation);
+
static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_modify_qp_mbox_in *in;
int err;
+ base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
+
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return;
if (qp->state != IB_QPS_RESET) {
- mlx5_ib_qp_disable_pagefaults(qp);
- if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
- MLX5_QP_STATE_RST, in, 0, &qp->mqp))
- mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
- qp->mqp.qpn);
+ if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
+ mlx5_ib_qp_disable_pagefaults(qp);
+ err = mlx5_core_qp_modify(dev->mdev,
+ MLX5_CMD_OP_2RST_QP, in, 0,
+ &base->mqp);
+ } else {
+ err = modify_raw_packet_qp(dev, qp,
+ MLX5_CMD_OP_2RST_QP);
+ }
+ if (err)
+ mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
+ base->mqp.qpn);
}
get_cqs(qp, &send_cq, &recv_cq);
if (qp->create_type == MLX5_QP_KERNEL) {
mlx5_ib_lock_cqs(send_cq, recv_cq);
- __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
- __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
+ NULL);
mlx5_ib_unlock_cqs(send_cq, recv_cq);
}
- err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp);
- if (err)
- mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
- kfree(in);
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ destroy_raw_packet_qp(dev, qp);
+ } else {
+ err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
+ if (err)
+ mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
+ base->mqp.qpn);
+ }
+ kfree(in);
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(&get_pd(qp)->ibpd, qp);
+ destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
}
static const char *ib_qp_type_str(enum ib_qp_type type)
return ERR_PTR(-EINVAL);
}
dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
+
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (!pd->uobject) {
+ mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
+ return ERR_PTR(-EINVAL);
+ } else if (!to_mucontext(pd->uobject->context)->cqe_version) {
+ mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
}
switch (init_attr->qp_type) {
}
/* fall through */
+ case IB_QPT_RAW_PACKET:
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
else if (is_qp1(init_attr->qp_type))
qp->ibqp.qp_num = 1;
else
- qp->ibqp.qp_num = qp->mqp.qpn;
+ qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
- qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
+ qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
+ to_mcq(init_attr->recv_cq)->mcq.cqn,
to_mcq(init_attr->send_cq)->mcq.cqn);
- qp->xrcdn = xrcdn;
+ qp->trans_qp.xrcdn = xrcdn;
break;
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
- case IB_QPT_RAW_PACKET:
case IB_QPT_MAX:
default:
mlx5_ib_dbg(dev, "unsupported qp type %d\n",
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
dest_rd_atomic = attr->max_dest_rd_atomic;
else
- dest_rd_atomic = qp->resp_depth;
+ dest_rd_atomic = qp->trans_qp.resp_depth;
if (attr_mask & IB_QP_ACCESS_FLAGS)
access_flags = attr->qp_access_flags;
else
- access_flags = qp->atomic_rd_en;
+ access_flags = qp->trans_qp.atomic_rd_en;
if (!dest_rd_atomic)
access_flags &= IB_ACCESS_REMOTE_WRITE;
return rate + MLX5_STAT_RATE_OFFSET;
}
-static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
+/*
+ * Update the priority of the TIS that belongs to a Raw Packet QP's SQ.
+ *
+ * Only the prio field is modified (bitmask.prio is set to 1); the value
+ * written is the low three bits of @sl shifted left by one.
+ *
+ * Returns 0 on success, -ENOMEM if the command buffer cannot be
+ * allocated, or the error returned by mlx5_core_modify_tis().
+ */
+static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
+ struct mlx5_ib_sq *sq, u8 sl)
+{
+ void *in;
+ void *tisc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+
+ tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
+ MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
+
+ err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ const struct ib_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
u32 path_flags, const struct ib_qp_attr *attr)
{
+ enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
int err;
- path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
- path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
-
if (attr_mask & IB_QP_PKEY_INDEX)
path->pkey_index = attr->pkey_index;
- path->grh_mlid = ah->src_path_bits & 0x7f;
- path->rlid = cpu_to_be16(ah->dlid);
-
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >=
dev->mdev->port_caps[port - 1].gid_table_len) {
dev->mdev->port_caps[port - 1].gid_table_len);
return -EINVAL;
}
- path->grh_mlid |= 1 << 7;
+ }
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ if (!(ah->ah_flags & IB_AH_GRH))
+ return -EINVAL;
+ memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
+ path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
+ ah->grh.sgid_index);
+ path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+ } else {
+ path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+ path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 :
+ 0;
+ path->rlid = cpu_to_be16(ah->dlid);
+ path->grh_mlid = ah->src_path_bits & 0x7f;
+ if (ah->ah_flags & IB_AH_GRH)
+ path->grh_mlid |= 1 << 7;
+ path->dci_cfi_prio_sl = ah->sl & 0xf;
+ }
+
+ if (ah->ah_flags & IB_AH_GRH) {
path->mgid_index = ah->grh.sgid_index;
path->hop_limit = ah->grh.hop_limit;
path->tclass_flowlabel =
if (attr_mask & IB_QP_TIMEOUT)
path->ackto_lt = attr->timeout << 3;
- path->sl = ah->sl & 0xf;
+ if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
+ return modify_raw_packet_eth_prio(dev->mdev,
+ &qp->raw_packet_qp.sq,
+ ah->sl & 0xf);
return 0;
}
return result;
}
+/*
+ * Move the RQ of a Raw Packet QP to @new_state.
+ *
+ * The command carries the cached current state (rq->state) in rq_state
+ * and the requested state in the RQ context. rq->state is updated only
+ * when mlx5_core_modify_rq() succeeds.
+ *
+ * Returns 0 on success, -ENOMEM if the command buffer cannot be
+ * allocated, or the error returned by mlx5_core_modify_rq().
+ */
+static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev,
+ struct mlx5_ib_rq *rq, int new_state)
+{
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, state, new_state);
+
+ err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen);
+ if (err)
+ goto out;
+
+ rq->state = new_state;
+
+out:
+ kvfree(in);
+ return err;
+}
+
+/*
+ * Move the SQ of a Raw Packet QP to @new_state.
+ *
+ * The command carries the cached current state (sq->state) in sq_state
+ * and the requested state in the SQ context. sq->state is updated only
+ * when mlx5_core_modify_sq() succeeds.
+ *
+ * Returns 0 on success, -ENOMEM if the command buffer cannot be
+ * allocated, or the error returned by mlx5_core_modify_sq().
+ */
+static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
+ struct mlx5_ib_sq *sq, int new_state)
+{
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_sq_in, in, sq_state, sq->state);
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, state, new_state);
+
+ err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+ if (err)
+ goto out;
+
+ sq->state = new_state;
+
+out:
+ kvfree(in);
+ return err;
+}
+
+/*
+ * Apply a QP state-transition command to a Raw Packet QP by translating
+ * it into RQ and SQ state changes.
+ *
+ * RST2INIT moves both queues to RDY, 2ERR to ERR and 2RST to RST. The
+ * INIT2INIT, INIT2RTR, RTR2RTS and RTS2RTS operations need no queue
+ * change and return 0 immediately. Any other @operation triggers a
+ * WARN_ON and returns -EINVAL.
+ *
+ * The RQ is modified first (if the QP has receive WQEs), then the SQ (if
+ * it has send WQEs). If the RQ change fails the SQ is left untouched; an
+ * SQ failure does not roll back an RQ change that already succeeded.
+ */
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ u16 operation)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ int rq_state;
+ int sq_state;
+ int err;
+
+ switch (operation) {
+ case MLX5_CMD_OP_RST2INIT_QP:
+ rq_state = MLX5_RQC_STATE_RDY;
+ sq_state = MLX5_SQC_STATE_RDY;
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ rq_state = MLX5_RQC_STATE_ERR;
+ sq_state = MLX5_SQC_STATE_ERR;
+ break;
+ case MLX5_CMD_OP_2RST_QP:
+ rq_state = MLX5_RQC_STATE_RST;
+ sq_state = MLX5_SQC_STATE_RST;
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ /* Nothing to do here... */
+ return 0;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = modify_raw_packet_qp_rq(dev->mdev, rq, rq_state);
+ if (err)
+ return err;
+ }
+
+ if (qp->sq.wqe_cnt)
+ return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+
+ return 0;
+}
+
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
+ static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
+ [MLX5_QP_STATE_RST] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
+ },
+ [MLX5_QP_STATE_INIT] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
+ [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
+ },
+ [MLX5_QP_STATE_RTR] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
+ },
+ [MLX5_QP_STATE_RTS] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
+ },
+ [MLX5_QP_STATE_SQD] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ },
+ [MLX5_QP_STATE_SQER] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
+ },
+ [MLX5_QP_STATE_ERR] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ }
+ };
+
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
struct mlx5_modify_qp_mbox_in *in;
int sqd_event;
int mlx5_st;
int err;
+ u16 op;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
context->pri_path.port = attr->port_num;
if (attr_mask & IB_QP_AV) {
- err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
+ err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
attr_mask, 0, attr);
if (err)
context->pri_path.ackto_lt |= attr->timeout << 3;
if (attr_mask & IB_QP_ALT_PATH) {
- err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+ err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
+ &context->alt_path,
attr->alt_port_num, attr_mask, 0, attr);
if (err)
goto out;
* again to RTS, and may cause the driver and the device to get out of
* sync. */
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
- (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+ (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
+ (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_disable_pagefaults(qp);
+ if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
+ !optab[mlx5_cur][mlx5_new])
+ goto out;
+
+ op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
in->optparam = cpu_to_be32(optpar);
- err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state),
- to_mlx5_state(new_state), in, sqd_event,
- &qp->mqp);
+
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
+ err = modify_raw_packet_qp(dev, qp, op);
+ else
+ err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+ &base->mqp);
if (err)
goto out;
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
+ (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_enable_pagefaults(qp);
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
- qp->atomic_rd_en = attr->qp_access_flags;
+ qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- qp->resp_depth = attr->max_dest_rd_atomic;
+ qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
if (attr_mask & IB_QP_PORT)
qp->port = attr->port_num;
if (attr_mask & IB_QP_ALT_PATH)
- qp->alt_port = attr->alt_port_num;
+ qp->trans_qp.alt_port = attr->alt_port_num;
/*
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq) : NULL);
if (send_cq != recv_cq)
- mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
qp->rq.head = 0;
qp->rq.tail = 0;
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
int port;
+ enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+ if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
+ port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
+ }
+
if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_UNSPECIFIED))
+ ll))
goto out;
if ((attr_mask & IB_QP_PORT) &&
ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
mlx5_opcode | ((u32)opmod << 24));
- ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+ ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
ctrl->fm_ce_se |= fence;
qp->fm_cache = next_fence;
if (unlikely(qp->wq_sig))
ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
return;
- ib_ah_attr->sl = path->sl & 0xf;
+ ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
ib_ah_attr->dlid = be16_to_cpu(path->rlid);
ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
}
}
-int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
+/*
+ * Query the current state of a Raw Packet QP's SQ.
+ *
+ * On success the state read from the returned SQ context is stored in
+ * *@sq_state and also cached in sq->state.
+ *
+ * Returns 0 on success, -ENOMEM if the output buffer cannot be
+ * allocated, or the error returned by mlx5_core_query_sq().
+ */
+static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq,
+ u8 *sq_state)
+{
+ void *out;
+ void *sqc;
+ int inlen;
+ int err;
+
+ /* Despite its name, inlen is the size of the query output buffer. */
+ inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+ out = mlx5_vzalloc(inlen);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
+ if (err)
+ goto out;
+
+ sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+ *sq_state = MLX5_GET(sqc, sqc, state);
+ sq->state = *sq_state;
+
+out:
+ kvfree(out);
+ return err;
+}
+
+/*
+ * Query the current state of a Raw Packet QP's RQ.
+ *
+ * On success the state read from the returned RQ context is stored in
+ * *@rq_state and also cached in rq->state.
+ *
+ * Returns 0 on success, -ENOMEM if the output buffer cannot be
+ * allocated, or the error returned by mlx5_core_query_rq().
+ */
+static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq,
+ u8 *rq_state)
+{
+ void *out;
+ void *rqc;
+ int inlen;
+ int err;
+
+ /* Despite its name, inlen is the size of the query output buffer. */
+ inlen = MLX5_ST_SZ_BYTES(query_rq_out);
+ out = mlx5_vzalloc(inlen);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
+ if (err)
+ goto out;
+
+ rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+ *rq_state = MLX5_GET(rqc, rqc, state);
+ rq->state = *rq_state;
+
+out:
+ kvfree(out);
+ return err;
+}
+
+/*
+ * Combine an SQ state and an RQ state into a single IB QP state.
+ *
+ * The lookup table is indexed [rq_state][sq_state]. The *_STATE_NA
+ * indices stand for a queue that does not exist. Two table values are
+ * markers rather than IB states:
+ * MLX5_QP_STATE - the hardware states do not determine the QP state,
+ * so the software-tracked qp->state is reported;
+ * MLX5_QP_STATE_BAD - the combination is treated as invalid; a WARN is
+ * raised and -EINVAL is returned.
+ *
+ * NOTE(review): rq_state and sq_state index the table without a range
+ * check; callers are presumably expected to pass only the states listed
+ * in the table - confirm.
+ *
+ * Returns 0 with the result in *@qp_state, or -EINVAL as described above.
+ */
+static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
+ struct mlx5_ib_qp *qp, u8 *qp_state)
+{
+ static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
+ [MLX5_RQC_STATE_RST] = {
+ [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE_BAD,
+ [MLX5_SQ_STATE_NA] = IB_QPS_RESET,
+ },
+ [MLX5_RQC_STATE_RDY] = {
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
+ [MLX5_SQC_STATE_ERR] = IB_QPS_SQE,
+ [MLX5_SQ_STATE_NA] = MLX5_QP_STATE,
+ },
+ [MLX5_RQC_STATE_ERR] = {
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_ERR] = IB_QPS_ERR,
+ [MLX5_SQ_STATE_NA] = IB_QPS_ERR,
+ },
+ [MLX5_RQ_STATE_NA] = {
+ [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
+ [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE,
+ [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD,
+ },
+ };
+
+ *qp_state = sqrq_trans[rq_state][sq_state];
+
+ if (*qp_state == MLX5_QP_STATE_BAD) {
+ WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
+ qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
+ qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
+ return -EINVAL;
+ }
+
+ /* Fall back to the state tracked in software. */
+ if (*qp_state == MLX5_QP_STATE)
+ *qp_state = qp->state;
+
+ return 0;
+}
+
+/*
+ * Determine the IB state of a Raw Packet QP from its hardware queues.
+ *
+ * The SQ and RQ are queried only if the QP has WQEs of the respective
+ * kind; a queue that is not queried keeps the *_STATE_NA placeholder.
+ * The two states are then merged by sqrq_state_to_qp_state().
+ *
+ * Returns 0 with the result in *@raw_packet_qp_state, or the first error
+ * reported by a query or by the state merge.
+ */
+static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ u8 *raw_packet_qp_state)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ int err;
+ u8 sq_state = MLX5_SQ_STATE_NA;
+ u8 rq_state = MLX5_RQ_STATE_NA;
+
+ if (qp->sq.wqe_cnt) {
+ err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
+ if (err)
+ return err;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
+ if (err)
+ return err;
+ }
+
+ return sqrq_state_to_qp_state(sq_state, rq_state, qp,
+ raw_packet_qp_state);
+}
+
+static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_qp_attr *qp_attr)
{
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_query_qp_mbox_out *outb;
struct mlx5_qp_context *context;
int mlx5_state;
int err = 0;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /*
- * Wait for any outstanding page faults, in case the user frees memory
- * based upon this query's result.
- */
- flush_workqueue(mlx5_ib_page_fault_wq);
-#endif
-
- mutex_lock(&qp->mutex);
outb = kzalloc(sizeof(*outb), GFP_KERNEL);
- if (!outb) {
- err = -ENOMEM;
- goto out;
- }
+ if (!outb)
+ return -ENOMEM;
+
context = &outb->ctx;
- err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb));
+ err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
+ sizeof(*outb));
if (err)
- goto out_free;
+ goto out;
mlx5_state = be32_to_cpu(context->flags) >> 28;
qp->state = to_ib_qp_state(mlx5_state);
- qp_attr->qp_state = qp->state;
qp_attr->path_mtu = context->mtu_msgmax >> 5;
qp_attr->path_mig_state =
to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
+
+out:
+ kfree(outb);
+ return err;
+}
+
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ int err = 0;
+ u8 raw_packet_qp_state;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ /*
+ * Wait for any outstanding page faults, in case the user frees memory
+ * based upon this query's result.
+ */
+ flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
+ mutex_lock(&qp->mutex);
+
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
+ if (err)
+ goto out;
+ qp->state = raw_packet_qp_state;
+ qp_attr->port_num = 1;
+ } else {
+ err = query_qp_attr(dev, qp, qp_attr);
+ if (err)
+ goto out;
+ }
+
+ qp_attr->qp_state = qp->state;
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
qp_attr->cap.max_recv_sge = qp->rq.max_gs;
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
+ if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
+ if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
+
qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
-out_free:
- kfree(outb);
-
out:
mutex_unlock(&qp->mutex);
return err;
struct ib_udata *udata, int buf_size, int *inlen)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_ib_create_srq ucmd;
+ struct mlx5_ib_create_srq ucmd = {};
size_t ucmdlen;
+ void *xsrqc;
int err;
int npages;
int page_shift;
int ncont;
u32 offset;
+ u32 uidx = MLX5_IB_DEFAULT_UIDX;
+ int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
- ucmdlen =
- (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
- sizeof(ucmd)) ? (sizeof(ucmd) -
- sizeof(ucmd.reserved)) : sizeof(ucmd);
+ if (drv_data < 0)
+ return -EINVAL;
+
+ ucmdlen = (drv_data < sizeof(ucmd)) ?
+ drv_data : sizeof(ucmd);
if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
mlx5_ib_dbg(dev, "failed copy udata\n");
return -EFAULT;
}
- if (ucmdlen == sizeof(ucmd) &&
- ucmd.reserved != 0)
+ if (ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
+ if (drv_data > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ drv_data - sizeof(ucmd)))
+ return -EINVAL;
+
+ err = get_srq_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+ if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+ xrc_srq_context_entry);
+ MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
+ }
+
return 0;
err_in:
struct mlx5_wqe_srq_next_seg *next;
int page_shift;
int npages;
+ void *xsrqc;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+ xrc_srq_context_entry);
+ /* 0xffffff means we ask to work with cqe version 0 */
+ MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
+ }
+
return 0;
err_in:
#include <linux/types.h>
+#include "mlx5_ib.h"
+
enum {
MLX5_QP_FLAG_SIGNATURE = 1 << 0,
MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
__u32 total_num_uuars;
__u32 num_low_latency_uuars;
__u32 flags;
- __u32 reserved;
+ __u32 comp_mask;
+ __u8 max_cqe_version;
+ __u8 reserved0;
+ __u16 reserved1;
+ __u32 reserved2;
+};
+
+enum mlx5_ib_alloc_ucontext_resp_mask {
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
};
struct mlx5_ib_alloc_ucontext_resp {
__u32 max_recv_wr;
__u32 max_srq_recv_wr;
__u16 num_ports;
- __u16 reserved;
+ __u16 reserved1;
+ __u32 comp_mask;
+ __u32 response_length;
+ __u8 cqe_version;
+ __u8 reserved2;
+ __u16 reserved3;
+ __u64 hca_core_clock_offset;
};
struct mlx5_ib_alloc_pd_resp {
__u64 buf_addr;
__u64 db_addr;
__u32 flags;
- __u32 reserved; /* explicit padding (optional on i386) */
+ __u32 reserved0; /* explicit padding (optional on i386) */
+ __u32 uidx;
+ __u32 reserved1;
};
struct mlx5_ib_create_srq_resp {
__u32 rq_wqe_count;
__u32 rq_wqe_shift;
__u32 flags;
+ __u32 uidx;
+ __u32 reserved0;
+ __u64 sq_buf_addr;
};
struct mlx5_ib_create_qp_resp {
__u32 uuar_index;
};
+/*
+ * get_qp_user_index - choose the user index (uidx) for a create-QP request.
+ * @ucontext:   user context; only its cqe_version field is read here.
+ * @ucmd:       create-QP command; only its uidx field is read here.
+ * @inlen:      length handed to field_avail() for the uidx field
+ *              (field_avail() is defined elsewhere; presumably it reports
+ *              whether inlen is large enough to contain uidx - confirm).
+ * @user_index: output pointer forwarded to verify_assign_uidx(); this
+ *              function never writes through it directly.
+ *
+ * Returns 0 without touching *user_index when uidx is available,
+ * cqe_version is 0 and ucmd->uidx equals MLX5_IB_DEFAULT_UIDX; returns
+ * -EINVAL when "uidx available" and "cqe_version non-zero" disagree;
+ * otherwise returns whatever verify_assign_uidx() returns.
+ */
+static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
+ struct mlx5_ib_create_qp *ucmd,
+ int inlen,
+ u32 *user_index)
+{
+ u8 cqe_version = ucontext->cqe_version;
+ /* Default uidx together with CQE version 0: accept as-is. */
+ if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
+ !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ return 0;
+ /*
+  * Reject a mismatch between "uidx supplied" and "cqe_version != 0".
+  * NOTE(review): the outer !! wraps the whole comparison, so the
+  * field_avail() result is compared un-normalized against !!cqe_version;
+  * this relies on field_avail() yielding exactly 0 or 1 - confirm.
+  */
+ if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
+ !!cqe_version))
+ return -EINVAL;
+ /* Remaining cases are handed to verify_assign_uidx() (defined elsewhere). */
+ return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
+/*
+ * get_srq_user_index - choose the user index (uidx) for a create-SRQ request.
+ * @ucontext:   user context; only its cqe_version field is read here.
+ * @ucmd:       create-SRQ command; only its uidx field is read here.
+ * @inlen:      length handed to field_avail() for the uidx field
+ *              (field_avail() is defined elsewhere; presumably it reports
+ *              whether inlen is large enough to contain uidx - confirm).
+ * @user_index: output pointer forwarded to verify_assign_uidx(); this
+ *              function never writes through it directly.
+ *
+ * Same decision sequence as get_qp_user_index(), applied to
+ * struct mlx5_ib_create_srq: returns 0 without touching *user_index when
+ * uidx is available, cqe_version is 0 and ucmd->uidx equals
+ * MLX5_IB_DEFAULT_UIDX; returns -EINVAL when "uidx available" and
+ * "cqe_version non-zero" disagree; otherwise returns whatever
+ * verify_assign_uidx() returns.
+ */
+static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
+ struct mlx5_ib_create_srq *ucmd,
+ int inlen,
+ u32 *user_index)
+{
+ u8 cqe_version = ucontext->cqe_version;
+ /* Default uidx together with CQE version 0: accept as-is. */
+ if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
+ !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ return 0;
+ /*
+  * Reject a mismatch between "uidx supplied" and "cqe_version != 0".
+  * NOTE(review): the outer !! wraps the whole comparison, so the
+  * field_avail() result is compared un-normalized against !!cqe_version;
+  * this relies on field_avail() yielding exactly 0 or 1 - confirm.
+  */
+ if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
+ !!cqe_version))
+ return -EINVAL;
+ /* Remaining cases are handed to verify_assign_uidx() (defined elsewhere). */
+ return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
#endif /* MLX5_IB_USER_H */
entry->opcode = IB_WC_FETCH_ADD;
entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
- case MTHCA_OPCODE_BIND_MW:
- entry->opcode = IB_WC_BIND_MW;
- break;
default:
entry->opcode = MTHCA_OPCODE_INVALID;
break;
return &mr->ibmr;
}
-static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start)
-{
- struct mthca_mr *mr;
- u64 *page_list;
- u64 total_size;
- unsigned long mask;
- int shift;
- int npages;
- int err;
- int i, j, n;
-
- mask = buffer_list[0].addr ^ *iova_start;
- total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0)
- mask |= buffer_list[i].addr;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
-
- total_size += buffer_list[i].size;
- }
-
- if (mask & ~PAGE_MASK)
- return ERR_PTR(-EINVAL);
-
- shift = __ffs(mask | 1 << 31);
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
- buffer_list[0].addr &= ~0ull << shift;
-
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
- if (!mr)
- return ERR_PTR(-ENOMEM);
-
- npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
-
- if (!npages)
- return &mr->ibmr;
-
- page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
- if (!page_list) {
- kfree(mr);
- return ERR_PTR(-ENOMEM);
- }
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
- ++j)
- page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
-
- mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
- "in PD %x; shift %d, npages %d.\n",
- (unsigned long long) buffer_list[0].addr,
- (unsigned long long) *iova_start,
- to_mpd(pd)->pd_num,
- shift, npages);
-
- err = mthca_mr_alloc_phys(to_mdev(pd->device),
- to_mpd(pd)->pd_num,
- page_list, shift, npages,
- *iova_start, total_size,
- convert_access(acc), mr);
-
- if (err) {
- kfree(page_list);
- kfree(mr);
- return ERR_PTR(err);
- }
-
- kfree(page_list);
- mr->umem = NULL;
-
- return &mr->ibmr;
-}
-
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
dev->ib_dev.destroy_cq = mthca_destroy_cq;
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
- dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
dev->ib_dev.get_port_immutable = mthca_port_immutable;
u16 pkey;
ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
- mthca_ah_grh_present(to_mah(wr->ah)), 0,
+ mthca_ah_grh_present(to_mah(wr->ah)), 0, 0, 0,
&sqp->ud_header);
err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
/* External CM API Interface */
/* instance of function pointers for client API */
/* set address of this instance to cm_core->cm_ops at cm_core alloc */
-static struct nes_cm_ops nes_cm_api = {
+static const struct nes_cm_ops nes_cm_api = {
mini_cm_accelerated,
mini_cm_listen,
mini_cm_del_listen,
int passive_state;
struct nes_ib_device *nesibdev;
struct ib_mr *ibmr = NULL;
- struct ib_phys_buf ibphysbuf;
struct nes_pd *nespd;
u64 tagged_offset;
u8 mpa_frame_offset = 0;
u64temp = (unsigned long)nesqp;
nesibdev = nesvnic->nesibdev;
nespd = nesqp->nespd;
- ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset;
- ibphysbuf.size = buff_len;
tagged_offset = (u64)(unsigned long)*start_buff;
- ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
- &ibphysbuf, 1,
- IB_ACCESS_LOCAL_WRITE,
- &tagged_offset);
- if (!ibmr) {
+ ibmr = nes_reg_phys_mr(&nespd->ibpd,
+ nesqp->ietf_frame_pbase + mpa_frame_offset,
+ buff_len, IB_ACCESS_LOCAL_WRITE,
+ &tagged_offset);
+ if (IS_ERR(ibmr)) {
nes_debug(NES_DBG_CM, "Unable to register memory region"
"for lSMM for cm_node = %p \n",
cm_node);
pci_free_consistent(nesdev->pcidev,
nesqp->private_data_len + nesqp->ietf_frame_size,
nesqp->ietf_frame, nesqp->ietf_frame_pbase);
- return -ENOMEM;
+ return PTR_ERR(ibmr);
}
ibmr->pd = &nespd->ibpd;
struct timer_list tcp_timer;
- struct nes_cm_ops *api;
+ const struct nes_cm_ops *api;
int (*post_event)(struct nes_cm_event *event);
atomic_t events_posted;
if (action == NES_ARP_DELETE) {
nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index);
nesadapter->arp_table[arp_index].ip_addr = 0;
- memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN);
+ eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr);
nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index);
return arp_index;
}
}
-/**
- * nes_bind_mw
- */
-static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
- struct ib_mw_bind *ibmw_bind)
-{
- u64 u64temp;
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- /* struct nes_mr *nesmr = to_nesmw(ibmw); */
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_hw_qp_wqe *wqe;
- unsigned long flags = 0;
- u32 head;
- u32 wqe_misc = 0;
- u32 qsize;
-
- if (nesqp->ibqp_state > IB_QPS_RTS)
- return -EINVAL;
-
- spin_lock_irqsave(&nesqp->lock, flags);
-
- head = nesqp->hwqp.sq_head;
- qsize = nesqp->hwqp.sq_tail;
-
- /* Check for SQ overflow */
- if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
- spin_unlock_irqrestore(&nesqp->lock, flags);
- return -ENOMEM;
- }
-
- wqe = &nesqp->hwqp.sq_vbase[head];
- /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */
- nes_fill_init_qp_wqe(wqe, nesqp, head);
- u64temp = ibmw_bind->wr_id;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp);
- wqe_misc = NES_IWARP_SQ_OP_BIND;
-
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-
- if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
- wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
-
- if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
- wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
- if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
- wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
-
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
- ibmw_bind->bind_info.mr->lkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
- ibmw_bind->bind_info.length);
- wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
- u64temp = (u64)ibmw_bind->bind_info.addr;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
-
- head++;
- if (head >= qsize)
- head = 0;
-
- nesqp->hwqp.sq_head = head;
- barrier();
-
- nes_write32(nesdev->regs+NES_WQE_ALLOC,
- (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
-
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- return 0;
-}
-
-
/*
* nes_alloc_fast_mr
*/
/**
* nes_reg_phys_mr
*/
-static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
- struct ib_phys_buf *buffer_list, int num_phys_buf, int acc,
- u64 * iova_start)
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
+ int acc, u64 *iova_start)
{
u64 region_length;
struct nes_pd *nespd = to_nespd(ib_pd);
struct nes_vpbl vpbl;
struct nes_root_vpbl root_vpbl;
u32 stag;
- u32 i;
unsigned long mask;
u32 stag_index = 0;
u32 next_stag_index = 0;
u32 driver_key = 0;
- u32 root_pbl_index = 0;
- u32 cur_pbl_index = 0;
int err = 0;
int ret = 0;
u16 pbl_count = 0;
next_stag_index >>= 8;
next_stag_index %= nesadapter->max_mr;
- if (num_phys_buf > (1024*512)) {
- return ERR_PTR(-E2BIG);
- }
- if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK)
+ if ((addr ^ *iova_start) & ~PAGE_MASK)
return ERR_PTR(-EINVAL);
err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
return ERR_PTR(-ENOMEM);
}
- for (i = 0; i < num_phys_buf; i++) {
+ /* Allocate a 4K buffer for the PBL */
+ vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+ &vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
+ vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
+ if (!vpbl.pbl_vbase) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ ibmr = ERR_PTR(-ENOMEM);
+ kfree(nesmr);
+ goto reg_phys_err;
+ }
- if ((i & 0x01FF) == 0) {
- if (root_pbl_index == 1) {
- /* Allocate the root PBL */
- root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
- &root_vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
- root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
- if (!root_vpbl.pbl_vbase) {
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- kfree(nesmr);
- return ERR_PTR(-ENOMEM);
- }
- root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL);
- if (!root_vpbl.leaf_vpbl) {
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- kfree(nesmr);
- return ERR_PTR(-ENOMEM);
- }
- root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[0].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[0] = vpbl;
- }
- /* Allocate a 4K buffer for the PBL */
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
- vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
- if (!vpbl.pbl_vbase) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
- kfree(nesmr);
- goto reg_phys_err;
- }
- /* Fill in the root table */
- if (1 <= root_pbl_index) {
- root_vpbl.pbl_vbase[root_pbl_index].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[root_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
- }
- root_pbl_index++;
- cur_pbl_index = 0;
- }
- mask = !buffer_list[i].size;
- if (i != 0)
- mask |= buffer_list[i].addr;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
-
- if (mask & ~PAGE_MASK) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_phys_err;
- }
+ mask = !size;
- region_length += buffer_list[i].size;
- if ((i != 0) && (single_page)) {
- if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr)
- single_page = 0;
- }
- vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK);
- vpbl.pbl_vbase[cur_pbl_index++].pa_high =
- cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32)));
+ if (mask & ~PAGE_MASK) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_phys_err;
}
+ region_length += size;
+ vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK);
+ vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32)));
+
stag = stag_index << 8;
stag |= driver_key;
stag += (u32)stag_key;
stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
/* Make the leaf PBL the root if only one PBL */
- if (root_pbl_index == 1) {
- root_vpbl.pbl_pbase = vpbl.pbl_pbase;
- }
+ root_vpbl.pbl_pbase = vpbl.pbl_pbase;
if (single_page) {
pbl_count = 0;
} else {
- pbl_count = root_pbl_index;
+ pbl_count = 1;
}
ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
- buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start,
+ addr, pbl_count, 1, acc, iova_start,
&nesmr->pbls_used, &nesmr->pbl_4k);
if (ret == 0) {
ibmr = ERR_PTR(-ENOMEM);
}
- reg_phys_err:
- /* free the resources */
- if (root_pbl_index == 1) {
- /* single PBL case */
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
- } else {
- for (i=0; i<root_pbl_index; i++) {
- pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase,
- root_vpbl.leaf_vpbl[i].pbl_pbase);
- }
- kfree(root_vpbl.leaf_vpbl);
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- }
-
+reg_phys_err:
+ /* single PBL case */
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
return ibmr;
}
*/
static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
{
- struct ib_phys_buf bl;
u64 kva = 0;
nes_debug(NES_DBG_MR, "\n");
- bl.size = (u64)0xffffffffffULL;
- bl.addr = 0;
- return nes_reg_phys_mr(pd, &bl, 1, acc, &kva);
+ return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva); /* addr 0 and
+  * size 0xffffffffff are the same values the removed bl.addr/bl.size
+  * lines carried in struct ib_phys_buf; iova_start points at kva (0). */
}
-
/**
* nes_reg_user_mr
*/
nesibdev->ibdev.destroy_cq = nes_destroy_cq;
nesibdev->ibdev.poll_cq = nes_poll_cq;
nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
- nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr;
nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
nesibdev->ibdev.dereg_mr = nes_dereg_mr;
nesibdev->ibdev.alloc_mw = nes_alloc_mw;
nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
- nesibdev->ibdev.bind_mw = nes_bind_mw;
nesibdev->ibdev.alloc_mr = nes_alloc_mr;
nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
u8 pau_state;
__u64 nesuqp_addr;
};
+
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
+ u64 addr, u64 size, int acc, u64 *iova_start);
+
#endif /* NES_VERBS_H */
if ((pd->uctx) &&
(!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
(!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
- status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
- attr->dmac, &vlan_tag,
- sgid_attr.ndev->ifindex);
+ status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid,
+ attr->dmac, &vlan_tag,
+ &sgid_attr.ndev->ifindex,
+ NULL);
if (status) {
pr_err("%s(): Failed to resolve dmac from gid."
"status = %d\n", __func__, status);
dev->ibdev.req_notify_cq = ocrdma_arm_cq;
dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
- dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
dev->ibdev.dereg_mr = ocrdma_dereg_mr;
dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
return ERR_PTR(-ENOMEM);
}
-#define MAX_KERNEL_PBE_SIZE 65536
-static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
- int buf_cnt, u32 *pbe_size)
-{
- u64 total_size = 0;
- u64 buf_size = 0;
- int i;
- *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
- *pbe_size = roundup_pow_of_two(*pbe_size);
-
- /* find the smallest PBE size that we can have */
- for (i = 0; i < buf_cnt; i++) {
- /* first addr may not be page aligned, so ignore checking */
- if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
- (buf_list[i].size & ~PAGE_MASK))) {
- return 0;
- }
-
- /* if configured PBE size is greater then the chosen one,
- * reduce the PBE size.
- */
- buf_size = roundup(buf_list[i].size, PAGE_SIZE);
- /* pbe_size has to be even multiple of 4K 1,2,4,8...*/
- buf_size = roundup_pow_of_two(buf_size);
- if (*pbe_size > buf_size)
- *pbe_size = buf_size;
-
- total_size += buf_size;
- }
- *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
- (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
-
- /* num_pbes = total_size / (*pbe_size); this is implemented below. */
-
- return total_size >> ilog2(*pbe_size);
-}
-
-static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
- u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
- struct ocrdma_hw_mr *hwmr)
-{
- int i;
- int idx;
- int pbes_per_buf = 0;
- u64 buf_addr = 0;
- int num_pbes;
- struct ocrdma_pbe *pbe;
- int total_num_pbes = 0;
-
- if (!hwmr->num_pbes)
- return;
-
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- num_pbes = 0;
-
- /* go through the OS phy regions & fill hw pbe entries into pbls. */
- for (i = 0; i < ib_buf_cnt; i++) {
- buf_addr = buf_list[i].addr;
- pbes_per_buf =
- roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
- pbe_size;
- hwmr->len += buf_list[i].size;
- /* number of pbes can be more for one OS buf, when
- * buffers are of different sizes.
- * split the ib_buf to one or more pbes.
- */
- for (idx = 0; idx < pbes_per_buf; idx++) {
- /* we program always page aligned addresses,
- * first unaligned address is taken care by fbo.
- */
- if (i == 0) {
- /* for non zero fbo, assign the
- * start of the page.
- */
- pbe->pa_lo =
- cpu_to_le32((u32) (buf_addr & PAGE_MASK));
- pbe->pa_hi =
- cpu_to_le32((u32) upper_32_bits(buf_addr));
- } else {
- pbe->pa_lo =
- cpu_to_le32((u32) (buf_addr & 0xffffffff));
- pbe->pa_hi =
- cpu_to_le32((u32) upper_32_bits(buf_addr));
- }
- buf_addr += pbe_size;
- num_pbes += 1;
- total_num_pbes += 1;
- pbe++;
-
- if (total_num_pbes == hwmr->num_pbes)
- goto mr_tbl_done;
- /* if the pbl is full storing the pbes,
- * move to next pbl.
- */
- if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- num_pbes = 0;
- }
- }
- }
-mr_tbl_done:
- return;
-}
-
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
- struct ib_phys_buf *buf_list,
- int buf_cnt, int acc, u64 *iova_start)
-{
- int status = -ENOMEM;
- struct ocrdma_mr *mr;
- struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
- struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
- u32 num_pbes;
- u32 pbe_size = 0;
-
- if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
- return ERR_PTR(-EINVAL);
-
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
- return ERR_PTR(status);
-
- num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
- if (num_pbes == 0) {
- status = -EINVAL;
- goto pbl_err;
- }
- status = ocrdma_get_pbl_info(dev, mr, num_pbes);
- if (status)
- goto pbl_err;
-
- mr->hwmr.pbe_size = pbe_size;
- mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
- mr->hwmr.va = *iova_start;
- mr->hwmr.local_rd = 1;
- mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
- mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
- mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
- mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
- mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
-
- status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
- if (status)
- goto pbl_err;
- build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
- &mr->hwmr);
- status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
- if (status)
- goto mbx_err;
-
- mr->ibmr.lkey = mr->hwmr.lkey;
- if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
- mr->ibmr.rkey = mr->hwmr.lkey;
- return &mr->ibmr;
-
-mbx_err:
- ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
-pbl_err:
- kfree(mr);
- return ERR_PTR(status);
-}
-
static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
{
struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
int ocrdma_dereg_mr(struct ib_mr *);
struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *);
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
{
int error;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
error = qibfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
else
error = PTR_ERR(*dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return error;
}
int ret, i;
root = dget(sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
snprintf(unit, sizeof(unit), "%u", dd->unit);
dir = lookup_one_len(unit, root, strlen(unit));
goto bail;
}
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
remove_file(dir, "counters");
remove_file(dir, "counter_names");
remove_file(dir, "portcounter_names");
}
}
remove_file(dir, "flash");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = simple_rmdir(d_inode(root), dir);
d_delete(dir);
dput(dir);
bail:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
return ret;
}
rval = init_qib_mregion(&mr->mr, pd, count);
if (rval)
goto bail;
- /*
- * ib_reg_phys_mr() will initialize mr->ibmr except for
- * lkey and rkey.
- */
+
rval = qib_alloc_lkey(&mr->mr, 0);
if (rval)
goto bail_mregion;
goto done;
}
-/**
- * qib_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- struct qib_mr *mr;
- int n, m, i;
- struct ib_mr *ret;
-
- mr = alloc_mr(num_phys_buf, pd);
- if (IS_ERR(mr)) {
- ret = (struct ib_mr *)mr;
- goto bail;
- }
-
- mr->mr.user_base = *iova_start;
- mr->mr.iova = *iova_start;
- mr->mr.access_flags = acc;
-
- m = 0;
- n = 0;
- for (i = 0; i < num_phys_buf; i++) {
- mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
- mr->mr.map[m]->segs[n].length = buffer_list[i].size;
- mr->mr.length += buffer_list[i].size;
- n++;
- if (n == QIB_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
/**
* qib_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
32768 /* 1E */
};
-static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
+static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
+ gfp_t gfp)
{
- unsigned long page = get_zeroed_page(GFP_KERNEL);
+ unsigned long page = get_zeroed_page(gfp);
/*
* Free the page if someone raced with us installing it.
* zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
*/
static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
- enum ib_qp_type type, u8 port)
+ enum ib_qp_type type, u8 port, gfp_t gfp)
{
u32 i, offset, max_scan, qpn;
struct qpn_map *map;
max_scan = qpt->nmaps - !offset;
for (i = 0;;) {
if (unlikely(!map->page)) {
- get_map_page(qpt, map);
+ get_map_page(qpt, map, gfp);
if (unlikely(!map->page))
break;
}
size_t sz;
size_t sg_list_sz;
struct ib_qp *ret;
+ gfp_t gfp;
+
if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
- init_attr->create_flags) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
+ init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+ return ERR_PTR(-EINVAL);
+
+ /* GFP_NOIO is applicable in RC QPs only */
+ if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
+ init_attr->qp_type != IB_QPT_RC)
+ return ERR_PTR(-EINVAL);
+
+ gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
+ GFP_NOIO : GFP_KERNEL;
/* Check receive queue parameters if no SRQ is specified. */
if (!init_attr->srq) {
sz = sizeof(struct qib_sge) *
init_attr->cap.max_send_sge +
sizeof(struct qib_swqe);
- swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
+ swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
+ gfp, PAGE_KERNEL);
if (swq == NULL) {
ret = ERR_PTR(-ENOMEM);
goto bail;
} else if (init_attr->cap.max_recv_sge > 1)
sg_list_sz = sizeof(*qp->r_sg_list) *
(init_attr->cap.max_recv_sge - 1);
- qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
+ qp = kzalloc(sz + sg_list_sz, gfp);
if (!qp) {
ret = ERR_PTR(-ENOMEM);
goto bail_swq;
}
RCU_INIT_POINTER(qp->next, NULL);
- qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
+ qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
if (!qp->s_hdr) {
ret = ERR_PTR(-ENOMEM);
goto bail_qp;
qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
sizeof(struct qib_rwqe);
- qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) +
- qp->r_rq.size * sz);
+ if (gfp != GFP_NOIO)
+ qp->r_rq.wq = vmalloc_user(
+ sizeof(struct qib_rwq) +
+ qp->r_rq.size * sz);
+ else
+ qp->r_rq.wq = __vmalloc(
+ sizeof(struct qib_rwq) +
+ qp->r_rq.size * sz,
+ gfp, PAGE_KERNEL);
+
if (!qp->r_rq.wq) {
ret = ERR_PTR(-ENOMEM);
goto bail_qp;
dev = to_idev(ibpd->device);
dd = dd_from_dev(dev);
err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
- init_attr->port_num);
+ init_attr->port_num, gfp);
if (err < 0) {
ret = ERR_PTR(err);
vfree(qp->r_rq.wq);
unsigned long flags;
struct qib_lkey_table *rkt;
struct qib_pd *pd;
+ int avoid_schedule = 0;
spin_lock_irqsave(&qp->s_lock, flags);
qp->ibqp.qp_type == IB_QPT_RC) {
if (wqe->length > 0x80000000U)
goto bail_inval_free;
+ if (wqe->length <= qp->pmtu)
+ avoid_schedule = 1;
} else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
- qp->port_num - 1)->ibmtu)
+ qp->port_num - 1)->ibmtu) {
goto bail_inval_free;
- else
+ } else {
atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
+ avoid_schedule = 1;
+ }
wqe->ssn = qp->s_ssn++;
qp->s_head = next;
bail_inval:
ret = -EINVAL;
bail:
- if (!ret && !wr->next &&
+ if (!ret && !wr->next && !avoid_schedule &&
!qib_sdma_empty(
dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
qib_schedule_send(qp);
ibdev->poll_cq = qib_poll_cq;
ibdev->req_notify_cq = qib_req_notify_cq;
ibdev->get_dma_mr = qib_get_dma_mr;
- ibdev->reg_phys_mr = qib_reg_phys_mr;
ibdev->reg_user_mr = qib_reg_user_mr;
ibdev->dereg_mr = qib_dereg_mr;
ibdev->alloc_mr = qib_alloc_mr;
struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
-
struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
struct qib_ibdev *dev = to_idev(ibqp->device);
struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
struct qib_mcast *mcast = NULL;
- struct qib_mcast_qp *p, *tmp;
+ struct qib_mcast_qp *p, *tmp, *delp = NULL;
struct rb_node *n;
int last = 0;
int ret;
- if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
- ret = -EINVAL;
- goto bail;
- }
+ if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+ return -EINVAL;
spin_lock_irq(&ibp->lock);
while (1) {
if (n == NULL) {
spin_unlock_irq(&ibp->lock);
- ret = -EINVAL;
- goto bail;
+ return -EINVAL;
}
mcast = rb_entry(n, struct qib_mcast, rb_node);
*/
list_del_rcu(&p->list);
mcast->n_attached--;
+ delp = p;
/* If this was the last attached QP, remove the GID too. */
if (list_empty(&mcast->qp_list)) {
}
spin_unlock_irq(&ibp->lock);
+ /* QP not attached */
+ if (!delp)
+ return -EINVAL;
+ /*
+ * Wait for any list walkers to finish before freeing the
+ * list element.
+ */
+ wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+ qib_mcast_qp_free(delp);
- if (p) {
- /*
- * Wait for any list walkers to finish before freeing the
- * list element.
- */
- wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
- qib_mcast_qp_free(p);
- }
if (last) {
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
dev->n_mcast_grps_allocated--;
spin_unlock_irq(&dev->n_mcast_grps_lock);
}
-
- ret = 0;
-
-bail:
- return ret;
+ return 0;
}
int qib_mcast_tree_empty(struct qib_ibport *ibp)
qp_flow,
&flowinfo_ops);
if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
- usnic_err("Failed to create dbg fs entry for flow %u\n",
- qp_flow->flow->flow_id);
+ usnic_err("Failed to create dbg fs entry for flow %u with error %ld\n",
+ qp_flow->flow->flow_id,
+ PTR_ERR(qp_flow->dbgfs_dentry));
}
}
if (!status) {
qp_grp->state = new_state;
- usnic_info("Transistioned %u from %s to %s",
+ usnic_info("Transitioned %u from %s to %s",
qp_grp->grp_id,
usnic_ib_qp_grp_state_to_string(old_state),
usnic_ib_qp_grp_state_to_string(new_state));
return res_chunk_list;
out_free_res:
- for (i--; i > 0; i--)
+ for (i--; i >= 0; i--)
usnic_vnic_put_resources(res_chunk_list[i]);
kfree(res_chunk_list);
return ERR_PTR(err);
static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
{
- *fw_ver = (u64) *fw_ver_str;
+ *fw_ver = *((u64 *)fw_ver_str);
}
static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
qp_grp = to_uqp_grp(ibqp);
- /* TODO: Future Support All States */
mutex_lock(&qp_grp->vf->pf->usdev_lock);
- if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
- } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
- } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);
+ if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) {
+ /* usnic devices only have one port */
+ status = -EINVAL;
+ goto out_unlock;
+ }
+ if (attr_mask & IB_QP_STATE) {
+ status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL);
} else {
- usnic_err("Unexpected combination mask: %u state: %u\n",
- attr_mask & IB_QP_STATE, attr->qp_state);
+ usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask);
status = -EINVAL;
}
+out_unlock:
mutex_unlock(&qp_grp->vf->pf->usdev_lock);
return status;
}
virt_addr, length);
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (IS_ERR_OR_NULL(mr))
- return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
access_flags, 0);
struct ib_udata *uhw);
int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
-enum rdma_protocol_type
-usnic_ib_query_protocol(struct ib_device *device, u8 port_num);
int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
struct usnic_vnic_res *res;
int i;
- if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+ if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 0 || !owner)
return ERR_PTR(-EINVAL);
ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
return ERR_PTR(-ENOMEM);
}
- ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC);
- if (!ret->res) {
- usnic_err("Failed to allocate resources for %s. Out of memory\n",
- usnic_vnic_pci_name(vnic));
- kfree(ret);
- return ERR_PTR(-ENOMEM);
- }
+ if (cnt > 0) {
+ ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
+ if (!ret->res) {
+ usnic_err("Failed to allocate resources for %s. Out of memory\n",
+ usnic_vnic_pci_name(vnic));
+ kfree(ret);
+ return ERR_PTR(-ENOMEM);
+ }
- spin_lock(&vnic->res_lock);
- src = &vnic->chunks[type];
- for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
- res = src->res[i];
- if (!res->owner) {
- src->free_cnt--;
- res->owner = owner;
- ret->res[ret->cnt++] = res;
+ spin_lock(&vnic->res_lock);
+ src = &vnic->chunks[type];
+ for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+ res = src->res[i];
+ if (!res->owner) {
+ src->free_cnt--;
+ res->owner = owner;
+ ret->res[ret->cnt++] = res;
+ }
}
- }
- spin_unlock(&vnic->res_lock);
+ spin_unlock(&vnic->res_lock);
+ }
ret->type = type;
ret->vnic = vnic;
WARN_ON(ret->cnt != cnt);
int i;
struct usnic_vnic *vnic = chunk->vnic;
- spin_lock(&vnic->res_lock);
- while ((i = --chunk->cnt) >= 0) {
- res = chunk->res[i];
- chunk->res[i] = NULL;
- res->owner = NULL;
- vnic->chunks[res->type].free_cnt++;
+ if (chunk->cnt > 0) {
+ spin_lock(&vnic->res_lock);
+ while ((i = --chunk->cnt) >= 0) {
+ res = chunk->res[i];
+ chunk->res[i] = NULL;
+ res->owner = NULL;
+ vnic->chunks[res->type].free_cnt++;
+ }
+ spin_unlock(&vnic->res_lock);
}
- spin_unlock(&vnic->res_lock);
kfree(chunk->res);
kfree(chunk);
void ipoib_mcast_join_task(struct work_struct *work);
void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
-void ipoib_mcast_free(struct ipoib_mcast *mc);
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
union ib_gid *mgid, int set_qkey);
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast);
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid);
+void ipoib_mcast_remove_list(struct list_head *remove_list);
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+ struct list_head *remove_list);
int ipoib_init_qp(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
static struct ib_send_wr ipoib_cm_rx_drain_wr = {
- .wr_id = IPOIB_CM_RX_DRAIN_WRID,
.opcode = IB_WR_SEND,
};
* error" WC will be immediately generated for each WR we post.
*/
p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+ ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
ipoib_warn(priv, "failed to post drain wr\n");
int ipoib_cm_dev_init(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- int i, ret;
- struct ib_device_attr attr;
+ int max_srq_sge, i;
INIT_LIST_HEAD(&priv->cm.passive_ids);
INIT_LIST_HEAD(&priv->cm.reap_list);
skb_queue_head_init(&priv->cm.skb_queue);
- ret = ib_query_device(priv->ca, &attr);
- if (ret) {
- printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
- return ret;
- }
-
- ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+ ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);
- attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge);
- ipoib_cm_create_srq(dev, attr.max_srq_sge);
+ max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
+ ipoib_cm_create_srq(dev, max_srq_sge);
if (ipoib_cm_has_srq(dev)) {
- priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10;
- priv->cm.num_frags = attr.max_srq_sge;
+ priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
+ priv->cm.num_frags = max_srq_sge;
ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
priv->cm.max_cm_mtu, priv->cm.num_frags);
} else {
struct ethtool_drvinfo *drvinfo)
{
struct ipoib_dev_priv *priv = netdev_priv(netdev);
- struct ib_device_attr *attr;
-
- attr = kmalloc(sizeof(*attr), GFP_KERNEL);
- if (attr && !ib_query_device(priv->ca, attr))
- snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
- "%d.%d.%d", (int)(attr->fw_ver >> 32),
- (int)(attr->fw_ver >> 16) & 0xffff,
- (int)attr->fw_ver & 0xffff);
- kfree(attr);
+
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32),
+ (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff,
+ (int)priv->ca->attrs.fw_ver & 0xffff);
strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
sizeof(drvinfo->bus_info));
unsigned long flags;
int i;
LIST_HEAD(remove_list);
- struct ipoib_mcast *mcast, *tmcast;
- struct net_device *dev = priv->dev;
if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
return;
lockdep_is_held(&priv->lock))) != NULL) {
/* was the neigh idle for two GC periods */
if (time_after(neigh_obsolete, neigh->alive)) {
- u8 *mgid = neigh->daddr + 4;
- /* Is this multicast ? */
- if (*mgid == 0xff) {
- mcast = __ipoib_mcast_find(dev, mgid);
-
- if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
- list_del(&mcast->list);
- rb_erase(&mcast->rb_node, &priv->multicast_tree);
- list_add_tail(&mcast->list, &remove_list);
- }
- }
+ ipoib_check_and_add_mcast_sendonly(priv, neigh->daddr + 4, &remove_list);
rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext,
out_unlock:
spin_unlock_irqrestore(&priv->lock, flags);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
}
static void ipoib_reap_neigh(struct work_struct *work)
int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
{
- struct ib_device_attr *device_attr;
- int result = -ENOMEM;
-
- device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
- if (!device_attr) {
- printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
- hca->name, sizeof *device_attr);
- return result;
- }
-
- result = ib_query_device(hca, device_attr);
- if (result) {
- printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
- hca->name, result);
- kfree(device_attr);
- return result;
- }
- priv->hca_caps = device_attr->device_cap_flags;
-
- kfree(device_attr);
+ priv->hca_caps = hca->attrs.device_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
priv->dev->hw_features = NETIF_F_SG |
queue_delayed_work(priv->wq, &priv->mcast_task, 0);
}
-void ipoib_mcast_free(struct ipoib_mcast *mcast)
+static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
struct net_device *dev = mcast->dev;
int tx_dropped = 0;
return mcast;
}
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
+static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
return 0;
}
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
+static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret = 0;
return 0;
}
+/*
+ * Check if the multicast group is sendonly. If so remove it from the maps
+ * and add to the remove list
+ *
+ * @priv: device private data whose multicast_tree is searched and updated
+ * @mgid: GID to test; a lookup is only done when its first byte is 0xff
+ * @remove_list: list the unlinked sendonly group is appended to
+ *
+ * The group is only unlinked here (from the list it is currently on and
+ * from the rb-tree) and queued on @remove_list; leaving and freeing it is
+ * left to whoever processes that list.
+ * NOTE(review): no lock is taken in this function while the list and the
+ * tree are modified, so callers presumably hold priv->lock - confirm.
+ */
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+ struct list_head *remove_list)
+{
+ /* Is this multicast ? */
+ if (*mgid == 0xff) {
+ struct ipoib_mcast *mcast = __ipoib_mcast_find(priv->dev, mgid);
+
+ if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+ list_del(&mcast->list);
+ rb_erase(&mcast->rb_node, &priv->multicast_tree);
+ list_add_tail(&mcast->list, remove_list);
+ }
+ }
+}
+
+/*
+ * Walk @remove_list and, for every multicast group queued on it, call
+ * ipoib_mcast_leave() on the group's own device followed by
+ * ipoib_mcast_free().
+ *
+ * The _safe iterator is used because each entry is handed to
+ * ipoib_mcast_free() during the walk. The list head itself is not
+ * reinitialised here.
+ */
+void ipoib_mcast_remove_list(struct list_head *remove_list)
+{
+ struct ipoib_mcast *mcast, *tmcast;
+
+ list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
+ ipoib_mcast_leave(mcast->dev, mcast);
+ ipoib_mcast_free(mcast);
+ }
+}
+
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
}
static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(mcast->dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
/*
* Double check that we are still up
ib_conn = &iser_conn->ib_conn;
if (ib_conn->pi_support) {
- u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
+ u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
* max fastreg page list length.
*/
shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
- ib_conn->device->dev_attr.max_fast_reg_page_list_len);
+ ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len);
shost->max_sectors = min_t(unsigned int,
1024, (shost->sg_tablesize * PAGE_SIZE) >> 9);
release_wq = alloc_workqueue("release workqueue", 0, 0);
if (!release_wq) {
iser_err("failed to allocate release workqueue\n");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_alloc_wq;
}
iscsi_iser_scsi_transport = iscsi_register_transport(
if (!iscsi_iser_scsi_transport) {
iser_err("iscsi_register_transport failed\n");
err = -EINVAL;
- goto register_transport_failure;
+ goto err_reg;
}
return 0;
-register_transport_failure:
+err_reg:
+ destroy_workqueue(release_wq);
+err_alloc_wq:
kmem_cache_destroy(ig.desc_cache);
return err;
#include <scsi/scsi_transport_iscsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
+#include <scsi/iser.h>
#include <linux/interrupt.h>
#include <linux/wait.h>
- ISER_MAX_RX_MISC_PDUS) / \
(1 + ISER_INFLIGHT_DATAOUTS))
-#define ISER_WC_BATCH_COUNT 16
#define ISER_SIGNAL_CMD_COUNT 32
-#define ISER_VER 0x10
-#define ISER_WSV 0x08
-#define ISER_RSV 0x04
-
-#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
-#define ISER_BEACON_WRID 0xfffffffffffffffeULL
-
-/**
- * struct iser_hdr - iSER header
- *
- * @flags: flags support (zbva, remote_inv)
- * @rsvd: reserved
- * @write_stag: write rkey
- * @write_va: write virtual address
- * @reaf_stag: read rkey
- * @read_va: read virtual address
- */
-struct iser_hdr {
- u8 flags;
- u8 rsvd[3];
- __be32 write_stag;
- __be64 write_va;
- __be32 read_stag;
- __be64 read_va;
-} __attribute__((packed));
-
-
-#define ISER_ZBVA_NOT_SUPPORTED 0x80
-#define ISER_SEND_W_INV_NOT_SUPPORTED 0x40
-
-struct iser_cm_hdr {
- u8 flags;
- u8 rsvd[3];
-} __packed;
-
/* Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
+#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr))
#define ISER_RECV_DATA_SEG_LEN 128
#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
#define ISER_MAX_WRS 7
/**
- * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
+ * struct iser_tx_desc - iSER TX descriptor
*
* @iser_header: iser header
* @iscsi_header: iscsi header
* @sig_attrs: Signature attributes
*/
struct iser_tx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
enum iser_desc_type type;
u64 dma_addr;
struct ib_sge tx_sg[2];
int num_sge;
+ struct ib_cqe cqe;
bool mapped;
u8 wr_idx;
union iser_wr {
};
#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
- sizeof(u64) + sizeof(struct ib_sge)))
+ sizeof(u64) + sizeof(struct ib_sge) + \
+ sizeof(struct ib_cqe)))
/**
- * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
+ * struct iser_rx_desc - iSER RX descriptor
*
* @iser_header: iser header
* @iscsi_header: iscsi header
* @pad: for sense data TODO: Modify to maximum sense length supported
*/
struct iser_rx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
char data[ISER_RECV_DATA_SEG_LEN];
u64 dma_addr;
struct ib_sge rx_sg;
+ struct ib_cqe cqe;
char pad[ISER_RX_PAD_SIZE];
+} __packed;
+
+/**
+ * struct iser_login_desc - iSER login descriptor
+ *
+ * @req: pointer to login request buffer
+ * @rsp: pointer to login response buffer
+ * @req_dma: DMA address of login request buffer
+ * @rsp_dma: DMA address of login response buffer
+ * @sge: IB sge for login post recv
+ * @cqe: completion handler
+ */
+struct iser_login_desc {
+ void *req;
+ void *rsp;
+ u64 req_dma;
+ u64 rsp_dma;
+ struct ib_sge sge;
+ struct ib_cqe cqe;
} __attribute__((packed));
struct iser_conn;
/**
* struct iser_comp - iSER completion context
*
- * @device: pointer to device handle
* @cq: completion queue
- * @wcs: work completion array
- * @tasklet: Tasklet handle
* @active_qps: Number of active QPs attached
* to completion context
*/
struct iser_comp {
- struct iser_device *device;
struct ib_cq *cq;
- struct ib_wc wcs[ISER_WC_BATCH_COUNT];
- struct tasklet_struct tasklet;
int active_qps;
};
*
* @ib_device: RDMA device
* @pd: Protection Domain for this device
- * @dev_attr: Device attributes container
* @mr: Global DMA memory region
* @event_handler: IB events handle routine
* @ig_list: entry in devices list
* cpus and device max completion vectors
* @comps: Dinamically allocated array of completion handlers
* @reg_ops: Registration ops
+ * @remote_inv_sup: Remote invalidate is supported on this device
*/
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
- struct ib_device_attr dev_attr;
struct ib_mr *mr;
struct ib_event_handler event_handler;
struct list_head ig_list;
int refcount;
int comps_used;
struct iser_comp *comps;
- struct iser_reg_ops *reg_ops;
+ const struct iser_reg_ops *reg_ops;
+ bool remote_inv_sup;
};
#define ISER_CHECK_GUARD 0xc0
* @rx_wr: receive work request for batch posts
* @device: reference to iser device
* @comp: iser completion context
- * @pi_support: Indicate device T10-PI support
- * @beacon: beacon send wr to signal all flush errors were drained
- * @flush_comp: completes when all connection completions consumed
* @fr_pool: connection fast registration poool
+ * @pi_support: Indicate device T10-PI support
+ * @last: last send wr to signal all flush errors were drained
+ * @last_cqe: cqe handler for last wr
+ * @last_comp: completes when all connection completions consumed
*/
struct ib_conn {
struct rdma_cm_id *cma_id;
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
struct iser_device *device;
struct iser_comp *comp;
- bool pi_support;
- struct ib_send_wr beacon;
- struct completion flush_comp;
struct iser_fr_pool fr_pool;
+ bool pi_support;
+ struct ib_send_wr last;
+ struct ib_cqe last_cqe;
+ struct ib_cqe reg_cqe;
+ struct completion last_comp;
};
/**
* @up_completion: connection establishment completed
* (state is ISER_CONN_UP)
* @conn_list: entry in ig conn list
- * @login_buf: login data buffer (stores login parameters)
- * @login_req_buf: login request buffer
- * @login_req_dma: login request buffer dma address
- * @login_resp_buf: login response buffer
- * @login_resp_dma: login response buffer dma address
+ * @login_desc: login descriptor
* @rx_desc_head: head of rx_descs cyclic buffer
* @rx_descs: rx buffers array (cyclic buffer)
* @num_rx_descs: number of rx descriptors
struct completion ib_completion;
struct completion up_completion;
struct list_head conn_list;
-
- char *login_buf;
- char *login_req_buf, *login_resp_buf;
- u64 login_req_dma, login_resp_dma;
+ struct iser_login_desc login_desc;
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
u32 num_rx_descs;
unsigned short scsi_sg_tablesize;
unsigned int scsi_max_sectors;
+ bool snd_w_inv;
};
/**
struct iser_page_vec {
u64 *pages;
- int length;
- int offset;
- int data_size;
+ int npages;
+ struct ib_mr fake_mr;
};
/**
void iser_release_work(struct work_struct *work);
-void iser_rcv_completion(struct iser_rx_desc *desc,
- unsigned long dto_xfer_len,
- struct ib_conn *ib_conn);
-
-void iser_snd_completion(struct iser_tx_desc *desc,
- struct ib_conn *ib_conn);
+void iser_err_comp(struct ib_wc *wc, const char *type);
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_task_rdma_init(struct iscsi_iser_task *task);
enum iser_data_dir cmd_dir);
int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir dir);
+ enum iser_data_dir dir,
+ bool all_imm);
void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
enum iser_data_dir dir);
return cur_wr;
}
+/* Return the iser_conn that embeds @ib_conn as its ib_conn member. */
+static inline struct iser_conn *
+to_iser_conn(struct ib_conn *ib_conn)
+{
+ return container_of(ib_conn, struct iser_conn, ib_conn);
+}
+
+/* Return the RX descriptor that embeds @cqe as its cqe member. */
+static inline struct iser_rx_desc *
+iser_rx(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_rx_desc, cqe);
+}
+
+/* Return the TX descriptor that embeds @cqe as its cqe member. */
+static inline struct iser_tx_desc *
+iser_tx(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_tx_desc, cqe);
+}
+
+/* Return the login descriptor that embeds @cqe as its cqe member. */
+static inline struct iser_login_desc *
+iser_login(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_login_desc, cqe);
+}
+
#endif
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_mem_reg *mem_reg;
int err;
- struct iser_hdr *hdr = &iser_task->desc.iser_header;
+ struct iser_ctrl *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
err = iser_dma_map_task_data(iser_task,
return err;
}
- err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
+ err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN, false);
if (err) {
iser_err("Failed to set up Data-IN RDMA\n");
return err;
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_mem_reg *mem_reg;
int err;
- struct iser_hdr *hdr = &iser_task->desc.iser_header;
+ struct iser_ctrl *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
return err;
}
- err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
+ err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT,
+ buf_out->data_len == imm_sz);
if (err != 0) {
iser_err("Failed to register write cmd RDMA mem\n");
return err;
ib_dma_sync_single_for_cpu(device->ib_device,
tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
- memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
+ memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
tx_desc->iser_header.flags = ISER_VER;
tx_desc->num_sge = 1;
}
static void iser_free_login_buf(struct iser_conn *iser_conn)
{
struct iser_device *device = iser_conn->ib_conn.device;
+ struct iser_login_desc *desc = &iser_conn->login_desc;
- if (!iser_conn->login_buf)
+ if (!desc->req)
return;
- if (iser_conn->login_req_dma)
- ib_dma_unmap_single(device->ib_device,
- iser_conn->login_req_dma,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+ ib_dma_unmap_single(device->ib_device, desc->req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
- if (iser_conn->login_resp_dma)
- ib_dma_unmap_single(device->ib_device,
- iser_conn->login_resp_dma,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+ ib_dma_unmap_single(device->ib_device, desc->rsp_dma,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
- kfree(iser_conn->login_buf);
+ kfree(desc->req);
+ kfree(desc->rsp);
/* make sure we never redo any unmapping */
- iser_conn->login_req_dma = 0;
- iser_conn->login_resp_dma = 0;
- iser_conn->login_buf = NULL;
+ desc->req = NULL;
+ desc->rsp = NULL;
}
static int iser_alloc_login_buf(struct iser_conn *iser_conn)
{
struct iser_device *device = iser_conn->ib_conn.device;
- int req_err, resp_err;
-
- BUG_ON(device == NULL);
-
- iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
- ISER_RX_LOGIN_SIZE, GFP_KERNEL);
- if (!iser_conn->login_buf)
- goto out_err;
-
- iser_conn->login_req_buf = iser_conn->login_buf;
- iser_conn->login_resp_buf = iser_conn->login_buf +
- ISCSI_DEF_MAX_RECV_SEG_LEN;
-
- iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
- iser_conn->login_req_buf,
- ISCSI_DEF_MAX_RECV_SEG_LEN,
- DMA_TO_DEVICE);
-
- iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
- iser_conn->login_resp_buf,
- ISER_RX_LOGIN_SIZE,
- DMA_FROM_DEVICE);
-
- req_err = ib_dma_mapping_error(device->ib_device,
- iser_conn->login_req_dma);
- resp_err = ib_dma_mapping_error(device->ib_device,
- iser_conn->login_resp_dma);
-
- if (req_err || resp_err) {
- if (req_err)
- iser_conn->login_req_dma = 0;
- if (resp_err)
- iser_conn->login_resp_dma = 0;
- goto free_login_buf;
- }
+ struct iser_login_desc *desc = &iser_conn->login_desc;
+
+ desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
+ if (!desc->req)
+ return -ENOMEM;
+
+ desc->req_dma = ib_dma_map_single(device->ib_device, desc->req,
+ ISCSI_DEF_MAX_RECV_SEG_LEN,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(device->ib_device,
+ desc->req_dma))
+ goto free_req;
+
+ desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+ if (!desc->rsp)
+ goto unmap_req;
+
+ desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp,
+ ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(device->ib_device,
+ desc->rsp_dma))
+ goto free_rsp;
+
return 0;
-free_login_buf:
- iser_free_login_buf(iser_conn);
+free_rsp:
+ kfree(desc->rsp);
+unmap_req:
+ ib_dma_unmap_single(device->ib_device, desc->req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN,
+ DMA_TO_DEVICE);
+free_req:
+ kfree(desc->req);
-out_err:
- iser_err("unable to alloc or map login buf\n");
return -ENOMEM;
}
goto rx_desc_dma_map_failed;
rx_desc->dma_addr = dma_addr;
-
+ rx_desc->cqe.done = iser_task_rsp;
rx_sg = &rx_desc->rx_sg;
- rx_sg->addr = rx_desc->dma_addr;
+ rx_sg->addr = rx_desc->dma_addr;
rx_sg->length = ISER_RX_PAYLOAD_SIZE;
- rx_sg->lkey = device->pd->local_dma_lkey;
+ rx_sg->lkey = device->pd->local_dma_lkey;
}
iser_conn->rx_desc_head = 0;
/* build the tx desc regd header and add it to the tx desc dto */
tx_desc->type = ISCSI_TX_SCSI_COMMAND;
+ tx_desc->cqe.done = iser_cmd_comp;
iser_create_send_desc(iser_conn, tx_desc);
if (hdr->flags & ISCSI_FLAG_CMD_READ) {
}
tx_desc->type = ISCSI_TX_DATAOUT;
+ tx_desc->cqe.done = iser_dataout_comp;
tx_desc->iser_header.flags = ISER_VER;
memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
/* build the tx desc regd header and add it to the tx desc dto */
mdesc->type = ISCSI_TX_CONTROL;
+ mdesc->cqe.done = iser_ctrl_comp;
iser_create_send_desc(iser_conn, mdesc);
device = iser_conn->ib_conn.device;
data_seg_len = ntoh24(task->hdr->dlength);
if (data_seg_len > 0) {
+ struct iser_login_desc *desc = &iser_conn->login_desc;
struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
+
if (task != conn->login_task) {
iser_err("data present on non login task!!!\n");
goto send_control_error;
}
- ib_dma_sync_single_for_cpu(device->ib_device,
- iser_conn->login_req_dma, task->data_count,
- DMA_TO_DEVICE);
+ ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma,
+ task->data_count, DMA_TO_DEVICE);
- memcpy(iser_conn->login_req_buf, task->data, task->data_count);
+ memcpy(desc->req, task->data, task->data_count);
- ib_dma_sync_single_for_device(device->ib_device,
- iser_conn->login_req_dma, task->data_count,
- DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(device->ib_device, desc->req_dma,
+ task->data_count, DMA_TO_DEVICE);
- tx_dsg->addr = iser_conn->login_req_dma;
- tx_dsg->length = task->data_count;
- tx_dsg->lkey = device->pd->local_dma_lkey;
+ tx_dsg->addr = desc->req_dma;
+ tx_dsg->length = task->data_count;
+ tx_dsg->lkey = device->pd->local_dma_lkey;
mdesc->num_sge = 2;
}
return err;
}
-/**
- * iser_rcv_dto_completion - recv DTO completion
- */
-void iser_rcv_completion(struct iser_rx_desc *rx_desc,
- unsigned long rx_xfer_len,
- struct ib_conn *ib_conn)
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
{
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+ struct iser_login_desc *desc = iser_login(wc->wr_cqe);
struct iscsi_hdr *hdr;
- u64 rx_dma;
- int rx_buflen, outstanding, count, err;
+ char *data;
+ int length;
- /* differentiate between login to all other PDUs */
- if ((char *)rx_desc == iser_conn->login_resp_buf) {
- rx_dma = iser_conn->login_resp_dma;
- rx_buflen = ISER_RX_LOGIN_SIZE;
- } else {
- rx_dma = rx_desc->dma_addr;
- rx_buflen = ISER_RX_PAYLOAD_SIZE;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "login_rsp");
+ return;
+ }
+
+ ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+ desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+
+ hdr = desc->rsp + sizeof(struct iser_ctrl);
+ data = desc->rsp + ISER_HEADERS_LEN;
+ length = wc->byte_len - ISER_HEADERS_LEN;
+
+ iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
+ hdr->itt, length);
+
+ iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length);
+
+ ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+ desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+
+ ib_conn->post_recv_buf_count--;
+}
+
+/*
+ * Mark the registration in @desc that matches the remotely invalidated
+ * @rkey as no longer valid.
+ *
+ * The data MR is checked first, then the signature MR. The signature
+ * context is only dereferenced when it is present, so an @rkey that matches
+ * nothing on a descriptor without a signature context is ignored instead of
+ * dereferencing a NULL pi_ctx.
+ */
+static inline void
+iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
+{
+ if (likely(rkey == desc->rsc.mr->rkey))
+ desc->rsc.mr_valid = 0;
+ else if (likely(desc->pi_ctx && rkey == desc->pi_ctx->sig_mr->rkey))
+ desc->pi_ctx->sig_mr_valid = 0;
+}
+
+static int
+iser_check_remote_inv(struct iser_conn *iser_conn,
+ struct ib_wc *wc,
+ struct iscsi_hdr *hdr)
+{
+ if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+ struct iscsi_task *task;
+ u32 rkey = wc->ex.invalidate_rkey;
+
+ iser_dbg("conn %p: remote invalidation for rkey %#x\n",
+ iser_conn, rkey);
+
+ if (unlikely(!iser_conn->snd_w_inv)) {
+ iser_err("conn %p: unexepected remote invalidation, "
+ "terminating connection\n", iser_conn);
+ return -EPROTO;
+ }
+
+ task = iscsi_itt_to_ctask(iser_conn->iscsi_conn, hdr->itt);
+ if (likely(task)) {
+ struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_fr_desc *desc;
+
+ if (iser_task->dir[ISER_DIR_IN]) {
+ desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
+ iser_inv_desc(desc, rkey);
+ }
+
+ if (iser_task->dir[ISER_DIR_OUT]) {
+ desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
+ iser_inv_desc(desc, rkey);
+ }
+ } else {
+ iser_err("failed to get task for itt=%d\n", hdr->itt);
+ return -EINVAL;
+ }
}
- ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
- rx_buflen, DMA_FROM_DEVICE);
+ return 0;
+}
- hdr = &rx_desc->iscsi_header;
+
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+ struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
+ struct iscsi_hdr *hdr;
+ int length;
+ int outstanding, count, err;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "task_rsp");
+ return;
+ }
+
+ ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+ desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+ DMA_FROM_DEVICE);
+
+ hdr = &desc->iscsi_header;
+ length = wc->byte_len - ISER_HEADERS_LEN;
iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
- hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
+ hdr->itt, length);
+
+ if (iser_check_remote_inv(iser_conn, wc, hdr)) {
+ iscsi_conn_failure(iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
+ return;
+ }
- iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
- rx_xfer_len - ISER_HEADERS_LEN);
+ iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length);
- ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
- rx_buflen, DMA_FROM_DEVICE);
+ ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+ desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+ DMA_FROM_DEVICE);
/* decrementing conn->post_recv_buf_count only --after-- freeing the *
* task eliminates the need to worry on tasks which are completed in *
* for the posted rx bufs refcount to become zero handles everything */
ib_conn->post_recv_buf_count--;
- if (rx_dma == iser_conn->login_resp_dma)
- return;
-
outstanding = ib_conn->post_recv_buf_count;
if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
count = min(iser_conn->qp_max_recv_dtos - outstanding,
}
}
-void iser_snd_completion(struct iser_tx_desc *tx_desc,
- struct ib_conn *ib_conn)
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc)
{
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ iser_err_comp(wc, "command");
+}
+
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
struct iscsi_task *task;
- struct iser_device *device = ib_conn->device;
- if (tx_desc->type == ISCSI_TX_DATAOUT) {
- ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
- ISER_HEADERS_LEN, DMA_TO_DEVICE);
- kmem_cache_free(ig.desc_cache, tx_desc);
- tx_desc = NULL;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "control");
+ return;
}
- if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
- /* this arithmetic is legal by libiscsi dd_data allocation */
- task = (void *) ((long)(void *)tx_desc -
- sizeof(struct iscsi_task));
- if (task->hdr->itt == RESERVED_ITT)
- iscsi_put_task(task);
- }
+ /* this arithmetic is legal by libiscsi dd_data allocation */
+ task = (void *)desc - sizeof(struct iscsi_task);
+ if (task->hdr->itt == RESERVED_ITT)
+ iscsi_put_task(task);
+}
+
+/*
+ * Send completion handler for Data-Out descriptors.
+ *
+ * @cq: completion queue the work completion was reaped from (unused here)
+ * @wc: work completion; wc->wr_cqe is embedded in the TX descriptor
+ *
+ * A failed completion is reported through iser_err_comp(), but there is no
+ * early return: the descriptor header is DMA-unmapped and the descriptor is
+ * returned to ig.desc_cache for successful and failed completions alike.
+ */
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_device *device = ib_conn->device;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ iser_err_comp(wc, "dataout");
+
+ ib_dma_unmap_single(device->ib_device, desc->dma_addr,
+ ISER_HEADERS_LEN, DMA_TO_DEVICE);
+ kmem_cache_free(ig.desc_cache, desc);
+}
+
+/*
+ * Completion handler that signals ib_conn->last_comp for the connection
+ * found in the QP context of @wc. The completion status is not inspected.
+ * NOTE(review): presumably installed as the handler of ib_conn->last_cqe
+ * for the "last" drain work request - confirm against the posting code.
+ */
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+
+ complete(&ib_conn->last_comp);
}
void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
struct iser_reg_resources *rsc,
struct iser_mem_reg *mem_reg);
-static struct iser_reg_ops fastreg_ops = {
+static const struct iser_reg_ops fastreg_ops = {
.alloc_reg_res = iser_alloc_fastreg_pool,
.free_reg_res = iser_free_fastreg_pool,
.reg_mem = iser_fast_reg_mr,
.reg_desc_put = iser_reg_desc_put_fr,
};
-static struct iser_reg_ops fmr_ops = {
+static const struct iser_reg_ops fmr_ops = {
.alloc_reg_res = iser_alloc_fmr_pool,
.free_reg_res = iser_free_fmr_pool,
.reg_mem = iser_fast_reg_fmr,
.reg_desc_put = iser_reg_desc_put_fmr,
};
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ iser_err_comp(wc, "memreg");
+}
+
int iser_assign_reg_ops(struct iser_device *device)
{
- struct ib_device_attr *dev_attr = &device->dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
/* Assign function handles - based on FMR support */
- if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
- device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+ if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr &&
+ ib_dev->map_phys_fmr && ib_dev->unmap_fmr) {
iser_info("FMR supported, using FMR for registration\n");
device->reg_ops = &fmr_ops;
- } else
- if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
iser_info("FastReg supported, using FastReg for registration\n");
device->reg_ops = &fastreg_ops;
+ device->remote_inv_sup = iser_always_reg;
} else {
iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
return -1;
{
}
-#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
-
-/**
- * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
- * and returns the length of resulting physical address array (may be less than
- * the original due to possible compaction).
- *
- * we build a "page vec" under the assumption that the SG meets the RDMA
- * alignment requirements. Other then the first and last SG elements, all
- * the "internal" elements can be compacted into a list whose elements are
- * dma addresses of physical pages. The code supports also the weird case
- * where --few fragments of the same page-- are present in the SG as
- * consecutive elements. Also, it handles one entry SG.
- */
-
-static int iser_sg_to_page_vec(struct iser_data_buf *data,
- struct ib_device *ibdev, u64 *pages,
- int *offset, int *data_size)
-{
- struct scatterlist *sg, *sgl = data->sg;
- u64 start_addr, end_addr, page, chunk_start = 0;
- unsigned long total_sz = 0;
- unsigned int dma_len;
- int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
-
- /* compute the offset of first element */
- *offset = (u64) sgl[0].offset & ~MASK_4K;
-
- new_chunk = 1;
- cur_page = 0;
- for_each_sg(sgl, sg, data->dma_nents, i) {
- start_addr = ib_sg_dma_address(ibdev, sg);
- if (new_chunk)
- chunk_start = start_addr;
- dma_len = ib_sg_dma_len(ibdev, sg);
- end_addr = start_addr + dma_len;
- total_sz += dma_len;
-
- /* collect page fragments until aligned or end of SG list */
- if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
- new_chunk = 0;
- continue;
- }
- new_chunk = 1;
-
- /* address of the first page in the contiguous chunk;
- masking relevant for the very first SG entry,
- which might be unaligned */
- page = chunk_start & MASK_4K;
- do {
- pages[cur_page++] = page;
- page += SIZE_4K;
- } while (page < end_addr);
- }
-
- *data_size = total_sz;
- iser_dbg("page_vec->data_size:%d cur_page %d\n",
- *data_size, cur_page);
- return cur_page;
-}
-
static void iser_data_buf_dump(struct iser_data_buf *data,
struct ib_device *ibdev)
{
{
int i;
- iser_err("page vec length %d data size %d\n",
- page_vec->length, page_vec->data_size);
- for (i = 0; i < page_vec->length; i++)
- iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
+ iser_err("page vec npages %d data length %d\n",
+ page_vec->npages, page_vec->fake_mr.length);
+ for (i = 0; i < page_vec->npages; i++)
+ iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
}
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
struct scatterlist *sg = mem->sg;
reg->sge.lkey = device->pd->local_dma_lkey;
- reg->rkey = device->mr->rkey;
+ /*
+ * FIXME: rework the registration code path to differentiate
+ * rkey/lkey use cases
+ */
+ reg->rkey = device->mr ? device->mr->rkey : 0;
reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
return 0;
}
-/**
- * iser_reg_page_vec - Register physical memory
- *
- * returns: 0 on success, errno code on failure
- */
+static int iser_set_page(struct ib_mr *mr, u64 addr)
+{
+ struct iser_page_vec *page_vec =
+ container_of(mr, struct iser_page_vec, fake_mr);
+
+ page_vec->pages[page_vec->npages++] = addr;
+
+ return 0;
+}
+
static
int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
struct ib_pool_fmr *fmr;
int ret, plen;
- plen = iser_sg_to_page_vec(mem, device->ib_device,
- page_vec->pages,
- &page_vec->offset,
- &page_vec->data_size);
- page_vec->length = plen;
- if (plen * SIZE_4K < page_vec->data_size) {
+ page_vec->npages = 0;
+ page_vec->fake_mr.page_size = SIZE_4K;
+ plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
+ mem->size, iser_set_page);
+ if (unlikely(plen < mem->size)) {
iser_err("page vec too short to hold this SG\n");
iser_data_buf_dump(mem, device->ib_device);
iser_dump_page_vec(page_vec);
return -EINVAL;
}
- fmr = ib_fmr_pool_map_phys(fmr_pool,
- page_vec->pages,
- page_vec->length,
- page_vec->pages[0]);
+ fmr = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages,
+ page_vec->npages, page_vec->pages[0]);
if (IS_ERR(fmr)) {
ret = PTR_ERR(fmr);
iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
reg->sge.lkey = fmr->fmr->lkey;
reg->rkey = fmr->fmr->rkey;
- reg->sge.addr = page_vec->pages[0] + page_vec->offset;
- reg->sge.length = page_vec->data_size;
+ reg->sge.addr = page_vec->fake_mr.iova;
+ reg->sge.length = page_vec->fake_mr.length;
reg->mem_h = fmr;
iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
*mask |= ISER_CHECK_GUARD;
}
-static void
-iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+static inline void
+iser_inv_rkey(struct ib_send_wr *inv_wr,
+ struct ib_mr *mr,
+ struct ib_cqe *cqe)
{
- u32 rkey;
-
inv_wr->opcode = IB_WR_LOCAL_INV;
- inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+ inv_wr->wr_cqe = cqe;
inv_wr->ex.invalidate_rkey = mr->rkey;
inv_wr->send_flags = 0;
inv_wr->num_sge = 0;
-
- rkey = ib_inc_rkey(mr->rkey);
- ib_update_fast_reg_key(mr, rkey);
}
static int
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
+ struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_sig_handover_wr *wr;
+ struct ib_mr *mr = pi_ctx->sig_mr;
int ret;
memset(sig_attrs, 0, sizeof(*sig_attrs));
iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
- if (!pi_ctx->sig_mr_valid)
- iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr);
+ if (pi_ctx->sig_mr_valid)
+ iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+ ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
wr->wr.opcode = IB_WR_REG_SIG_MR;
- wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+ wr->wr.wr_cqe = cqe;
wr->wr.sg_list = &data_reg->sge;
wr->wr.num_sge = 1;
wr->wr.send_flags = 0;
wr->sig_attrs = sig_attrs;
- wr->sig_mr = pi_ctx->sig_mr;
+ wr->sig_mr = mr;
if (scsi_prot_sg_count(iser_task->sc))
wr->prot = &prot_reg->sge;
else
wr->access_flags = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
- pi_ctx->sig_mr_valid = 0;
+ pi_ctx->sig_mr_valid = 1;
- sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
- sig_reg->rkey = pi_ctx->sig_mr->rkey;
+ sig_reg->sge.lkey = mr->lkey;
+ sig_reg->rkey = mr->rkey;
sig_reg->sge.addr = 0;
sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
struct iser_mem_reg *reg)
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
+ struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_mr *mr = rsc->mr;
struct ib_reg_wr *wr;
int n;
- if (!rsc->mr_valid)
- iser_inv_rkey(iser_tx_next_wr(tx_desc), mr);
+ if (rsc->mr_valid)
+ iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+ ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
if (unlikely(n != mem->size)) {
wr = reg_wr(iser_tx_next_wr(tx_desc));
wr->wr.opcode = IB_WR_REG_MR;
- wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+ wr->wr.wr_cqe = cqe;
wr->wr.send_flags = 0;
wr->wr.num_sge = 0;
wr->mr = mr;
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ;
- rsc->mr_valid = 0;
+ rsc->mr_valid = 1;
reg->sge.lkey = mr->lkey;
reg->rkey = mr->rkey;
}
int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir dir)
+ enum iser_data_dir dir,
+ bool all_imm)
{
struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
bool use_dma_key;
int err;
- use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
- scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
+ use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) &&
+ scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL;
if (!use_dma_key) {
desc = device->reg_ops->reg_desc_get(ib_conn);
#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
ISCSI_ISER_MAX_CONN)
-static int iser_cq_poll_limit = 512;
-
-static void iser_cq_tasklet_fn(unsigned long data);
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
-
-static void iser_cq_event_callback(struct ib_event *cause, void *context)
-{
- iser_err("cq event %s (%d)\n",
- ib_event_msg(cause->event), cause->event);
-}
-
static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
iser_err("qp event %s (%d)\n",
*/
static int iser_create_device_ib_res(struct iser_device *device)
{
- struct ib_device_attr *dev_attr = &device->dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
int ret, i, max_cqe;
- ret = ib_query_device(device->ib_device, dev_attr);
- if (ret) {
- pr_warn("Query device failed for %s\n", device->ib_device->name);
- return ret;
- }
-
ret = iser_assign_reg_ops(device);
if (ret)
return ret;
device->comps_used = min_t(int, num_online_cpus(),
- device->ib_device->num_comp_vectors);
+ ib_dev->num_comp_vectors);
device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
GFP_KERNEL);
if (!device->comps)
goto comps_err;
- max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+ max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
- device->comps_used, device->ib_device->name,
- device->ib_device->num_comp_vectors, max_cqe);
+ device->comps_used, ib_dev->name,
+ ib_dev->num_comp_vectors, max_cqe);
- device->pd = ib_alloc_pd(device->ib_device);
+ device->pd = ib_alloc_pd(ib_dev);
if (IS_ERR(device->pd))
goto pd_err;
for (i = 0; i < device->comps_used; i++) {
- struct ib_cq_init_attr cq_attr = {};
struct iser_comp *comp = &device->comps[i];
- comp->device = device;
- cq_attr.cqe = max_cqe;
- cq_attr.comp_vector = i;
- comp->cq = ib_create_cq(device->ib_device,
- iser_cq_callback,
- iser_cq_event_callback,
- (void *)comp,
- &cq_attr);
+ comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
+ IB_POLL_SOFTIRQ);
if (IS_ERR(comp->cq)) {
comp->cq = NULL;
goto cq_err;
}
-
- if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
- goto cq_err;
-
- tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
- (unsigned long)comp);
}
if (!iser_always_reg) {
device->mr = ib_get_dma_mr(device->pd, access);
if (IS_ERR(device->mr))
- goto dma_mr_err;
+ goto cq_err;
}
- INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
- iser_event_handler);
+ INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev,
+ iser_event_handler);
if (ib_register_event_handler(&device->event_handler))
goto handler_err;
handler_err:
if (device->mr)
ib_dereg_mr(device->mr);
-dma_mr_err:
- for (i = 0; i < device->comps_used; i++)
- tasklet_kill(&device->comps[i].tasklet);
cq_err:
for (i = 0; i < device->comps_used; i++) {
struct iser_comp *comp = &device->comps[i];
if (comp->cq)
- ib_destroy_cq(comp->cq);
+ ib_free_cq(comp->cq);
}
ib_dealloc_pd(device->pd);
pd_err:
for (i = 0; i < device->comps_used; i++) {
struct iser_comp *comp = &device->comps[i];
- tasklet_kill(&comp->tasklet);
- ib_destroy_cq(comp->cq);
+ ib_free_cq(comp->cq);
comp->cq = NULL;
}
iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
return ret;
}
- res->mr_valid = 1;
+ res->mr_valid = 0;
return 0;
}
ret = PTR_ERR(pi_ctx->sig_mr);
goto sig_mr_failure;
}
- pi_ctx->sig_mr_valid = 1;
+ pi_ctx->sig_mr_valid = 0;
desc->pi_ctx->sig_protected = 0;
return 0;
*/
static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
struct iser_device *device;
- struct ib_device_attr *dev_attr;
+ struct ib_device *ib_dev;
struct ib_qp_init_attr init_attr;
int ret = -ENOMEM;
int index, min_index = 0;
BUG_ON(ib_conn->device == NULL);
device = ib_conn->device;
- dev_attr = &device->dev_attr;
+ ib_dev = device->ib_device;
memset(&init_attr, 0, sizeof init_attr);
iser_conn->max_cmds =
ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
} else {
- if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+ if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
iser_conn->max_cmds =
ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
} else {
- init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+ init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr;
iser_conn->max_cmds =
- ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+ ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
iser_dbg("device %s supports max_send_wr %d\n",
- device->ib_device->name, dev_attr->max_qp_wr);
+ device->ib_device->name, ib_dev->attrs.max_qp_wr);
}
}
iser_conn, err);
/* post an indication that all flush errors were consumed */
- err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
+ err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
if (err) {
- iser_err("conn %p failed to post beacon", ib_conn);
+ iser_err("conn %p failed to post last wr", ib_conn);
return 1;
}
- wait_for_completion(&ib_conn->flush_comp);
+ wait_for_completion(&ib_conn->last_comp);
}
return 1;
sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
- device->dev_attr.max_fast_reg_page_list_len);
+ device->ib_device->attrs.max_fast_reg_page_list_len);
if (sg_tablesize > sup_sg_tablesize) {
sg_tablesize = sup_sg_tablesize;
/* connection T10-PI support */
if (iser_pi_enable) {
- if (!(device->dev_attr.device_cap_flags &
+ if (!(device->ib_device->attrs.device_cap_flags &
IB_DEVICE_SIGNATURE_HANDOVER)) {
iser_warn("T10-PI requested but not supported on %s, "
"continue without T10-PI\n",
goto failure;
memset(&conn_param, 0, sizeof conn_param);
- conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
+ conn_param.responder_resources = device->ib_device->attrs.max_qp_rd_atom;
conn_param.initiator_depth = 1;
conn_param.retry_count = 7;
conn_param.rnr_retry_count = 6;
memset(&req_hdr, 0, sizeof(req_hdr));
- req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
- ISER_SEND_W_INV_NOT_SUPPORTED);
- conn_param.private_data = (void *)&req_hdr;
- conn_param.private_data_len = sizeof(struct iser_cm_hdr);
+ req_hdr.flags = ISER_ZBVA_NOT_SUP;
+ if (!device->remote_inv_sup)
+ req_hdr.flags |= ISER_SEND_W_INV_NOT_SUP;
+ conn_param.private_data = (void *)&req_hdr;
+ conn_param.private_data_len = sizeof(struct iser_cm_hdr);
ret = rdma_connect(cma_id, &conn_param);
if (ret) {
iser_connect_error(cma_id);
}
-static void iser_connected_handler(struct rdma_cm_id *cma_id)
+static void iser_connected_handler(struct rdma_cm_id *cma_id,
+ const void *private_data)
{
struct iser_conn *iser_conn;
struct ib_qp_attr attr;
(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
+ if (private_data) {
+ u8 flags = *(u8 *)private_data;
+
+ iser_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP);
+ }
+
+ iser_info("conn %p: negotiated %s invalidation\n",
+ iser_conn, iser_conn->snd_w_inv ? "remote" : "local");
+
iser_conn->state = ISER_CONN_UP;
complete(&iser_conn->up_completion);
}
iser_route_handler(cma_id);
break;
case RDMA_CM_EVENT_ESTABLISHED:
- iser_connected_handler(cma_id);
+ iser_connected_handler(cma_id, event->param.conn.private_data);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
void iser_conn_init(struct iser_conn *iser_conn)
{
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+
iser_conn->state = ISER_CONN_INIT;
- iser_conn->ib_conn.post_recv_buf_count = 0;
- init_completion(&iser_conn->ib_conn.flush_comp);
init_completion(&iser_conn->stop_completion);
init_completion(&iser_conn->ib_completion);
init_completion(&iser_conn->up_completion);
INIT_LIST_HEAD(&iser_conn->conn_list);
mutex_init(&iser_conn->state_mutex);
+
+ ib_conn->post_recv_buf_count = 0;
+ ib_conn->reg_cqe.done = iser_reg_comp;
+ ib_conn->last_cqe.done = iser_last_comp;
+ ib_conn->last.wr_cqe = &ib_conn->last_cqe;
+ ib_conn->last.opcode = IB_WR_SEND;
+ init_completion(&ib_conn->last_comp);
}
/**
iser_conn->state = ISER_CONN_PENDING;
- ib_conn->beacon.wr_id = ISER_BEACON_WRID;
- ib_conn->beacon.opcode = IB_WR_SEND;
-
ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
(void *)iser_conn,
RDMA_PS_TCP, IB_QPT_RC);
int iser_post_recvl(struct iser_conn *iser_conn)
{
- struct ib_recv_wr rx_wr, *rx_wr_failed;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
- struct ib_sge sge;
+ struct iser_login_desc *desc = &iser_conn->login_desc;
+ struct ib_recv_wr wr, *wr_failed;
int ib_ret;
- sge.addr = iser_conn->login_resp_dma;
- sge.length = ISER_RX_LOGIN_SIZE;
- sge.lkey = ib_conn->device->pd->local_dma_lkey;
+ desc->sge.addr = desc->rsp_dma;
+ desc->sge.length = ISER_RX_LOGIN_SIZE;
+ desc->sge.lkey = ib_conn->device->pd->local_dma_lkey;
- rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
- rx_wr.sg_list = &sge;
- rx_wr.num_sge = 1;
- rx_wr.next = NULL;
+ desc->cqe.done = iser_login_rsp;
+ wr.wr_cqe = &desc->cqe;
+ wr.sg_list = &desc->sge;
+ wr.num_sge = 1;
+ wr.next = NULL;
ib_conn->post_recv_buf_count++;
- ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
+ ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed);
if (ib_ret) {
iser_err("ib_post_recv failed ret=%d\n", ib_ret);
ib_conn->post_recv_buf_count--;
}
+
return ib_ret;
}
int iser_post_recvm(struct iser_conn *iser_conn, int count)
{
- struct ib_recv_wr *rx_wr, *rx_wr_failed;
- int i, ib_ret;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
unsigned int my_rx_head = iser_conn->rx_desc_head;
struct iser_rx_desc *rx_desc;
+ struct ib_recv_wr *wr, *wr_failed;
+ int i, ib_ret;
- for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
- rx_desc = &iser_conn->rx_descs[my_rx_head];
- rx_wr->wr_id = (uintptr_t)rx_desc;
- rx_wr->sg_list = &rx_desc->rx_sg;
- rx_wr->num_sge = 1;
- rx_wr->next = rx_wr + 1;
+ for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
+ rx_desc = &iser_conn->rx_descs[my_rx_head];
+ rx_desc->cqe.done = iser_task_rsp;
+ wr->wr_cqe = &rx_desc->cqe;
+ wr->sg_list = &rx_desc->rx_sg;
+ wr->num_sge = 1;
+ wr->next = wr + 1;
my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
}
- rx_wr--;
- rx_wr->next = NULL; /* mark end of work requests list */
+ wr--;
+ wr->next = NULL; /* mark end of work requests list */
ib_conn->post_recv_buf_count += count;
- ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
+ ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed);
if (ib_ret) {
iser_err("ib_post_recv failed ret=%d\n", ib_ret);
ib_conn->post_recv_buf_count -= count;
} else
iser_conn->rx_desc_head = my_rx_head;
+
return ib_ret;
}
DMA_TO_DEVICE);
wr->next = NULL;
- wr->wr_id = (uintptr_t)tx_desc;
+ wr->wr_cqe = &tx_desc->cqe;
wr->sg_list = tx_desc->tx_sg;
wr->num_sge = tx_desc->num_sge;
wr->opcode = IB_WR_SEND;
return ib_ret;
}
-/**
- * is_iser_tx_desc - Indicate if the completion wr_id
- * is a TX descriptor or not.
- * @iser_conn: iser connection
- * @wr_id: completion WR identifier
- *
- * Since we cannot rely on wc opcode in FLUSH errors
- * we must work around it by checking if the wr_id address
- * falls in the iser connection rx_descs buffer. If so
- * it is an RX descriptor, otherwize it is a TX.
- */
-static inline bool
-is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
-{
- void *start = iser_conn->rx_descs;
- int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
-
- if (wr_id >= start && wr_id < start + len)
- return false;
-
- return true;
-}
-
-/**
- * iser_handle_comp_error() - Handle error completion
- * @ib_conn: connection RDMA resources
- * @wc: work completion
- *
- * Notes: We may handle a FLUSH error completion and in this case
- * we only cleanup in case TX type was DATAOUT. For non-FLUSH
- * error completion we should also notify iscsi layer that
- * connection is failed (in case we passed bind stage).
- */
-static void
-iser_handle_comp_error(struct ib_conn *ib_conn,
- struct ib_wc *wc)
-{
- void *wr_id = (void *)(uintptr_t)wc->wr_id;
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
-
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- if (iser_conn->iscsi_conn)
- iscsi_conn_failure(iser_conn->iscsi_conn,
- ISCSI_ERR_CONN_FAILED);
-
- if (wc->wr_id == ISER_FASTREG_LI_WRID)
- return;
-
- if (is_iser_tx_desc(iser_conn, wr_id)) {
- struct iser_tx_desc *desc = wr_id;
-
- if (desc->type == ISCSI_TX_DATAOUT)
- kmem_cache_free(ig.desc_cache, desc);
- } else {
- ib_conn->post_recv_buf_count--;
- }
-}
-
-/**
- * iser_handle_wc - handle a single work completion
- * @wc: work completion
- *
- * Soft-IRQ context, work completion can be either
- * SEND or RECV, and can turn out successful or
- * with error (or flush error).
- */
-static void iser_handle_wc(struct ib_wc *wc)
-{
- struct ib_conn *ib_conn;
- struct iser_tx_desc *tx_desc;
- struct iser_rx_desc *rx_desc;
-
- ib_conn = wc->qp->qp_context;
- if (likely(wc->status == IB_WC_SUCCESS)) {
- if (wc->opcode == IB_WC_RECV) {
- rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
- iser_rcv_completion(rx_desc, wc->byte_len,
- ib_conn);
- } else
- if (wc->opcode == IB_WC_SEND) {
- tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
- iser_snd_completion(tx_desc, ib_conn);
- } else {
- iser_err("Unknown wc opcode %d\n", wc->opcode);
- }
- } else {
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- iser_err("%s (%d): wr id %llx vend_err %x\n",
- ib_wc_status_msg(wc->status), wc->status,
- wc->wr_id, wc->vendor_err);
- else
- iser_dbg("%s (%d): wr id %llx\n",
- ib_wc_status_msg(wc->status), wc->status,
- wc->wr_id);
-
- if (wc->wr_id == ISER_BEACON_WRID)
- /* all flush errors were consumed */
- complete(&ib_conn->flush_comp);
- else
- iser_handle_comp_error(ib_conn, wc);
- }
-}
-
-/**
- * iser_cq_tasklet_fn - iSER completion polling loop
- * @data: iSER completion context
- *
- * Soft-IRQ context, polling connection CQ until
- * either CQ was empty or we exausted polling budget
- */
-static void iser_cq_tasklet_fn(unsigned long data)
-{
- struct iser_comp *comp = (struct iser_comp *)data;
- struct ib_cq *cq = comp->cq;
- struct ib_wc *const wcs = comp->wcs;
- int i, n, completed = 0;
-
- while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
- for (i = 0; i < n; i++)
- iser_handle_wc(&wcs[i]);
-
- completed += n;
- if (completed >= iser_cq_poll_limit)
- break;
- }
-
- /*
- * It is assumed here that arming CQ only once its empty
- * would not cause interrupts to be missed.
- */
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-
- iser_dbg("got %d completions\n", completed);
-}
-
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
-{
- struct iser_comp *comp = cq_context;
-
- tasklet_schedule(&comp->tasklet);
-}
-
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector)
{
/* Not alot we can do here, return ambiguous guard error */
return 0x1;
}
+
+void iser_err_comp(struct ib_wc *wc, const char *type)
+{
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context);
+
+ iser_err("%s failure: %s (%d) vend_err %x\n", type,
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->vendor_err);
+
+ if (iser_conn->iscsi_conn)
+ iscsi_conn_failure(iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
+ } else {
+ iser_dbg("%s failure: %s (%d)\n", type,
+ ib_wc_status_msg(wc->status), wc->status);
+ }
+}
#include <target/iscsi/iscsi_transport.h>
#include <linux/semaphore.h>
-#include "isert_proto.h"
#include "ib_isert.h"
#define ISERT_MAX_CONN 8
}
}
-static int
-isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
-{
- int ret;
-
- ret = ib_query_device(ib_dev, devattr);
- if (ret) {
- isert_err("ib_query_device() failed: %d\n", ret);
- return ret;
- }
- isert_dbg("devattr->max_sge: %d\n", devattr->max_sge);
- isert_dbg("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
-
- return 0;
-}
-
static struct isert_comp *
isert_comp_get(struct isert_conn *isert_conn)
{
attr.recv_cq = comp->cq;
attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
- attr.cap.max_send_sge = device->dev_attr.max_sge;
- isert_conn->max_sge = min(device->dev_attr.max_sge,
- device->dev_attr.max_sge_rd);
+ attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
+ isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
+ device->ib_device->attrs.max_sge_rd);
attr.cap.max_recv_sge = 1;
attr.sq_sig_type = IB_SIGNAL_REQ_WR;
attr.qp_type = IB_QPT_RC;
}
static int
-isert_alloc_comps(struct isert_device *device,
- struct ib_device_attr *attr)
+isert_alloc_comps(struct isert_device *device)
{
int i, max_cqe, ret = 0;
return -ENOMEM;
}
- max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe);
+ max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
for (i = 0; i < device->comps_used; i++) {
struct ib_cq_init_attr cq_attr = {};
static int
isert_create_device_ib_res(struct isert_device *device)
{
- struct ib_device_attr *dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
int ret;
- dev_attr = &device->dev_attr;
- ret = isert_query_device(device->ib_device, dev_attr);
- if (ret)
- goto out;
+ isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
+ isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
/* asign function handlers */
- if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
- dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+ if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+ ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
device->use_fastreg = 1;
device->reg_rdma_mem = isert_reg_rdma;
device->unreg_rdma_mem = isert_unreg_rdma;
device->unreg_rdma_mem = isert_unmap_cmd;
}
- ret = isert_alloc_comps(device, dev_attr);
+ ret = isert_alloc_comps(device);
if (ret)
goto out;
- device->pd = ib_alloc_pd(device->ib_device);
+ device->pd = ib_alloc_pd(ib_dev);
if (IS_ERR(device->pd)) {
ret = PTR_ERR(device->pd);
isert_err("failed to allocate pd, device %p, ret=%d\n",
}
/* Check signature cap */
- device->pi_capable = dev_attr->device_cap_flags &
+ device->pi_capable = ib_dev->attrs.device_cap_flags &
IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
return 0;
return ret;
}
+static void
+isert_set_nego_params(struct isert_conn *isert_conn,
+ struct rdma_conn_param *param)
+{
+ struct ib_device_attr *attr = &isert_conn->device->ib_device->attrs;
+
+ /* Set max inflight RDMA READ requests */
+ isert_conn->initiator_depth = min_t(u8, param->initiator_depth,
+ attr->max_qp_init_rd_atom);
+ isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+
+ if (param->private_data) {
+ u8 flags = *(u8 *)param->private_data;
+
+ /*
+ * use remote invalidation only if both the initiator
+ * and the HCA support it
+ */
+ isert_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP) &&
+ (attr->device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ if (isert_conn->snd_w_inv)
+ isert_info("Using remote invalidation\n");
+ }
+}
+
static int
isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
}
isert_conn->device = device;
- /* Set max inflight RDMA READ requests */
- isert_conn->initiator_depth = min_t(u8,
- event->param.conn.initiator_depth,
- device->dev_attr.max_qp_init_rd_atom);
- isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+ isert_set_nego_params(isert_conn, &event->param.conn);
ret = isert_conn_setup_qp(isert_conn, cma_id);
if (ret)
ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
ISER_HEADERS_LEN, DMA_TO_DEVICE);
- memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
- tx_desc->iser_header.flags = ISER_VER;
+ memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
+ tx_desc->iser_header.flags = ISCSI_CTRL;
tx_desc->num_sge = 1;
tx_desc->isert_cmd = isert_cmd;
isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
- send_wr->opcode = IB_WR_SEND;
+
+ if (isert_conn->snd_w_inv && isert_cmd->inv_rkey) {
+ send_wr->opcode = IB_WR_SEND_WITH_INV;
+ send_wr->ex.invalidate_rkey = isert_cmd->inv_rkey;
+ } else {
+ send_wr->opcode = IB_WR_SEND;
+ }
+
send_wr->sg_list = &tx_desc->tx_sg[0];
send_wr->num_sge = isert_cmd->tx_desc.num_sge;
send_wr->send_flags = IB_SEND_SIGNALED;
isert_cmd->read_va = read_va;
isert_cmd->write_stag = write_stag;
isert_cmd->write_va = write_va;
+ isert_cmd->inv_rkey = read_stag ? read_stag : write_stag;
ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd,
rx_desc, (unsigned char *)hdr);
static void
isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
{
- struct iser_hdr *iser_hdr = &rx_desc->iser_header;
+ struct iser_ctrl *iser_ctrl = &rx_desc->iser_header;
uint64_t read_va = 0, write_va = 0;
uint32_t read_stag = 0, write_stag = 0;
- switch (iser_hdr->flags & 0xF0) {
+ switch (iser_ctrl->flags & 0xF0) {
case ISCSI_CTRL:
- if (iser_hdr->flags & ISER_RSV) {
- read_stag = be32_to_cpu(iser_hdr->read_stag);
- read_va = be64_to_cpu(iser_hdr->read_va);
+ if (iser_ctrl->flags & ISER_RSV) {
+ read_stag = be32_to_cpu(iser_ctrl->read_stag);
+ read_va = be64_to_cpu(iser_ctrl->read_va);
isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n",
read_stag, (unsigned long long)read_va);
}
- if (iser_hdr->flags & ISER_WSV) {
- write_stag = be32_to_cpu(iser_hdr->write_stag);
- write_va = be64_to_cpu(iser_hdr->write_va);
+ if (iser_ctrl->flags & ISER_WSV) {
+ write_stag = be32_to_cpu(iser_ctrl->write_stag);
+ write_va = be64_to_cpu(iser_ctrl->write_va);
isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n",
write_stag, (unsigned long long)write_va);
}
isert_err("iSER Hello message\n");
break;
default:
- isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
+ isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_ctrl->flags);
break;
}
struct rdma_cm_id *cm_id = isert_conn->cm_id;
struct rdma_conn_param cp;
int ret;
+ struct iser_cm_hdr rsp_hdr;
memset(&cp, 0, sizeof(struct rdma_conn_param));
cp.initiator_depth = isert_conn->initiator_depth;
cp.retry_count = 7;
cp.rnr_retry_count = 7;
+ memset(&rsp_hdr, 0, sizeof(rsp_hdr));
+ rsp_hdr.flags = ISERT_ZBVA_NOT_USED;
+ if (!isert_conn->snd_w_inv)
+ rsp_hdr.flags = rsp_hdr.flags | ISERT_SEND_W_INV_NOT_USED;
+ cp.private_data = (void *)&rsp_hdr;
+ cp.private_data_len = sizeof(rsp_hdr);
+
ret = rdma_accept(cm_id, &cp);
if (ret) {
isert_err("rdma_accept() failed with: %d\n", ret);
#include <linux/in6.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
+#include <scsi/iser.h>
+
#define DRV_NAME "isert"
#define PFX DRV_NAME ": "
#define isert_err(fmt, arg...) \
pr_err(PFX "%s: " fmt, __func__ , ## arg)
+/* Constant PDU length calculations */
+#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + \
+ sizeof(struct iscsi_hdr))
+#define ISER_RECV_DATA_SEG_LEN 8192
+#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
+#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
+
+/* QP settings */
+/* Maximal bounds on received asynchronous PDUs */
+#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */
+
+#define ISERT_MAX_RX_MISC_PDUS 6 /*
+ * NOOP_OUT(2), TEXT(1),
+ * SCSI_TMFUNC(2), LOGOUT(1)
+ */
+
+#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
+
+#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+
+#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+
+#define ISERT_INFLIGHT_DATAOUTS 8
+
+#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
+ (1 + ISERT_INFLIGHT_DATAOUTS) + \
+ ISERT_MAX_TX_MISC_PDUS + \
+ ISERT_MAX_RX_MISC_PDUS)
+
+#define ISER_RX_PAD_SIZE (ISER_RECV_DATA_SEG_LEN + 4096 - \
+ (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
+
#define ISCSI_ISER_SG_TABLESIZE 256
#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
#define ISER_BEACON_WRID 0xfffffffffffffffeULL
};
struct iser_rx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
char data[ISER_RECV_DATA_SEG_LEN];
u64 dma_addr;
} __packed;
struct iser_tx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
enum isert_desc_type type;
u64 dma_addr;
uint32_t write_stag;
uint64_t read_va;
uint64_t write_va;
+ uint32_t inv_rkey;
u64 pdu_buf_dma;
u32 pdu_buf_len;
struct isert_conn *conn;
struct work_struct release_work;
struct ib_recv_wr beacon;
bool logout_posted;
+ bool snd_w_inv;
};
#define ISERT_MAX_CQ 64
struct isert_comp *comps;
int comps_used;
struct list_head dev_node;
- struct ib_device_attr dev_attr;
int (*reg_rdma_mem)(struct iscsi_conn *conn,
struct iscsi_cmd *cmd,
struct isert_rdma_wr *wr);
+++ /dev/null
-/* From iscsi_iser.h */
-
-struct iser_hdr {
- u8 flags;
- u8 rsvd[3];
- __be32 write_stag; /* write rkey */
- __be64 write_va;
- __be32 read_stag; /* read rkey */
- __be64 read_va;
-} __packed;
-
-/*Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
-
-#define ISER_RECV_DATA_SEG_LEN 8192
-#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
-#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
-
-/* QP settings */
-/* Maximal bounds on received asynchronous PDUs */
-#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */
-
-#define ISERT_MAX_RX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
- * SCSI_TMFUNC(2), LOGOUT(1) */
-
-#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
-
-#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
-
-#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
-
-#define ISERT_INFLIGHT_DATAOUTS 8
-
-#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
- (1 + ISERT_INFLIGHT_DATAOUTS) + \
- ISERT_MAX_TX_MISC_PDUS + \
- ISERT_MAX_RX_MISC_PDUS)
-
-#define ISER_RX_PAD_SIZE (ISER_RECV_DATA_SEG_LEN + 4096 - \
- (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
-
-#define ISER_VER 0x10
-#define ISER_WSV 0x08
-#define ISER_RSV 0x04
-#define ISCSI_CTRL 0x10
-#define ISER_HELLO 0x20
-#define ISER_HELLORPLY 0x30
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+ const char *opname);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
static struct scsi_transport_template *ib_srp_transport_template;
dev->max_pages_per_mr);
}
+/**
+ * srp_drain_done() - completion callback installed in srp_drain_cqe
+ * @cq: completion queue; cq->cq_context is the SRP RDMA channel.
+ * @wc: work completion; not examined by this handler.
+ *
+ * Signals ch->done regardless of the completion status.
+ */
+static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct srp_rdma_ch *ch = cq->cq_context;
+
+ complete(&ch->done);
+}
+
+/*
+ * File-scope CQE that srp_destroy_qp() attaches to its receive work request
+ * (wr.wr_cqe); its completion is delivered to srp_drain_done().
+ */
+static struct ib_cqe srp_drain_cqe = {
+ .done = srp_drain_done,
+};
+
/**
* srp_destroy_qp() - destroy an RDMA queue pair
* @ch: SRP RDMA channel.
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
- static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
+ static struct ib_recv_wr wr = { 0 };
struct ib_recv_wr *bad_wr;
int ret;
+ wr.wr_cqe = &srp_drain_cqe;
/* Destroying a QP and reusing ch->done is only safe if not connected */
WARN_ON_ONCE(ch->connected);
struct ib_fmr_pool *fmr_pool = NULL;
struct srp_fr_pool *fr_pool = NULL;
const int m = dev->use_fast_reg ? 3 : 1;
- struct ib_cq_init_attr cq_attr = {};
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
if (!init_attr)
return -ENOMEM;
- /* + 1 for SRP_LAST_WR_ID */
- cq_attr.cqe = target->queue_size + 1;
- cq_attr.comp_vector = ch->comp_vector;
- recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
- &cq_attr);
+ /* queue_size + 1 for ib_drain_qp */
+ recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
+ ch->comp_vector, IB_POLL_SOFTIRQ);
if (IS_ERR(recv_cq)) {
ret = PTR_ERR(recv_cq);
goto err;
}
- cq_attr.cqe = m * target->queue_size;
- cq_attr.comp_vector = ch->comp_vector;
- send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
- &cq_attr);
+ send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
+ ch->comp_vector, IB_POLL_DIRECT);
if (IS_ERR(send_cq)) {
ret = PTR_ERR(send_cq);
goto err_recv_cq;
}
- ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
-
init_attr->event_handler = srp_qp_event;
init_attr->cap.max_send_wr = m * target->queue_size;
init_attr->cap.max_recv_wr = target->queue_size + 1;
if (ch->qp)
srp_destroy_qp(ch);
if (ch->recv_cq)
- ib_destroy_cq(ch->recv_cq);
+ ib_free_cq(ch->recv_cq);
if (ch->send_cq)
- ib_destroy_cq(ch->send_cq);
+ ib_free_cq(ch->send_cq);
ch->qp = qp;
ch->recv_cq = recv_cq;
return 0;
err_qp:
- ib_destroy_qp(qp);
+ srp_destroy_qp(ch);
err_send_cq:
- ib_destroy_cq(send_cq);
+ ib_free_cq(send_cq);
err_recv_cq:
- ib_destroy_cq(recv_cq);
+ ib_free_cq(recv_cq);
err:
kfree(init_attr);
if (ch->fmr_pool)
ib_destroy_fmr_pool(ch->fmr_pool);
}
+
srp_destroy_qp(ch);
- ib_destroy_cq(ch->send_cq);
- ib_destroy_cq(ch->recv_cq);
+ ib_free_cq(ch->send_cq);
+ ib_free_cq(ch->recv_cq);
/*
* Avoid that the SCSI error handler tries to use this channel after
return ret <= 0 ? ret : -ENODEV;
}
-static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
+/**
+ * srp_inv_rkey_err_done() - completion callback for local-invalidate WRs
+ * @cq: completion queue the completion was reported on.
+ * @wc: work completion.
+ *
+ * Forwards unconditionally to srp_handle_qp_err() with the operation name
+ * "INV RKEY"; wc->status is not checked here. srp_inv_rkey() posts the WR
+ * with send_flags = 0, so presumably only error completions arrive - confirm
+ * against the QP's signalling mode.
+ */
+static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ srp_handle_qp_err(cq, wc, "INV RKEY");
+}
+
+/**
+ * srp_inv_rkey() - post a local-invalidate work request for one rkey
+ * @req: SRP request whose reg_cqe is used as the completion entry.
+ * @ch: SRP RDMA channel; the WR is posted on ch->qp.
+ * @rkey: rkey to invalidate.
+ *
+ * Builds an IB_WR_LOCAL_INV send WR without scatter/gather entries and with
+ * send_flags = 0, routes its completion to srp_inv_rkey_err_done() through
+ * req->reg_cqe, and posts it.
+ *
+ * Return: the value returned by ib_post_send().
+ */
+static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
+ u32 rkey)
{
struct ib_send_wr *bad_wr;
struct ib_send_wr wr = {
.opcode = IB_WR_LOCAL_INV,
- .wr_id = LOCAL_INV_WR_ID_MASK,
.next = NULL,
.num_sge = 0,
.send_flags = 0,
.ex.invalidate_rkey = rkey,
};
+ /* The callback is set after wr_cqe is pointed at it, but before posting. */
+ wr.wr_cqe = &req->reg_cqe;
+ req->reg_cqe.done = srp_inv_rkey_err_done;
return ib_post_send(ch->qp, &wr, &bad_wr);
}
struct srp_fr_desc **pfr;
for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
- res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
+ res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
if (res < 0) {
shost_printk(KERN_ERR, target->scsi_host, PFX
"Queueing INV WR for rkey %#x failed (%d)\n",
return 0;
}
+/**
+ * srp_reg_mr_err_done() - completion callback for IB_WR_REG_MR work requests
+ * @cq: completion queue the completion was reported on.
+ * @wc: work completion.
+ *
+ * Forwards unconditionally to srp_handle_qp_err() with the operation name
+ * "FAST REG"; wc->status is not checked here. srp_map_finish_fr() installs
+ * this callback in req->reg_cqe and posts the WR with send_flags = 0, so
+ * presumably only error completions arrive - confirm.
+ */
+static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ srp_handle_qp_err(cq, wc, "FAST REG");
+}
+
static int srp_map_finish_fr(struct srp_map_state *state,
+ struct srp_request *req,
struct srp_rdma_ch *ch, int sg_nents)
{
struct srp_target_port *target = ch->target;
if (unlikely(n < 0))
return n;
+ req->reg_cqe.done = srp_reg_mr_err_done;
+
wr.wr.next = NULL;
wr.wr.opcode = IB_WR_REG_MR;
- wr.wr.wr_id = FAST_REG_WR_ID_MASK;
+ wr.wr.wr_cqe = &req->reg_cqe;
wr.wr.num_sge = 0;
wr.wr.send_flags = 0;
wr.mr = desc->mr;
while (count) {
int i, n;
- n = srp_map_finish_fr(state, ch, count);
+ n = srp_map_finish_fr(state, req, ch, count);
if (unlikely(n < 0))
return n;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
idb_sg->dma_length = idb_sg->length; /* hack^2 */
#endif
- ret = srp_map_finish_fr(&state, ch, 1);
+ ret = srp_map_finish_fr(&state, req, ch, 1);
if (ret < 0)
return ret;
} else if (dev->use_fmr) {
s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
struct srp_iu *iu;
- srp_send_completion(ch->send_cq, ch);
+ ib_process_cq_direct(ch->send_cq, -1);
if (list_empty(&ch->free_tx))
return NULL;
return iu;
}
+/**
+ * srp_send_done() - completion callback for SEND work requests
+ * @cq: send completion queue; cq->cq_context is the SRP RDMA channel.
+ * @wc: work completion; wc->wr_cqe is embedded in the sent struct srp_iu.
+ *
+ * On success the information unit is put back on ch->free_tx. On failure the
+ * completion is handed to srp_handle_qp_err() and the information unit is
+ * NOT returned to ch->free_tx.
+ *
+ * The send CQ is allocated with IB_POLL_DIRECT in this file and is polled
+ * via ib_process_cq_direct(), so this handler runs in the context of that
+ * caller.
+ */
+static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+ struct srp_rdma_ch *ch = cq->cq_context;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ srp_handle_qp_err(cq, wc, "SEND");
+ return;
+ }
+
+ list_add(&iu->list, &ch->free_tx);
+}
+
static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
{
struct srp_target_port *target = ch->target;
list.length = len;
list.lkey = target->lkey;
+ iu->cqe.done = srp_send_done;
+
wr.next = NULL;
- wr.wr_id = (uintptr_t) iu;
+ wr.wr_cqe = &iu->cqe;
wr.sg_list = &list;
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
list.length = iu->size;
list.lkey = target->lkey;
+ iu->cqe.done = srp_recv_done;
+
wr.next = NULL;
- wr.wr_id = (uintptr_t) iu;
+ wr.wr_cqe = &iu->cqe;
wr.sg_list = &list;
wr.num_sge = 1;
"problems processing SRP_AER_REQ\n");
}
-static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+ struct srp_rdma_ch *ch = cq->cq_context;
struct srp_target_port *target = ch->target;
struct ib_device *dev = target->srp_host->srp_dev->dev;
- struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
int res;
u8 opcode;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ srp_handle_qp_err(cq, wc, "RECV");
+ return;
+ }
+
ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
DMA_FROM_DEVICE);
srp_start_tl_fail_timers(target->rport);
}
-static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
- bool send_err, struct srp_rdma_ch *ch)
+/**
+ * srp_handle_qp_err() - common handler for failed work completions
+ * @cq: completion queue; cq->cq_context is the SRP RDMA channel.
+ * @wc: the failed work completion (status and wr_cqe are logged).
+ * @opname: operation name used in the log message.
+ *
+ * If the channel is connected and the target has not yet been flagged with
+ * qp_in_error, logs the failure and queues target->tl_err_work on
+ * system_long_wq. target->qp_in_error is set to true in every case, so only
+ * the first error on a connected channel is logged and queued.
+ */
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+ const char *opname)
{
+ struct srp_rdma_ch *ch = cq->cq_context;
struct srp_target_port *target = ch->target;
- if (wr_id == SRP_LAST_WR_ID) {
- complete(&ch->done);
- return;
- }
-
if (ch->connected && !target->qp_in_error) {
- if (wr_id & LOCAL_INV_WR_ID_MASK) {
- shost_printk(KERN_ERR, target->scsi_host, PFX
- "LOCAL_INV failed with status %s (%d)\n",
- ib_wc_status_msg(wc_status), wc_status);
- } else if (wr_id & FAST_REG_WR_ID_MASK) {
- shost_printk(KERN_ERR, target->scsi_host, PFX
- "FAST_REG_MR failed status %s (%d)\n",
- ib_wc_status_msg(wc_status), wc_status);
- } else {
- shost_printk(KERN_ERR, target->scsi_host,
- PFX "failed %s status %s (%d) for iu %p\n",
- send_err ? "send" : "receive",
- ib_wc_status_msg(wc_status), wc_status,
- (void *)(uintptr_t)wr_id);
- }
+ shost_printk(KERN_ERR, target->scsi_host,
+ PFX "failed %s status %s (%d) for CQE %p\n",
+ opname, ib_wc_status_msg(wc->status), wc->status,
+ wc->wr_cqe);
queue_work(system_long_wq, &target->tl_err_work);
}
target->qp_in_error = true;
}
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
-{
- struct srp_rdma_ch *ch = ch_ptr;
- struct ib_wc wc;
-
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- while (ib_poll_cq(cq, 1, &wc) > 0) {
- if (likely(wc.status == IB_WC_SUCCESS)) {
- srp_handle_recv(ch, &wc);
- } else {
- srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
- }
- }
-}
-
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
-{
- struct srp_rdma_ch *ch = ch_ptr;
- struct ib_wc wc;
- struct srp_iu *iu;
-
- while (ib_poll_cq(cq, 1, &wc) > 0) {
- if (likely(wc.status == IB_WC_SUCCESS)) {
- iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
- list_add(&iu->list, &ch->free_tx);
- } else {
- srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
- }
- }
-}
-
static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(shost);
static void srp_add_one(struct ib_device *device)
{
struct srp_device *srp_dev;
- struct ib_device_attr *dev_attr;
struct srp_host *host;
int mr_page_shift, p;
u64 max_pages_per_mr;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- pr_warn("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
if (!srp_dev)
- goto free_attr;
+ return;
srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
device->map_phys_fmr && device->unmap_fmr);
- srp_dev->has_fr = (dev_attr->device_cap_flags &
+ srp_dev->has_fr = (device->attrs.device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
if (!srp_dev->has_fmr && !srp_dev->has_fr)
dev_warn(&device->dev, "neither FMR nor FR is supported\n");
* minimum of 4096 bytes. We're unlikely to build large sglists
* out of smaller entries.
*/
- mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
+ mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1);
srp_dev->mr_page_size = 1 << mr_page_shift;
srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
- max_pages_per_mr = dev_attr->max_mr_size;
+ max_pages_per_mr = device->attrs.max_mr_size;
do_div(max_pages_per_mr, srp_dev->mr_page_size);
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
if (srp_dev->use_fast_reg) {
srp_dev->max_pages_per_mr =
min_t(u32, srp_dev->max_pages_per_mr,
- dev_attr->max_fast_reg_page_list_len);
+ device->attrs.max_fast_reg_page_list_len);
}
srp_dev->mr_max_size = srp_dev->mr_page_size *
srp_dev->max_pages_per_mr;
- pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
- device->name, mr_page_shift, dev_attr->max_mr_size,
- dev_attr->max_fast_reg_page_list_len,
+ pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+ device->name, mr_page_shift, device->attrs.max_mr_size,
+ device->attrs.max_fast_reg_page_list_len,
srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
INIT_LIST_HEAD(&srp_dev->dev_list);
}
ib_set_client_data(device, &srp_client, srp_dev);
-
- goto free_attr;
+ return;
err_pd:
ib_dealloc_pd(srp_dev->pd);
free_dev:
kfree(srp_dev);
-
-free_attr:
- kfree(dev_attr);
}
static void srp_remove_one(struct ib_device *device, void *client_data)
{
int ret;
- BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
-
if (srp_sg_tablesize) {
pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
if (!cmd_sg_entries)
SRP_TAG_TSK_MGMT = 1U << 31,
SRP_MAX_PAGES_PER_MR = 512,
-
- LOCAL_INV_WR_ID_MASK = 1,
- FAST_REG_WR_ID_MASK = 2,
-
- SRP_LAST_WR_ID = 0xfffffffcU,
};
enum srp_target_state {
struct srp_direct_buf *indirect_desc;
dma_addr_t indirect_dma_addr;
short nmdesc;
+ struct ib_cqe reg_cqe;
};
/**
void *buf;
size_t size;
enum dma_data_direction direction;
+ struct ib_cqe cqe;
};
/**
static struct ib_client srpt_client;
static void srpt_release_channel(struct srpt_rdma_ch *ch);
static int srpt_queue_status(struct se_cmd *cmd);
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
/**
* opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
memset(iocp, 0, sizeof *iocp);
strcpy(iocp->id_string, SRPT_ID_STRING);
iocp->guid = cpu_to_be64(srpt_service_guid);
- iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
- iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
- iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
- iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
+ iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
+ iocp->device_id = cpu_to_be32(sdev->device->attrs.vendor_part_id);
+ iocp->device_version = cpu_to_be16(sdev->device->attrs.hw_ver);
+ iocp->subsys_vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
iocp->subsys_device_id = 0x0;
iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
* srpt_mad_recv_handler() - MAD reception callback function.
*/
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_wc)
{
struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
struct ib_recv_wr wr, *bad_wr;
BUG_ON(!sdev);
- wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
-
list.addr = ioctx->ioctx.dma;
list.length = srp_max_req_size;
list.lkey = sdev->pd->local_dma_lkey;
+ ioctx->ioctx.cqe.done = srpt_recv_done;
+ wr.wr_cqe = &ioctx->ioctx.cqe;
wr.next = NULL;
wr.sg_list = &list;
wr.num_sge = 1;
list.length = len;
list.lkey = sdev->pd->local_dma_lkey;
+ ioctx->ioctx.cqe.done = srpt_send_done;
wr.next = NULL;
- wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
+ wr.wr_cqe = &ioctx->ioctx.cqe;
wr.sg_list = &list;
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
BUG_ON(!ch);
BUG_ON(!ioctx);
- BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
+ BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
while (ioctx->n_rdma)
- kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
+ kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
- kfree(ioctx->rdma_ius);
- ioctx->rdma_ius = NULL;
+ kfree(ioctx->rdma_wrs);
+ ioctx->rdma_wrs = NULL;
if (ioctx->mapped_sg_count) {
sg = ioctx->sg;
struct scatterlist *sg, *sg_orig;
int sg_cnt;
enum dma_data_direction dir;
- struct rdma_iu *riu;
+ struct ib_rdma_wr *riu;
struct srp_direct_buf *db;
dma_addr_t dma_addr;
struct ib_sge *sge;
ioctx->mapped_sg_count = count;
- if (ioctx->rdma_ius && ioctx->n_rdma_ius)
- nrdma = ioctx->n_rdma_ius;
+ if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
+ nrdma = ioctx->n_rdma_wrs;
else {
nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
+ ioctx->n_rbuf;
- ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL);
- if (!ioctx->rdma_ius)
+ ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
+ GFP_KERNEL);
+ if (!ioctx->rdma_wrs)
goto free_mem;
- ioctx->n_rdma_ius = nrdma;
+ ioctx->n_rdma_wrs = nrdma;
}
db = ioctx->rbufs;
tsize = cmd->data_length;
dma_len = ib_sg_dma_len(dev, &sg[0]);
- riu = ioctx->rdma_ius;
+ riu = ioctx->rdma_wrs;
/*
* For each remote desc - calculate the #ib_sge.
j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
rsize = be32_to_cpu(db->len);
raddr = be64_to_cpu(db->va);
- riu->raddr = raddr;
+ riu->remote_addr = raddr;
riu->rkey = be32_to_cpu(db->key);
- riu->sge_cnt = 0;
+ riu->wr.num_sge = 0;
/* calculate how many sge required for this remote_buf */
while (rsize > 0 && tsize > 0) {
rsize = 0;
}
- ++riu->sge_cnt;
+ ++riu->wr.num_sge;
- if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
+ if (rsize > 0 &&
+ riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
++ioctx->n_rdma;
- riu->sge =
- kmalloc(riu->sge_cnt * sizeof *riu->sge,
- GFP_KERNEL);
- if (!riu->sge)
+ riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+ sizeof(*riu->wr.sg_list),
+ GFP_KERNEL);
+ if (!riu->wr.sg_list)
goto free_mem;
++riu;
- riu->sge_cnt = 0;
- riu->raddr = raddr;
+ riu->wr.num_sge = 0;
+ riu->remote_addr = raddr;
riu->rkey = be32_to_cpu(db->key);
}
}
++ioctx->n_rdma;
- riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
- GFP_KERNEL);
- if (!riu->sge)
+ riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+ sizeof(*riu->wr.sg_list),
+ GFP_KERNEL);
+ if (!riu->wr.sg_list)
goto free_mem;
}
db = ioctx->rbufs;
tsize = cmd->data_length;
- riu = ioctx->rdma_ius;
+ riu = ioctx->rdma_wrs;
sg = sg_orig;
dma_len = ib_sg_dma_len(dev, &sg[0]);
dma_addr = ib_sg_dma_address(dev, &sg[0]);
for (i = 0, j = 0;
j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
rsize = be32_to_cpu(db->len);
- sge = riu->sge;
+ sge = riu->wr.sg_list;
k = 0;
while (rsize > 0 && tsize > 0) {
}
++k;
- if (k == riu->sge_cnt && rsize > 0 && tsize > 0) {
+ if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
++riu;
- sge = riu->sge;
+ sge = riu->wr.sg_list;
k = 0;
} else if (rsize > 0 && tsize > 0)
++sge;
ioctx->n_rbuf = 0;
ioctx->rbufs = NULL;
ioctx->n_rdma = 0;
- ioctx->n_rdma_ius = 0;
- ioctx->rdma_ius = NULL;
+ ioctx->n_rdma_wrs = 0;
+ ioctx->rdma_wrs = NULL;
ioctx->mapped_sg_count = 0;
init_completion(&ioctx->tx_done);
ioctx->queue_status_only = false;
}
/**
- * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
- */
-static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
-{
- struct srpt_send_ioctx *ioctx;
- enum srpt_command_state state;
- u32 index;
-
- atomic_inc(&ch->sq_wr_avail);
-
- index = idx_from_wr_id(wr_id);
- ioctx = ch->ioctx_ring[index];
- state = srpt_get_cmd_state(ioctx);
-
- WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
- && state != SRPT_STATE_MGMT_RSP_SENT
- && state != SRPT_STATE_NEED_DATA
- && state != SRPT_STATE_DONE);
-
- /* If SRP_RSP sending failed, undo the ch->req_lim change. */
- if (state == SRPT_STATE_CMD_RSP_SENT
- || state == SRPT_STATE_MGMT_RSP_SENT)
- atomic_dec(&ch->req_lim);
-
- srpt_abort_cmd(ioctx);
-}
-
-/**
- * srpt_handle_send_comp() - Process an IB send completion notification.
- */
-static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- enum srpt_command_state state;
-
- atomic_inc(&ch->sq_wr_avail);
-
- state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
-
- if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
- && state != SRPT_STATE_MGMT_RSP_SENT
- && state != SRPT_STATE_DONE))
- pr_debug("state = %d\n", state);
-
- if (state != SRPT_STATE_DONE) {
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
- transport_generic_free_cmd(&ioctx->cmd, 0);
- } else {
- pr_err("IB completion has been received too late for"
- " wr_id = %u.\n", ioctx->ioctx.index);
- }
-}
-
-/**
- * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
- *
* XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
* the data that has been transferred via IB RDMA had to be postponed until the
* check_stop_free() callback. None of this is necessary anymore and needs to
* be cleaned up.
*/
-static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx,
- enum srpt_opcode opcode)
+static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ /* wc->wr_cqe is the rdma_cqe member embedded in the send I/O context. */
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
+
+ /*
+ * Credit n_rdma back to ch->sq_wr_avail before the status is checked,
+ * i.e. for successful and failed completions alike.
+ */
WARN_ON(ioctx->n_rdma <= 0);
atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
- if (opcode == SRPT_RDMA_READ_LAST) {
- if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
- SRPT_STATE_DATA_IN))
- target_execute_cmd(&ioctx->cmd);
- else
- pr_err("%s[%d]: wrong state = %d\n", __func__,
- __LINE__, srpt_get_cmd_state(ioctx));
- } else if (opcode == SRPT_RDMA_ABORT) {
- ioctx->rdma_aborted = true;
- } else {
- WARN(true, "unexpected opcode %d\n", opcode);
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
+ ioctx, wc->status);
+ srpt_abort_cmd(ioctx);
+ return;
}
+
+ /* Execute the command only if it was still waiting for its data. */
+ if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
+ SRPT_STATE_DATA_IN))
+ target_execute_cmd(&ioctx->cmd);
+ else
+ pr_err("%s[%d]: wrong state = %d\n", __func__,
+ __LINE__, srpt_get_cmd_state(ioctx));
}
-/**
- * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
- */
-static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx,
- enum srpt_opcode opcode)
+/**
+ * srpt_rdma_write_done() - completion callback for the last RDMA WRITE WR
+ * @cq: completion queue the completion was reported on (not used).
+ * @wc: work completion; wc->wr_cqe is the rdma_cqe member embedded in the
+ *      send I/O context.
+ *
+ * Logs and aborts the command if the completion reports a failure; a
+ * successful completion is ignored.
+ *
+ * NOTE(review): unlike srpt_rdma_read_done(), this handler does not add
+ * ioctx->n_rdma back to ch->sq_wr_avail - confirm that the send-queue slots
+ * are credited elsewhere on this path.
+ */
+static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
- enum srpt_command_state state;
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
- state = srpt_get_cmd_state(ioctx);
- switch (opcode) {
- case SRPT_RDMA_READ_LAST:
- if (ioctx->n_rdma <= 0) {
- pr_err("Received invalid RDMA read"
- " error completion with idx %d\n",
- ioctx->ioctx.index);
- break;
- }
- atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
- if (state == SRPT_STATE_NEED_DATA)
- srpt_abort_cmd(ioctx);
- else
- pr_err("%s[%d]: wrong state = %d\n",
- __func__, __LINE__, state);
- break;
- case SRPT_RDMA_WRITE_LAST:
- break;
- default:
- pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
- break;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
+ ioctx, wc->status);
+ srpt_abort_cmd(ioctx);
}
}
return;
}
-static void srpt_process_rcv_completion(struct ib_cq *cq,
- struct srpt_rdma_ch *ch,
- struct ib_wc *wc)
+/**
+ * srpt_recv_done() - completion callback for receive work requests
+ * @cq: completion queue; cq->cq_context is the SRPT RDMA channel.
+ * @wc: work completion; wc->wr_cqe is the ioctx.cqe member embedded in the
+ *      receive I/O context.
+ *
+ * On success, decrements ch->req_lim (logging if it drops below zero) and
+ * passes the received information unit to srpt_handle_new_iu() without a
+ * pre-allocated send I/O context. On failure, only logs the status; nothing
+ * else is done with the receive I/O context here.
+ */
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_device *sdev = ch->sport->sdev;
- struct srpt_recv_ioctx *ioctx;
- u32 index;
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_recv_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe);
- index = idx_from_wr_id(wc->wr_id);
if (wc->status == IB_WC_SUCCESS) {
int req_lim;
req_lim = atomic_dec_return(&ch->req_lim);
if (unlikely(req_lim < 0))
pr_err("req_lim = %d < 0\n", req_lim);
- ioctx = sdev->ioctx_ring[index];
srpt_handle_new_iu(ch, ioctx, NULL);
} else {
- pr_info("receiving failed for idx %u with status %d\n",
- index, wc->status);
+ pr_info("receiving failed for ioctx %p with status %d\n",
+ ioctx, wc->status);
}
}
/**
- * srpt_process_send_completion() - Process an IB send completion.
- *
* Note: Although this has not yet been observed during tests, at least in
* theory it is possible that the srpt_get_send_ioctx() call invoked by
* srpt_handle_new_iu() fails. This is possible because the req_lim_delta
* are queued on cmd_wait_list. The code below processes these delayed
* requests one at a time.
*/
-static void srpt_process_send_completion(struct ib_cq *cq,
- struct srpt_rdma_ch *ch,
- struct ib_wc *wc)
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_send_ioctx *send_ioctx;
- uint32_t index;
- enum srpt_opcode opcode;
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ /* wc->wr_cqe is the ioctx.cqe member embedded in the send I/O context. */
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
+ enum srpt_command_state state;
- index = idx_from_wr_id(wc->wr_id);
- opcode = opcode_from_wr_id(wc->wr_id);
- send_ioctx = ch->ioctx_ring[index];
- if (wc->status == IB_WC_SUCCESS) {
- if (opcode == SRPT_SEND)
- srpt_handle_send_comp(ch, send_ioctx);
- else {
- WARN_ON(opcode != SRPT_RDMA_ABORT &&
- wc->opcode != IB_WC_RDMA_READ);
- srpt_handle_rdma_comp(ch, send_ioctx, opcode);
- }
+ /*
+ * Move the command to SRPT_STATE_DONE for both successful and failed
+ * completions; "state" is presumably the state the command was in
+ * before this call - confirm against srpt_set_cmd_state().
+ */
+ state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
+
+ WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
+ state != SRPT_STATE_MGMT_RSP_SENT);
+
+ /* The send WR has completed, so one send-queue slot is free again. */
+ atomic_inc(&ch->sq_wr_avail);
+
+ if (wc->status != IB_WC_SUCCESS) {
+ pr_info("sending response for ioctx 0x%p failed"
+ " with status %d\n", ioctx, wc->status);
+
+ /* Sending the response failed: undo the ch->req_lim change. */
+ atomic_dec(&ch->req_lim);
+ srpt_abort_cmd(ioctx);
+ goto out;
+ }
+
+ if (state != SRPT_STATE_DONE) {
+ srpt_unmap_sg_to_ib_sge(ch, ioctx);
+ transport_generic_free_cmd(&ioctx->cmd, 0);
} else {
- if (opcode == SRPT_SEND) {
- pr_info("sending response for idx %u failed"
- " with status %d\n", index, wc->status);
- srpt_handle_send_err_comp(ch, wc->wr_id);
- } else if (opcode != SRPT_RDMA_MID) {
- pr_info("RDMA t %d for idx %u failed with"
- " status %d\n", opcode, index, wc->status);
- srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
- }
+ pr_err("IB completion has been received too late for"
+ " wr_id = %u.\n", ioctx->ioctx.index);
}
- while (unlikely(opcode == SRPT_SEND
- && !list_empty(&ch->cmd_wait_list)
- && srpt_get_ch_state(ch) == CH_LIVE
- && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) {
+ /*
+ * Process requests parked on cmd_wait_list, one per iteration, for as
+ * long as the channel is live and a send I/O context can be obtained.
+ * Note that "ioctx" is reused for the newly obtained context.
+ */
+out:
+ while (!list_empty(&ch->cmd_wait_list) &&
+ srpt_get_ch_state(ch) == CH_LIVE &&
+ (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
struct srpt_recv_ioctx *recv_ioctx;
recv_ioctx = list_first_entry(&ch->cmd_wait_list,
struct srpt_recv_ioctx,
wait_list);
list_del(&recv_ioctx->wait_list);
- srpt_handle_new_iu(ch, recv_ioctx, send_ioctx);
- }
-}
-
-static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
-{
- struct ib_wc *const wc = ch->wc;
- int i, n;
-
- WARN_ON(cq != ch->cq);
-
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
- for (i = 0; i < n; i++) {
- if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
- srpt_process_rcv_completion(cq, ch, &wc[i]);
- else
- srpt_process_send_completion(cq, ch, &wc[i]);
- }
+ srpt_handle_new_iu(ch, recv_ioctx, ioctx);
}
}
-/**
- * srpt_completion() - IB completion queue callback function.
- *
- * Notes:
- * - It is guaranteed that a completion handler will never be invoked
- * concurrently on two different CPUs for the same completion queue. See also
- * Documentation/infiniband/core_locking.txt and the implementation of
- * handle_edge_irq() in kernel/irq/chip.c.
- * - When threaded IRQs are enabled, completion handlers are invoked in thread
- * context instead of interrupt context.
- */
-static void srpt_completion(struct ib_cq *cq, void *ctx)
-{
- struct srpt_rdma_ch *ch = ctx;
-
- wake_up_interruptible(&ch->wait_queue);
-}
-
-static int srpt_compl_thread(void *arg)
-{
- struct srpt_rdma_ch *ch;
-
- /* Hibernation / freezing of the SRPT kernel thread is not supported. */
- current->flags |= PF_NOFREEZE;
-
- ch = arg;
- BUG_ON(!ch);
- pr_info("Session %s: kernel thread %s (PID %d) started\n",
- ch->sess_name, ch->thread->comm, current->pid);
- while (!kthread_should_stop()) {
- wait_event_interruptible(ch->wait_queue,
- (srpt_process_completion(ch->cq, ch),
- kthread_should_stop()));
- }
- pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
- ch->sess_name, ch->thread->comm, current->pid);
- return 0;
-}
-
/**
* srpt_create_ch_ib() - Create receive and send completion queues.
*/
struct srpt_port *sport = ch->sport;
struct srpt_device *sdev = sport->sdev;
u32 srp_sq_size = sport->port_attrib.srp_sq_size;
- struct ib_cq_init_attr cq_attr = {};
int ret;
WARN_ON(ch->rq_size < 1);
goto out;
retry:
- cq_attr.cqe = ch->rq_size + srp_sq_size;
- ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
- &cq_attr);
+ ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size,
+ 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
if (IS_ERR(ch->cq)) {
ret = PTR_ERR(ch->cq);
pr_err("failed to create CQ cqe= %d ret= %d\n",
if (ret)
goto err_destroy_qp;
- init_waitqueue_head(&ch->wait_queue);
-
- pr_debug("creating thread for session %s\n", ch->sess_name);
-
- ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
- if (IS_ERR(ch->thread)) {
- pr_err("failed to create kernel thread %ld\n",
- PTR_ERR(ch->thread));
- ch->thread = NULL;
- goto err_destroy_qp;
- }
-
out:
kfree(qp_init);
return ret;
err_destroy_qp:
ib_destroy_qp(ch->qp);
err_destroy_cq:
- ib_destroy_cq(ch->cq);
+ ib_free_cq(ch->cq);
goto out;
}
static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
{
- if (ch->thread)
- kthread_stop(ch->thread);
-
ib_destroy_qp(ch->qp);
- ib_destroy_cq(ch->cq);
+ ib_free_cq(ch->cq);
}
/**
static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
struct srpt_send_ioctx *ioctx)
{
- struct ib_rdma_wr wr;
struct ib_send_wr *bad_wr;
- struct rdma_iu *riu;
- int i;
- int ret;
- int sq_wr_avail;
+ int sq_wr_avail, ret, i;
enum dma_data_direction dir;
const int n_rdma = ioctx->n_rdma;
}
}
- ioctx->rdma_aborted = false;
- ret = 0;
- riu = ioctx->rdma_ius;
- memset(&wr, 0, sizeof wr);
-
- for (i = 0; i < n_rdma; ++i, ++riu) {
- if (dir == DMA_FROM_DEVICE) {
- wr.wr.opcode = IB_WR_RDMA_WRITE;
- wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
- SRPT_RDMA_WRITE_LAST :
- SRPT_RDMA_MID,
- ioctx->ioctx.index);
- } else {
- wr.wr.opcode = IB_WR_RDMA_READ;
- wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
- SRPT_RDMA_READ_LAST :
- SRPT_RDMA_MID,
- ioctx->ioctx.index);
- }
- wr.wr.next = NULL;
- wr.remote_addr = riu->raddr;
- wr.rkey = riu->rkey;
- wr.wr.num_sge = riu->sge_cnt;
- wr.wr.sg_list = riu->sge;
+ for (i = 0; i < n_rdma; i++) {
+ struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
- /* only get completion event for the last rdma write */
- if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
- wr.wr.send_flags = IB_SEND_SIGNALED;
+ wr->opcode = (dir == DMA_FROM_DEVICE) ?
+ IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
- ret = ib_post_send(ch->qp, &wr.wr, &bad_wr);
- if (ret)
- break;
+ if (i == n_rdma - 1) {
+ /* only get completion event for the last rdma read */
+ if (dir == DMA_TO_DEVICE) {
+ wr->send_flags = IB_SEND_SIGNALED;
+ ioctx->rdma_cqe.done = srpt_rdma_read_done;
+ } else {
+ ioctx->rdma_cqe.done = srpt_rdma_write_done;
+ }
+ wr->wr_cqe = &ioctx->rdma_cqe;
+ wr->next = NULL;
+ } else {
+ wr->wr_cqe = NULL;
+ wr->next = &ioctx->rdma_wrs[i + 1].wr;
+ }
}
+ ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
if (ret)
pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
__func__, __LINE__, ret, i, n_rdma);
- if (ret && i > 0) {
- wr.wr.num_sge = 0;
- wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
- wr.wr.send_flags = IB_SEND_SIGNALED;
- while (ch->state == CH_LIVE &&
- ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
- pr_info("Trying to abort failed RDMA transfer [%d]\n",
- ioctx->ioctx.index);
- msleep(1000);
- }
- while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
- pr_info("Waiting until RDMA abort finished [%d]\n",
- ioctx->ioctx.index);
- msleep(1000);
- }
- }
out:
if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
atomic_add(n_rdma, &ch->sq_wr_avail);
init_waitqueue_head(&sdev->ch_releaseQ);
spin_lock_init(&sdev->spinlock);
- if (ib_query_device(device, &sdev->dev_attr))
- goto free_dev;
-
sdev->pd = ib_alloc_pd(device);
if (IS_ERR(sdev->pd))
goto free_dev;
- sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr);
+ sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr);
srq_attr.event_handler = srpt_srq_event;
srq_attr.srq_context = (void *)sdev;
goto err_pd;
pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
- __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr,
+ __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr,
device->name);
if (!srpt_service_guid)
DEFAULT_MAX_RDMA_SIZE = 65536,
};
-enum srpt_opcode {
- SRPT_RECV,
- SRPT_SEND,
- SRPT_RDMA_MID,
- SRPT_RDMA_ABORT,
- SRPT_RDMA_READ_LAST,
- SRPT_RDMA_WRITE_LAST,
-};
-
-static inline u64 encode_wr_id(u8 opcode, u32 idx)
-{
- return ((u64)opcode << 32) | idx;
-}
-static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
-{
- return wr_id >> 32;
-}
-static inline u32 idx_from_wr_id(u64 wr_id)
-{
- return (u32)wr_id;
-}
-
-struct rdma_iu {
- u64 raddr;
- u32 rkey;
- struct ib_sge *sge;
- u32 sge_cnt;
- int mem_id;
-};
-
/**
* enum srpt_command_state - SCSI command state managed by SRPT.
* @SRPT_STATE_NEW: New command arrived and is being processed.
* @index: Index of the I/O context in its ioctx_ring array.
*/
struct srpt_ioctx {
+ struct ib_cqe cqe;
void *buf;
dma_addr_t dma;
uint32_t index;
* @sg: Pointer to sg-list associated with this I/O context.
* @sg_cnt: SG-list size.
* @mapped_sg_count: ib_dma_map_sg() return value.
- * @n_rdma_ius: Number of elements in the rdma_ius array.
- * @rdma_ius: Array with information about the RDMA mapping.
+ * @n_rdma_wrs: Number of elements in the rdma_wrs array.
+ * @rdma_wrs: Array with information about the RDMA mapping.
* @tag: Tag of the received SRP information unit.
* @spinlock: Protects 'state'.
* @state: I/O context state.
- * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
- * the already initiated transfers have finished.
* @cmd: Target core command data structure.
* @sense_data: SCSI sense data.
*/
struct srpt_send_ioctx {
struct srpt_ioctx ioctx;
struct srpt_rdma_ch *ch;
- struct rdma_iu *rdma_ius;
+ struct ib_rdma_wr *rdma_wrs;
+ struct ib_cqe rdma_cqe;
struct srp_direct_buf *rbufs;
struct srp_direct_buf single_rbuf;
struct scatterlist *sg;
struct list_head free_list;
spinlock_t spinlock;
enum srpt_command_state state;
- bool rdma_aborted;
struct se_cmd cmd;
struct completion tx_done;
int sg_cnt;
int mapped_sg_count;
- u16 n_rdma_ius;
+ u16 n_rdma_wrs;
u8 n_rdma;
u8 n_rbuf;
bool queue_status_only;
/**
* struct srpt_rdma_ch - RDMA channel.
- * @wait_queue: Allows the kernel thread to wait for more work.
- * @thread: Kernel thread that processes the IB queues associated with
- * the channel.
* @cm_id: IB CM ID associated with the channel.
* @qp: IB queue pair used for communicating over this channel.
* @cq: IB completion queue for this channel.
* @free_list: Head of list with free send I/O contexts.
* @state: channel state. See also enum rdma_ch_state.
* @ioctx_ring: Send ring.
- * @wc: IB work completion array for srpt_process_completion().
* @list: Node for insertion in the srpt_device.rch_list list.
* @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
* list contains struct srpt_ioctx elements and is protected
* @release_done: Enables waiting for srpt_release_channel() completion.
*/
struct srpt_rdma_ch {
- wait_queue_head_t wait_queue;
- struct task_struct *thread;
struct ib_cm_id *cm_id;
struct ib_qp *qp;
struct ib_cq *cq;
struct list_head free_list;
enum rdma_ch_state state;
struct srpt_send_ioctx **ioctx_ring;
- struct ib_wc wc[16];
struct list_head list;
struct list_head cmd_wait_list;
struct se_session *sess;
* @mr: L_Key (local key) with write access to all local memory.
* @srq: Per-HCA SRQ (shared receive queue).
* @cm_id: Connection identifier.
- * @dev_attr: Attributes of the InfiniBand device as obtained during the
- * ib_client.add() callback.
* @srq_size: SRQ size.
* @ioctx_ring: Per-HCA SRQ.
* @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
struct ib_pd *pd;
struct ib_srq *srq;
struct ib_cm_id *cm_id;
- struct ib_device_attr dev_attr;
int srq_size;
struct srpt_recv_ioctx **ioctx_ring;
struct list_head rch_list;
*/
#include <linux/kernel.h>
+#include <linux/input.h>
+#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/module.h>
#include <linux/usb/input.h>
+#include <linux/usb/quirks.h>
#define DRIVER_AUTHOR "Marko Friedemann <mfr@bmx-chemnitz.de>"
#define DRIVER_DESC "X-Box pad driver"
{ 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX },
{ 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 },
{ 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE },
- { 0x045e, 0x02dd, "Microsoft X-Box One pad (Covert Forces)", 0, XTYPE_XBOXONE },
+ { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE },
{ 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
{ 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W },
{ 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX },
MODULE_DEVICE_TABLE(usb, xpad_table);
+/*
+ * One queued output report: the payload bytes, how many of them are
+ * valid, and a flag marking the slot as waiting to be sent.
+ */
+struct xpad_output_packet {
+ u8 data[XPAD_PKT_LEN]; /* payload, copied into xpad->odata before submit */
+ u8 len; /* number of valid bytes in data[] */
+ bool pending; /* set when queued; cleared by xpad_irq_out() on success */
+};
+
+#define XPAD_OUT_CMD_IDX 0
+#define XPAD_OUT_FF_IDX 1
+#define XPAD_OUT_LED_IDX (1 + IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF))
+#define XPAD_NUM_OUT_PACKETS (1 + \
+ IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF) + \
+ IS_ENABLED(CONFIG_JOYSTICK_XPAD_LEDS))
+
struct usb_xpad {
struct input_dev *dev; /* input device interface */
+ struct input_dev __rcu *x360w_dev;
struct usb_device *udev; /* usb device */
struct usb_interface *intf; /* usb interface */
- int pad_present;
+ bool pad_present;
+ bool input_created;
struct urb *irq_in; /* urb for interrupt in report */
unsigned char *idata; /* input data */
dma_addr_t idata_dma;
struct urb *irq_out; /* urb for interrupt out report */
+ struct usb_anchor irq_out_anchor;
+ bool irq_out_active; /* we must not use an active URB */
+ u8 odata_serial; /* serial number for xbox one protocol */
unsigned char *odata; /* output data */
dma_addr_t odata_dma;
- struct mutex odata_mutex;
+ spinlock_t odata_lock;
+
+ struct xpad_output_packet out_packets[XPAD_NUM_OUT_PACKETS];
+ int last_out_packet;
#if defined(CONFIG_JOYSTICK_XPAD_LEDS)
struct xpad_led *led;
int xtype; /* type of xbox device */
int pad_nr; /* the order x360 pads were attached */
const char *name; /* name of the device */
+ struct work_struct work; /* init/remove device from callback */
};
+static int xpad_init_input(struct usb_xpad *xpad);
+static void xpad_deinit_input(struct usb_xpad *xpad);
+
/*
* xpad_process_packet
*
* http://www.free60.org/wiki/Gamepad
*/
-static void xpad360_process_packet(struct usb_xpad *xpad,
+static void xpad360_process_packet(struct usb_xpad *xpad, struct input_dev *dev,
u16 cmd, unsigned char *data)
{
- struct input_dev *dev = xpad->dev;
-
/* digital pad */
if (xpad->mapping & MAP_DPAD_TO_BUTTONS) {
/* dpad as buttons (left, right, up, down) */
input_sync(dev);
}
-static void xpad_identify_controller(struct usb_xpad *xpad);
+/*
+ * Work item scheduled by xpad360w_process_packet() when a wireless pad's
+ * presence bit changes.  Creates the input device when the pad appeared,
+ * or unpublishes and tears it down when the pad went away.
+ */
+static void xpad_presence_work(struct work_struct *work)
+{
+	struct usb_xpad *xpad = container_of(work, struct usb_xpad, work);
+	int error;
+
+	if (xpad->pad_present) {
+		error = xpad_init_input(xpad);
+		if (error) {
+			/*
+			 * Complain only, not much else we can do here.
+			 * Log against the USB interface: after a failed
+			 * init xpad->dev cannot be assumed to point at a
+			 * live input device.
+			 */
+			dev_err(&xpad->intf->dev,
+				"unable to init device: %d\n", error);
+		} else {
+			/* Publish the device to the packet handler. */
+			rcu_assign_pointer(xpad->x360w_dev, xpad->dev);
+		}
+	} else {
+		RCU_INIT_POINTER(xpad->x360w_dev, NULL);
+		synchronize_rcu();
+		/*
+		 * Now that we are sure xpad360w_process_packet is not
+		 * using input device we can get rid of it.
+		 */
+		xpad_deinit_input(xpad);
+	}
+}
/*
* xpad360w_process_packet
*/
static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data)
{
+ struct input_dev *dev;
+ bool present;
+
/* Presence change */
if (data[0] & 0x08) {
- if (data[1] & 0x80) {
- xpad->pad_present = 1;
- /*
- * Light up the segment corresponding to
- * controller number.
- */
- xpad_identify_controller(xpad);
- } else
- xpad->pad_present = 0;
+ present = (data[1] & 0x80) != 0;
+
+ if (xpad->pad_present != present) {
+ xpad->pad_present = present;
+ schedule_work(&xpad->work);
+ }
}
/* Valid pad data */
- if (!(data[1] & 0x1))
+ if (data[1] != 0x1)
return;
- xpad360_process_packet(xpad, cmd, &data[4]);
+ rcu_read_lock();
+ dev = rcu_dereference(xpad->x360w_dev);
+ if (dev)
+ xpad360_process_packet(xpad, dev, cmd, &data[4]);
+ rcu_read_unlock();
}
/*
switch (xpad->xtype) {
case XTYPE_XBOX360:
- xpad360_process_packet(xpad, 0, xpad->idata);
+ xpad360_process_packet(xpad, xpad->dev, 0, xpad->idata);
break;
case XTYPE_XBOX360W:
xpad360w_process_packet(xpad, 0, xpad->idata);
__func__, retval);
}
+/*
+ * Stage the next pending output packet in the output URB.
+ *
+ * Slots are scanned starting after xpad->last_out_packet and wrapping
+ * around, at most XPAD_NUM_OUT_PACKETS steps.  The first pending slot is
+ * copied into xpad->odata and its length set on the URB.
+ *
+ * Callers must hold xpad->odata_lock spinlock.
+ *
+ * Returns true when a packet was staged, false when nothing is pending.
+ */
+static bool xpad_prepare_next_out_packet(struct usb_xpad *xpad)
+{
+	struct xpad_output_packet *slot;
+	int tries;
+
+	for (tries = XPAD_NUM_OUT_PACKETS; tries > 0; tries--) {
+		xpad->last_out_packet++;
+		if (xpad->last_out_packet >= XPAD_NUM_OUT_PACKETS)
+			xpad->last_out_packet = 0;
+
+		slot = &xpad->out_packets[xpad->last_out_packet];
+		if (!slot->pending)
+			continue;
+
+		dev_dbg(&xpad->intf->dev,
+			"%s - found pending output packet %d\n",
+			__func__, xpad->last_out_packet);
+
+		memcpy(xpad->odata, slot->data, slot->len);
+		xpad->irq_out->transfer_buffer_length = slot->len;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Submit the output URB for the next pending packet, unless the URB is
+ * already in flight or nothing is pending.
+ *
+ * Callers must hold xpad->odata_lock spinlock.
+ *
+ * Returns 0 when there was nothing to do or the URB was submitted,
+ * -EIO when usb_submit_urb() failed.
+ */
+static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad)
+{
+	int rc;
+
+	/* We must not touch an URB that is still active. */
+	if (xpad->irq_out_active)
+		return 0;
+
+	if (!xpad_prepare_next_out_packet(xpad))
+		return 0;
+
+	usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor);
+	rc = usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+	if (rc) {
+		dev_err(&xpad->intf->dev,
+			"%s - usb_submit_urb failed with result %d\n",
+			__func__, rc);
+		usb_unanchor_urb(xpad->irq_out);
+		return -EIO;
+	}
+
+	xpad->irq_out_active = true;
+	return 0;
+}
+
static void xpad_irq_out(struct urb *urb)
{
struct usb_xpad *xpad = urb->context;
struct device *dev = &xpad->intf->dev;
- int retval, status;
+ int status = urb->status;
+ int error;
+ unsigned long flags;
- status = urb->status;
+ spin_lock_irqsave(&xpad->odata_lock, flags);
switch (status) {
case 0:
/* success */
- return;
+ xpad->out_packets[xpad->last_out_packet].pending = false;
+ xpad->irq_out_active = xpad_prepare_next_out_packet(xpad);
+ break;
case -ECONNRESET:
case -ENOENT:
/* this urb is terminated, clean up */
dev_dbg(dev, "%s - urb shutting down with status: %d\n",
__func__, status);
- return;
+ xpad->irq_out_active = false;
+ break;
default:
dev_dbg(dev, "%s - nonzero urb status received: %d\n",
__func__, status);
- goto exit;
+ break;
}
-exit:
- retval = usb_submit_urb(urb, GFP_ATOMIC);
- if (retval)
- dev_err(dev, "%s - usb_submit_urb failed with result %d\n",
- __func__, retval);
+ if (xpad->irq_out_active) {
+ usb_anchor_urb(urb, &xpad->irq_out_anchor);
+ error = usb_submit_urb(urb, GFP_ATOMIC);
+ if (error) {
+ dev_err(dev,
+ "%s - usb_submit_urb failed with result %d\n",
+ __func__, error);
+ usb_unanchor_urb(urb);
+ xpad->irq_out_active = false;
+ }
+ }
+
+ spin_unlock_irqrestore(&xpad->odata_lock, flags);
}
static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad)
if (xpad->xtype == XTYPE_UNKNOWN)
return 0;
+ init_usb_anchor(&xpad->irq_out_anchor);
+
xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN,
GFP_KERNEL, &xpad->odata_dma);
if (!xpad->odata) {
goto fail1;
}
- mutex_init(&xpad->odata_mutex);
+ spin_lock_init(&xpad->odata_lock);
xpad->irq_out = usb_alloc_urb(0, GFP_KERNEL);
if (!xpad->irq_out) {
static void xpad_stop_output(struct usb_xpad *xpad)
{
- if (xpad->xtype != XTYPE_UNKNOWN)
- usb_kill_urb(xpad->irq_out);
+ if (xpad->xtype != XTYPE_UNKNOWN) {
+ if (!usb_wait_anchor_empty_timeout(&xpad->irq_out_anchor,
+ 5000)) {
+ dev_warn(&xpad->intf->dev,
+ "timed out waiting for output URB to complete, killing\n");
+ usb_kill_anchored_urbs(&xpad->irq_out_anchor);
+ }
+ }
}
static void xpad_deinit_output(struct usb_xpad *xpad)
static int xpad_inquiry_pad_presence(struct usb_xpad *xpad)
{
+ struct xpad_output_packet *packet =
+ &xpad->out_packets[XPAD_OUT_CMD_IDX];
+ unsigned long flags;
int retval;
- mutex_lock(&xpad->odata_mutex);
+ spin_lock_irqsave(&xpad->odata_lock, flags);
+
+ packet->data[0] = 0x08;
+ packet->data[1] = 0x00;
+ packet->data[2] = 0x0F;
+ packet->data[3] = 0xC0;
+ packet->data[4] = 0x00;
+ packet->data[5] = 0x00;
+ packet->data[6] = 0x00;
+ packet->data[7] = 0x00;
+ packet->data[8] = 0x00;
+ packet->data[9] = 0x00;
+ packet->data[10] = 0x00;
+ packet->data[11] = 0x00;
+ packet->len = 12;
+ packet->pending = true;
+
+ /* Reset the sequence so we send out presence first */
+ xpad->last_out_packet = -1;
+ retval = xpad_try_sending_next_out_packet(xpad);
+
+ spin_unlock_irqrestore(&xpad->odata_lock, flags);
- xpad->odata[0] = 0x08;
- xpad->odata[1] = 0x00;
- xpad->odata[2] = 0x0F;
- xpad->odata[3] = 0xC0;
- xpad->odata[4] = 0x00;
- xpad->odata[5] = 0x00;
- xpad->odata[6] = 0x00;
- xpad->odata[7] = 0x00;
- xpad->odata[8] = 0x00;
- xpad->odata[9] = 0x00;
- xpad->odata[10] = 0x00;
- xpad->odata[11] = 0x00;
- xpad->irq_out->transfer_buffer_length = 12;
+ return retval;
+}
+
+static int xpad_start_xbox_one(struct usb_xpad *xpad)
+{
+ struct xpad_output_packet *packet =
+ &xpad->out_packets[XPAD_OUT_CMD_IDX];
+ unsigned long flags;
+ int retval;
- retval = usb_submit_urb(xpad->irq_out, GFP_KERNEL);
+ spin_lock_irqsave(&xpad->odata_lock, flags);
- mutex_unlock(&xpad->odata_mutex);
+ /* Xbox one controller needs to be initialized. */
+ packet->data[0] = 0x05;
+ packet->data[1] = 0x20;
+ packet->data[2] = xpad->odata_serial++; /* packet serial */
+ packet->data[3] = 0x01; /* rumble bit enable? */
+ packet->data[4] = 0x00;
+ packet->len = 5;
+ packet->pending = true;
+
+ /* Reset the sequence so we send out start packet first */
+ xpad->last_out_packet = -1;
+ retval = xpad_try_sending_next_out_packet(xpad);
+
+ spin_unlock_irqrestore(&xpad->odata_lock, flags);
return retval;
}
static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect)
{
struct usb_xpad *xpad = input_get_drvdata(dev);
+ struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_FF_IDX];
__u16 strong;
__u16 weak;
+ int retval;
+ unsigned long flags;
if (effect->type != FF_RUMBLE)
return 0;
strong = effect->u.rumble.strong_magnitude;
weak = effect->u.rumble.weak_magnitude;
+ spin_lock_irqsave(&xpad->odata_lock, flags);
+
switch (xpad->xtype) {
case XTYPE_XBOX:
- xpad->odata[0] = 0x00;
- xpad->odata[1] = 0x06;
- xpad->odata[2] = 0x00;
- xpad->odata[3] = strong / 256; /* left actuator */
- xpad->odata[4] = 0x00;
- xpad->odata[5] = weak / 256; /* right actuator */
- xpad->irq_out->transfer_buffer_length = 6;
+ packet->data[0] = 0x00;
+ packet->data[1] = 0x06;
+ packet->data[2] = 0x00;
+ packet->data[3] = strong / 256; /* left actuator */
+ packet->data[4] = 0x00;
+ packet->data[5] = weak / 256; /* right actuator */
+ packet->len = 6;
+ packet->pending = true;
break;
case XTYPE_XBOX360:
- xpad->odata[0] = 0x00;
- xpad->odata[1] = 0x08;
- xpad->odata[2] = 0x00;
- xpad->odata[3] = strong / 256; /* left actuator? */
- xpad->odata[4] = weak / 256; /* right actuator? */
- xpad->odata[5] = 0x00;
- xpad->odata[6] = 0x00;
- xpad->odata[7] = 0x00;
- xpad->irq_out->transfer_buffer_length = 8;
+ packet->data[0] = 0x00;
+ packet->data[1] = 0x08;
+ packet->data[2] = 0x00;
+ packet->data[3] = strong / 256; /* left actuator? */
+ packet->data[4] = weak / 256; /* right actuator? */
+ packet->data[5] = 0x00;
+ packet->data[6] = 0x00;
+ packet->data[7] = 0x00;
+ packet->len = 8;
+ packet->pending = true;
break;
case XTYPE_XBOX360W:
- xpad->odata[0] = 0x00;
- xpad->odata[1] = 0x01;
- xpad->odata[2] = 0x0F;
- xpad->odata[3] = 0xC0;
- xpad->odata[4] = 0x00;
- xpad->odata[5] = strong / 256;
- xpad->odata[6] = weak / 256;
- xpad->odata[7] = 0x00;
- xpad->odata[8] = 0x00;
- xpad->odata[9] = 0x00;
- xpad->odata[10] = 0x00;
- xpad->odata[11] = 0x00;
- xpad->irq_out->transfer_buffer_length = 12;
+ packet->data[0] = 0x00;
+ packet->data[1] = 0x01;
+ packet->data[2] = 0x0F;
+ packet->data[3] = 0xC0;
+ packet->data[4] = 0x00;
+ packet->data[5] = strong / 256;
+ packet->data[6] = weak / 256;
+ packet->data[7] = 0x00;
+ packet->data[8] = 0x00;
+ packet->data[9] = 0x00;
+ packet->data[10] = 0x00;
+ packet->data[11] = 0x00;
+ packet->len = 12;
+ packet->pending = true;
break;
case XTYPE_XBOXONE:
- xpad->odata[0] = 0x09; /* activate rumble */
- xpad->odata[1] = 0x08;
- xpad->odata[2] = 0x00;
- xpad->odata[3] = 0x08; /* continuous effect */
- xpad->odata[4] = 0x00; /* simple rumble mode */
- xpad->odata[5] = 0x03; /* L and R actuator only */
- xpad->odata[6] = 0x00; /* TODO: LT actuator */
- xpad->odata[7] = 0x00; /* TODO: RT actuator */
- xpad->odata[8] = strong / 256; /* left actuator */
- xpad->odata[9] = weak / 256; /* right actuator */
- xpad->odata[10] = 0x80; /* length of pulse */
- xpad->odata[11] = 0x00; /* stop period of pulse */
- xpad->irq_out->transfer_buffer_length = 12;
+ packet->data[0] = 0x09; /* activate rumble */
+ packet->data[1] = 0x08;
+ packet->data[2] = xpad->odata_serial++;
+ packet->data[3] = 0x08; /* continuous effect */
+ packet->data[4] = 0x00; /* simple rumble mode */
+ packet->data[5] = 0x03; /* L and R actuator only */
+ packet->data[6] = 0x00; /* TODO: LT actuator */
+ packet->data[7] = 0x00; /* TODO: RT actuator */
+ packet->data[8] = strong / 512; /* left actuator */
+ packet->data[9] = weak / 512; /* right actuator */
+ packet->data[10] = 0x80; /* length of pulse */
+ packet->data[11] = 0x00; /* stop period of pulse */
+ packet->data[12] = 0x00;
+ packet->len = 13;
+ packet->pending = true;
break;
default:
dev_dbg(&xpad->dev->dev,
"%s - rumble command sent to unsupported xpad type: %d\n",
__func__, xpad->xtype);
- return -EINVAL;
+ retval = -EINVAL;
+ goto out;
}
- return usb_submit_urb(xpad->irq_out, GFP_ATOMIC);
+ retval = xpad_try_sending_next_out_packet(xpad);
+
+out:
+ spin_unlock_irqrestore(&xpad->odata_lock, flags);
+ return retval;
}
static int xpad_init_ff(struct usb_xpad *xpad)
*/
static void xpad_send_led_command(struct usb_xpad *xpad, int command)
{
+ struct xpad_output_packet *packet =
+ &xpad->out_packets[XPAD_OUT_LED_IDX];
+ unsigned long flags;
+
command %= 16;
- mutex_lock(&xpad->odata_mutex);
+ spin_lock_irqsave(&xpad->odata_lock, flags);
switch (xpad->xtype) {
case XTYPE_XBOX360:
- xpad->odata[0] = 0x01;
- xpad->odata[1] = 0x03;
- xpad->odata[2] = command;
- xpad->irq_out->transfer_buffer_length = 3;
+ packet->data[0] = 0x01;
+ packet->data[1] = 0x03;
+ packet->data[2] = command;
+ packet->len = 3;
+ packet->pending = true;
break;
+
case XTYPE_XBOX360W:
- xpad->odata[0] = 0x00;
- xpad->odata[1] = 0x00;
- xpad->odata[2] = 0x08;
- xpad->odata[3] = 0x40 + command;
- xpad->odata[4] = 0x00;
- xpad->odata[5] = 0x00;
- xpad->odata[6] = 0x00;
- xpad->odata[7] = 0x00;
- xpad->odata[8] = 0x00;
- xpad->odata[9] = 0x00;
- xpad->odata[10] = 0x00;
- xpad->odata[11] = 0x00;
- xpad->irq_out->transfer_buffer_length = 12;
+ packet->data[0] = 0x00;
+ packet->data[1] = 0x00;
+ packet->data[2] = 0x08;
+ packet->data[3] = 0x40 + command;
+ packet->data[4] = 0x00;
+ packet->data[5] = 0x00;
+ packet->data[6] = 0x00;
+ packet->data[7] = 0x00;
+ packet->data[8] = 0x00;
+ packet->data[9] = 0x00;
+ packet->data[10] = 0x00;
+ packet->data[11] = 0x00;
+ packet->len = 12;
+ packet->pending = true;
break;
}
- usb_submit_urb(xpad->irq_out, GFP_KERNEL);
- mutex_unlock(&xpad->odata_mutex);
+ xpad_try_sending_next_out_packet(xpad);
+
+ spin_unlock_irqrestore(&xpad->odata_lock, flags);
}
/*
*/
static void xpad_identify_controller(struct usb_xpad *xpad)
{
- xpad_send_led_command(xpad, (xpad->pad_nr % 4) + 2);
+ led_set_brightness(&xpad->led->led_cdev, (xpad->pad_nr % 4) + 2);
}
static void xpad_led_set(struct led_classdev *led_cdev,
if (error)
goto err_free_id;
- if (xpad->xtype == XTYPE_XBOX360) {
- /*
- * Light up the segment corresponding to controller
- * number on wired devices. On wireless we'll do that
- * when they respond to "presence" packet.
- */
- xpad_identify_controller(xpad);
- }
+ xpad_identify_controller(xpad);
return 0;
static void xpad_identify_controller(struct usb_xpad *xpad) { }
#endif
-static int xpad_open(struct input_dev *dev)
+static int xpad_start_input(struct usb_xpad *xpad)
{
- struct usb_xpad *xpad = input_get_drvdata(dev);
-
- /* URB was submitted in probe */
- if (xpad->xtype == XTYPE_XBOX360W)
- return 0;
+ int error;
- xpad->irq_in->dev = xpad->udev;
if (usb_submit_urb(xpad->irq_in, GFP_KERNEL))
return -EIO;
if (xpad->xtype == XTYPE_XBOXONE) {
- /* Xbox one controller needs to be initialized. */
- xpad->odata[0] = 0x05;
- xpad->odata[1] = 0x20;
- xpad->irq_out->transfer_buffer_length = 2;
- return usb_submit_urb(xpad->irq_out, GFP_KERNEL);
+ error = xpad_start_xbox_one(xpad);
+ if (error) {
+ usb_kill_urb(xpad->irq_in);
+ return error;
+ }
}
return 0;
}
-static void xpad_close(struct input_dev *dev)
+static void xpad_stop_input(struct usb_xpad *xpad)
{
- struct usb_xpad *xpad = input_get_drvdata(dev);
+ usb_kill_urb(xpad->irq_in);
+}
+
+static int xpad360w_start_input(struct usb_xpad *xpad)
+{
+ int error;
- if (xpad->xtype != XTYPE_XBOX360W)
+ error = usb_submit_urb(xpad->irq_in, GFP_KERNEL);
+ if (error)
+ return -EIO;
+
+ /*
+ * Send presence packet.
+ * This will force the controller to resend connection packets.
+ * This is useful in the case we activate the module after the
+ * adapter has been plugged in, as it won't automatically
+ * send us info about the controllers.
+ */
+ error = xpad_inquiry_pad_presence(xpad);
+ if (error) {
usb_kill_urb(xpad->irq_in);
+ return error;
+ }
- xpad_stop_output(xpad);
+ return 0;
+}
+
+/*
+ * Stop input on a wireless receiver: kill the interrupt-in URB, then
+ * wait for any scheduled presence work to finish.
+ */
+static void xpad360w_stop_input(struct usb_xpad *xpad)
+{
+ usb_kill_urb(xpad->irq_in);
+
+ /* Make sure we are done with presence work if it was scheduled */
+ flush_work(&xpad->work);
+}
+
+/*
+ * Input device open callback: looks up the driver data attached to the
+ * input device and starts input via xpad_start_input().
+ * Returns whatever xpad_start_input() returns.
+ */
+static int xpad_open(struct input_dev *dev)
+{
+ struct usb_xpad *xpad = input_get_drvdata(dev);
+
+ return xpad_start_input(xpad);
+}
+
+static void xpad_close(struct input_dev *dev)
+{
+ struct usb_xpad *xpad = input_get_drvdata(dev);
+
+ xpad_stop_input(xpad);
}
static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs)
static void xpad_deinit_input(struct usb_xpad *xpad)
{
- xpad_led_disconnect(xpad);
- input_unregister_device(xpad->dev);
+ if (xpad->input_created) {
+ xpad->input_created = false;
+ xpad_led_disconnect(xpad);
+ input_unregister_device(xpad->dev);
+ }
}
static int xpad_init_input(struct usb_xpad *xpad)
input_set_drvdata(input_dev, xpad);
- input_dev->open = xpad_open;
- input_dev->close = xpad_close;
+ if (xpad->xtype != XTYPE_XBOX360W) {
+ input_dev->open = xpad_open;
+ input_dev->close = xpad_close;
+ }
__set_bit(EV_KEY, input_dev->evbit);
if (error)
goto err_disconnect_led;
+ xpad->input_created = true;
return 0;
err_disconnect_led:
xpad->mapping = xpad_device[i].mapping;
xpad->xtype = xpad_device[i].xtype;
xpad->name = xpad_device[i].name;
+ INIT_WORK(&xpad->work, xpad_presence_work);
if (xpad->xtype == XTYPE_UNKNOWN) {
if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) {
usb_set_intfdata(intf, xpad);
- error = xpad_init_input(xpad);
- if (error)
- goto err_deinit_output;
-
if (xpad->xtype == XTYPE_XBOX360W) {
/*
* Submit the int URB immediately rather than waiting for open
* exactly the message that a controller has arrived that
* we're waiting for.
*/
- xpad->irq_in->dev = xpad->udev;
- error = usb_submit_urb(xpad->irq_in, GFP_KERNEL);
+ error = xpad360w_start_input(xpad);
if (error)
- goto err_deinit_input;
-
+ goto err_deinit_output;
/*
- * Send presence packet.
- * This will force the controller to resend connection packets.
- * This is useful in the case we activate the module after the
- * adapter has been plugged in, as it won't automatically
- * send us info about the controllers.
+ * Wireless controllers require RESET_RESUME to work properly
+ * after suspend. Ideally this quirk should be in usb core
+ * quirk list, but we have too many vendors producing these
+ * controllers and we'd need to maintain 2 identical lists
+ * here in this driver and in usb core.
*/
- error = xpad_inquiry_pad_presence(xpad);
+ udev->quirks |= USB_QUIRK_RESET_RESUME;
+ } else {
+ error = xpad_init_input(xpad);
if (error)
- goto err_kill_in_urb;
+ goto err_deinit_output;
}
return 0;
-err_kill_in_urb:
- usb_kill_urb(xpad->irq_in);
-err_deinit_input:
- xpad_deinit_input(xpad);
err_deinit_output:
xpad_deinit_output(xpad);
err_free_in_urb:
err_free_mem:
kfree(xpad);
return error;
-
}
static void xpad_disconnect(struct usb_interface *intf)
{
- struct usb_xpad *xpad = usb_get_intfdata (intf);
+ struct usb_xpad *xpad = usb_get_intfdata(intf);
+
+ if (xpad->xtype == XTYPE_XBOX360W)
+ xpad360w_stop_input(xpad);
xpad_deinit_input(xpad);
- xpad_deinit_output(xpad);
- if (xpad->xtype == XTYPE_XBOX360W) {
- usb_kill_urb(xpad->irq_in);
- }
+ /*
+ * Now that both input device and LED device are gone we can
+ * stop output URB.
+ */
+ xpad_stop_output(xpad);
+
+ xpad_deinit_output(xpad);
usb_free_urb(xpad->irq_in);
usb_free_coherent(xpad->udev, XPAD_PKT_LEN,
usb_set_intfdata(intf, NULL);
}
+/*
+ * USB suspend callback.  Stops the input side (always for wireless
+ * receivers, only while the input device has users otherwise) and then
+ * stops the output URB.  Always returns 0.
+ */
+static int xpad_suspend(struct usb_interface *intf, pm_message_t message)
+{
+	struct usb_xpad *xpad = usb_get_intfdata(intf);
+	struct input_dev *idev = xpad->dev;
+
+	if (xpad->xtype != XTYPE_XBOX360W) {
+		mutex_lock(&idev->mutex);
+		if (idev->users)
+			xpad_stop_input(xpad);
+		mutex_unlock(&idev->mutex);
+	} else {
+		/*
+		 * Wireless controllers always listen to input so
+		 * they are notified when controller shows up
+		 * or goes away.
+		 */
+		xpad360w_stop_input(xpad);
+	}
+
+	xpad_stop_output(xpad);
+
+	return 0;
+}
+
+/*
+ * USB resume / reset-resume callback.  Wireless receivers restart input
+ * unconditionally; other pads restart it only while the input device
+ * has users.  Returns the start routine's result, or 0 when nothing had
+ * to be restarted.
+ */
+static int xpad_resume(struct usb_interface *intf)
+{
+	struct usb_xpad *xpad = usb_get_intfdata(intf);
+	struct input_dev *idev = xpad->dev;
+	int rc = 0;
+
+	if (xpad->xtype == XTYPE_XBOX360W)
+		return xpad360w_start_input(xpad);
+
+	mutex_lock(&idev->mutex);
+	if (idev->users)
+		rc = xpad_start_input(xpad);
+	mutex_unlock(&idev->mutex);
+
+	return rc;
+}
+
static struct usb_driver xpad_driver = {
.name = "xpad",
.probe = xpad_probe,
.disconnect = xpad_disconnect,
+ .suspend = xpad_suspend,
+ .resume = xpad_resume,
+ .reset_resume = xpad_resume,
.id_table = xpad_table,
};
if (!node)
return ERR_PTR(-ENODEV);
- nbuttons = of_get_child_count(node);
+ nbuttons = of_get_available_child_count(node);
if (nbuttons == 0)
return ERR_PTR(-ENODEV);
pdata->rep = !!of_get_property(node, "autorepeat", NULL);
+ of_property_read_string(node, "label", &pdata->name);
+
i = 0;
- for_each_child_of_node(node, pp) {
+ for_each_available_child_of_node(node, pp) {
enum of_gpio_flags flags;
button = &pdata->buttons[i++];
#define MXT_T9_DETECT (1 << 7)
struct t9_range {
- u16 x;
- u16 y;
+ __le16 x;
+ __le16 y;
} __packed;
/* MXT_TOUCH_MULTI_T9 orient */
unsigned int irq;
unsigned int max_x;
unsigned int max_y;
+ bool xy_switch;
bool in_bootloader;
u16 mem_size;
u8 t100_aux_ampl;
if (error)
return error;
- le16_to_cpus(&range.x);
- le16_to_cpus(&range.y);
+ data->max_x = get_unaligned_le16(&range.x);
+ data->max_y = get_unaligned_le16(&range.y);
error = __mxt_read_reg(client,
object->start_address + MXT_T9_ORIENT,
if (error)
return error;
- /* Handle default values */
- if (range.x == 0)
- range.x = 1023;
-
- if (range.y == 0)
- range.y = 1023;
-
- if (orient & MXT_T9_ORIENT_SWITCH) {
- data->max_x = range.y;
- data->max_y = range.x;
- } else {
- data->max_x = range.x;
- data->max_y = range.y;
- }
-
- dev_dbg(&client->dev,
- "Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+ data->xy_switch = orient & MXT_T9_ORIENT_SWITCH;
return 0;
}
if (!object)
return -EINVAL;
+ /* read touchscreen dimensions */
error = __mxt_read_reg(client,
object->start_address + MXT_T100_XRANGE,
sizeof(range_x), &range_x);
if (error)
return error;
- le16_to_cpus(&range_x);
+ data->max_x = get_unaligned_le16(&range_x);
error = __mxt_read_reg(client,
object->start_address + MXT_T100_YRANGE,
if (error)
return error;
- le16_to_cpus(&range_y);
+ data->max_y = get_unaligned_le16(&range_y);
+ /* read orientation config */
error = __mxt_read_reg(client,
object->start_address + MXT_T100_CFG1,
1, &cfg);
if (error)
return error;
+ data->xy_switch = cfg & MXT_T100_CFG_SWITCHXY;
+
+ /* allocate aux bytes */
error = __mxt_read_reg(client,
object->start_address + MXT_T100_TCHAUX,
1, &tchaux);
if (error)
return error;
- /* Handle default values */
- if (range_x == 0)
- range_x = 1023;
-
- if (range_y == 0)
- range_y = 1023;
-
- if (cfg & MXT_T100_CFG_SWITCHXY) {
- data->max_x = range_y;
- data->max_y = range_x;
- } else {
- data->max_x = range_x;
- data->max_y = range_y;
- }
-
- /* allocate aux bytes */
aux = 6;
if (tchaux & MXT_T100_TCHAUX_VECT)
"T100 aux mappings vect:%u ampl:%u area:%u\n",
data->t100_aux_vect, data->t100_aux_ampl, data->t100_aux_area);
- dev_info(&client->dev,
- "T100 Touchscreen size X%uY%u\n", data->max_x, data->max_y);
-
return 0;
}
return -EINVAL;
}
+ /* Handle default values and orientation switch */
+ if (data->max_x == 0)
+ data->max_x = 1023;
+
+ if (data->max_y == 0)
+ data->max_y = 1023;
+
+ if (data->xy_switch)
+ swap(data->max_x, data->max_y);
+
+ dev_info(dev, "Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+
+ /* Register input device */
input_dev = input_allocate_device();
if (!input_dev) {
dev_err(dev, "Failed to allocate memory\n");
# Makefile for Open-Channel SSDs.
#
-obj-$(CONFIG_NVM) := core.o
+obj-$(CONFIG_NVM) := core.o sysblk.o
obj-$(CONFIG_NVM_GENNVM) += gennvm.o
obj-$(CONFIG_NVM_RRPC) += rrpc.o
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/lightnvm.h>
+#include <linux/sched/sysctl.h>
#include <uapi/linux/lightnvm.h>
static LIST_HEAD(nvm_targets);
lockdep_assert_held(&nvm_lock);
list_for_each_entry(mt, &nvm_mgrs, list) {
+ if (strncmp(dev->sb.mmtype, mt->name, NVM_MMTYPE_LEN))
+ continue;
+
ret = mt->register_mgr(dev);
if (ret < 0) {
pr_err("nvm: media mgr failed to init (%d) on dev %s\n",
return NULL;
}
+/*
+ * Ask the device's media manager for a block on @lun without taking the
+ * lun lock here. The gennvm implementation asserts that lun->lock is
+ * already held by the caller.
+ */
+struct nvm_block *nvm_get_blk_unlocked(struct nvm_dev *dev, struct nvm_lun *lun,
+							unsigned long flags)
+{
+	struct nvm_block *blk = dev->mt->get_blk_unlocked(dev, lun, flags);
+
+	return blk;
+}
+EXPORT_SYMBOL(nvm_get_blk_unlocked);
+
+/*
+ * Hand @blk back to the media manager through its put_blk_unlocked hook.
+ * Every valid page must already have been moved off the block before it
+ * is released.
+ */
+void nvm_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk)
+{
+	dev->mt->put_blk_unlocked(dev, blk);
+}
+EXPORT_SYMBOL(nvm_put_blk_unlocked);
+
struct nvm_block *nvm_get_blk(struct nvm_dev *dev, struct nvm_lun *lun,
unsigned long flags)
{
}
EXPORT_SYMBOL(nvm_erase_blk);
+/*
+ * Convert every address carried by @rqd from the device format to the
+ * generic format. A request with more than one page keeps its addresses
+ * in ppa_list; otherwise the single address lives in ppa_addr.
+ */
+void nvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+	int idx;
+
+	if (rqd->nr_pages <= 1) {
+		rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
+		return;
+	}
+
+	for (idx = 0; idx < rqd->nr_pages; idx++)
+		rqd->ppa_list[idx] = dev_to_generic_addr(dev,
+							rqd->ppa_list[idx]);
+}
+EXPORT_SYMBOL(nvm_addr_to_generic_mode);
+
+/*
+ * Convert every address carried by @rqd from the generic format to the
+ * device format. A request with more than one page keeps its addresses
+ * in ppa_list; otherwise the single address lives in ppa_addr.
+ */
+void nvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+	int idx;
+
+	if (rqd->nr_pages <= 1) {
+		rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
+		return;
+	}
+
+	for (idx = 0; idx < rqd->nr_pages; idx++)
+		rqd->ppa_list[idx] = generic_to_dev_addr(dev,
+							rqd->ppa_list[idx]);
+}
+EXPORT_SYMBOL(nvm_generic_to_addr_mode);
+
+/*
+ * Fill in the address part of @rqd from the @nr_ppas generic addresses in
+ * @ppas.
+ *
+ * A single address on a single-plane device is stored inline in
+ * rqd->ppa_addr. In every other case a DMA-able list is allocated and each
+ * address is replicated once per plane, plane by plane. Note that the
+ * plane field of the caller's @ppas entries is overwritten while doing so.
+ *
+ * Returns 0 on success, -EINVAL if the expanded list exceeds the device's
+ * max_phys_sect, or -ENOMEM if the list cannot be allocated. A successfully
+ * allocated list is released with nvm_free_rqd_ppalist().
+ */
+int nvm_set_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd,
+					struct ppa_addr *ppas, int nr_ppas)
+{
+	int plane, nr_planes, n;
+	struct ppa_addr *slot;
+
+	if (nr_ppas == 1 && dev->plane_mode == NVM_PLANE_SINGLE) {
+		rqd->ppa_addr = ppas[0];
+		rqd->nr_pages = 1;
+		return 0;
+	}
+
+	nr_planes = 1 << dev->plane_mode;
+	rqd->nr_pages = nr_planes * nr_ppas;
+
+	if (rqd->nr_pages > dev->ops->max_phys_sect)
+		return -EINVAL;
+
+	rqd->ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, &rqd->dma_ppa_list);
+	if (rqd->ppa_list == NULL) {
+		pr_err("nvm: failed to allocate dma memory\n");
+		return -ENOMEM;
+	}
+
+	/* entries are laid out as [plane 0: all ppas][plane 1: all ppas]... */
+	slot = rqd->ppa_list;
+	for (plane = 0; plane < nr_planes; plane++) {
+		for (n = 0; n < nr_ppas; n++) {
+			ppas[n].g.pl = plane;
+			*slot++ = ppas[n];
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(nvm_set_rqd_ppalist);
+
+/*
+ * Release the DMA address list attached to @rqd, if there is one.
+ * Requests that carry their single address inline have no list and are
+ * left untouched.
+ */
+void nvm_free_rqd_ppalist(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+	if (rqd->ppa_list)
+		nvm_dev_dma_free(dev, rqd->ppa_list, rqd->dma_ppa_list);
+}
+EXPORT_SYMBOL(nvm_free_rqd_ppalist);
+
+/*
+ * Erase the blocks addressed by the @nr_ppas generic addresses in @ppas.
+ *
+ * Returns 0 when the device has no erase_block hook, the error from
+ * nvm_set_rqd_ppalist() if the request cannot be set up, and otherwise
+ * whatever the device's erase_block hook returns.
+ */
+int nvm_erase_ppa(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas)
+{
+	struct nvm_rq erase_rq;
+	int err;
+
+	if (!dev->ops->erase_block)
+		return 0;
+
+	memset(&erase_rq, 0, sizeof(erase_rq));
+
+	err = nvm_set_rqd_ppalist(dev, &erase_rq, ppas, nr_ppas);
+	if (err)
+		return err;
+
+	/* the device hook expects device-format addresses */
+	nvm_generic_to_addr_mode(dev, &erase_rq);
+	err = dev->ops->erase_block(dev, &erase_rq);
+	nvm_free_rqd_ppalist(dev, &erase_rq);
+
+	return err;
+}
+EXPORT_SYMBOL(nvm_erase_ppa);
+
+/*
+ * Complete @rqd: record @error in the request and invoke the end_io
+ * callback that was installed on it.
+ */
+void nvm_end_io(struct nvm_rq *rqd, int error)
+{
+	rqd->error = error;
+	(*rqd->end_io)(rqd);
+}
+EXPORT_SYMBOL(nvm_end_io);
+
+/*
+ * end_io callback used for synchronous requests: detach the completion
+ * from the request and then signal it.
+ */
+static void nvm_end_io_sync(struct nvm_rq *rqd)
+{
+	struct completion *done = rqd->wait;
+
+	rqd->wait = NULL;
+	complete(done);
+}
+
+/*
+ * Synchronously issue one I/O of @len bytes at @buf against the @nr_ppas
+ * generic addresses in @ppa.
+ *
+ * The buffer is mapped into a bio, the addresses are converted to device
+ * format, and the request is handed to the device's submit_io hook. The
+ * caller then sleeps on an on-stack completion that nvm_end_io_sync()
+ * signals.
+ *
+ * Returns -ENOMEM if the bio cannot be mapped, the error from
+ * nvm_set_rqd_ppalist() or from submit_io if the request cannot be set up
+ * or queued, and otherwise the completion status stored in the request.
+ */
+int nvm_submit_ppa(struct nvm_dev *dev, struct ppa_addr *ppa, int nr_ppas,
+				int opcode, int flags, void *buf, int len)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+	struct nvm_rq rqd;
+	struct bio *bio;
+	int ret;
+	unsigned long hang_check;
+
+	bio = bio_map_kern(dev->q, buf, len, GFP_KERNEL);
+	if (IS_ERR_OR_NULL(bio))
+		return -ENOMEM;
+
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+	ret = nvm_set_rqd_ppalist(dev, &rqd, ppa, nr_ppas);
+	if (ret) {
+		bio_put(bio);
+		return ret;
+	}
+
+	rqd.opcode = opcode;
+	rqd.bio = bio;
+	rqd.wait = &wait;
+	rqd.dev = dev;
+	rqd.end_io = nvm_end_io_sync;
+	rqd.flags = flags;
+	nvm_generic_to_addr_mode(dev, &rqd);
+
+	ret = dev->ops->submit_io(dev, &rqd);
+	if (ret) {
+		/*
+		 * The request was never queued, so the completion will never
+		 * be signalled: waiting for it would block forever. Release
+		 * what was set up and report the submission error instead.
+		 */
+		nvm_free_rqd_ppalist(dev, &rqd);
+		bio_put(bio);
+		return ret;
+	}
+
+	/* Prevent hang_check timer from firing at us during very long I/O */
+	hang_check = sysctl_hung_task_timeout_secs;
+	if (hang_check)
+		while (!wait_for_completion_io_timeout(&wait,
+							hang_check * (HZ/2)))
+			;
+	else
+		wait_for_completion_io(&wait);
+
+	nvm_free_rqd_ppalist(dev, &rqd);
+
+	return rqd.error;
+}
+EXPORT_SYMBOL(nvm_submit_ppa);
+
+/*
+ * Build the lower page table for SLC flash: every page of a block is
+ * usable, so the table is simply the identity mapping 0..pgs_per_blk-1.
+ *
+ * Returns 0 on success or -ENOMEM if the table cannot be allocated.
+ */
+static int nvm_init_slc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp)
+{
+	int pg;
+
+	dev->lps_per_blk = dev->pgs_per_blk;
+
+	dev->lptbl = kcalloc(dev->lps_per_blk, sizeof(int), GFP_KERNEL);
+	if (dev->lptbl == NULL)
+		return -ENOMEM;
+
+	pg = 0;
+	while (pg < dev->lps_per_blk) {
+		dev->lptbl[pg] = pg;
+		pg++;
+	}
+
+	return 0;
+}
+
+/*
+ * Build the lower page table for MLC flash from the pair list reported in
+ * the identify group @grp.
+ *
+ * Returns 0 on success (also when the device reports no pairs, in which
+ * case no table is allocated and dev->lps_per_blk is left untouched), or
+ * -ENOMEM if the table cannot be allocated.
+ */
+static int nvm_init_mlc_tbl(struct nvm_dev *dev, struct nvm_id_group *grp)
+{
+ int i, p;
+ struct nvm_id_lp_mlc *mlc = &grp->lptbl.mlc;
+
+ /* nothing to decode; the caller only logs that the table is missing */
+ if (!mlc->num_pairs)
+ return 0;
+
+ dev->lps_per_blk = mlc->num_pairs;
+ dev->lptbl = kcalloc(dev->lps_per_blk, sizeof(int), GFP_KERNEL);
+ if (!dev->lptbl)
+ return -ENOMEM;
+
+ /* The lower page table encoding consists of a list of bytes, where each
+ * has a lower and an upper half. The first half byte maintains the
+ * increment value and every value after is an offset added to the
+ * previous incrementation value */
+ dev->lptbl[0] = mlc->pairs[0] & 0xF;
+ for (i = 1; i < dev->lps_per_blk; i++) {
+ /* two table entries are packed into each byte of pairs[] */
+ p = mlc->pairs[i >> 1];
+ if (i & 0x1) /* upper */
+ dev->lptbl[i] = dev->lptbl[i - 1] + ((p & 0xF0) >> 4);
+ else /* lower */
+ dev->lptbl[i] = dev->lptbl[i - 1] + (p & 0xF);
+ }
+
+ return 0;
+}
+
static int nvm_core_init(struct nvm_dev *dev)
{
struct nvm_id *id = &dev->identity;
dev->sec_size = grp->csecs;
dev->oob_size = grp->sos;
dev->sec_per_pg = grp->fpg_sz / grp->csecs;
+ dev->mccap = grp->mccap;
memcpy(&dev->ppaf, &id->ppaf, sizeof(struct nvm_addr_format));
dev->plane_mode = NVM_PLANE_SINGLE;
return -EINVAL;
}
- if (grp->fmtype != 0 && grp->fmtype != 1) {
+ switch (grp->fmtype) {
+ case NVM_ID_FMTYPE_SLC:
+ if (nvm_init_slc_tbl(dev, grp))
+ return -ENOMEM;
+ break;
+ case NVM_ID_FMTYPE_MLC:
+ if (nvm_init_mlc_tbl(dev, grp))
+ return -ENOMEM;
+ break;
+ default:
pr_err("nvm: flash type not supported\n");
return -EINVAL;
}
+ if (!dev->lps_per_blk)
+ pr_info("nvm: lower page programming table missing\n");
+
if (grp->mpos & 0x020202)
dev->plane_mode = NVM_PLANE_DOUBLE;
if (grp->mpos & 0x040404)
dev->nr_chnls;
dev->total_pages = dev->total_blocks * dev->pgs_per_blk;
INIT_LIST_HEAD(&dev->online_targets);
+ mutex_init(&dev->mlock);
return 0;
}
if (dev->mt)
dev->mt->unregister_mgr(dev);
+
+ kfree(dev->lptbl);
}
static int nvm_init(struct nvm_dev *dev)
}
}
+ ret = nvm_get_sysblock(dev, &dev->sb);
+ if (!ret)
+ pr_err("nvm: device not initialized.\n");
+ else if (ret < 0)
+ pr_err("nvm: err (%d) on device initialization\n", ret);
+
/* register device with a supported media manager */
down_write(&nvm_lock);
- dev->mt = nvm_init_mgr(dev);
+ if (ret > 0)
+ dev->mt = nvm_init_mgr(dev);
list_add(&dev->devices, &nvm_devices);
up_write(&nvm_lock);
return __nvm_configure_remove(&remove);
}
+/*
+ * Seed @info with the values of a brand-new system block: version 1,
+ * sequence number 1 and an erase count of zero. The remaining fields are
+ * left for the caller to fill in.
+ */
+static void nvm_setup_nvm_sb_info(struct nvm_sb_info *info)
+{
+	info->version = 1;
+	info->seqnr = 1;
+	info->erase_cnt = 0;
+}
+
+/*
+ * Initialize the device named in @init: write a fresh system block that
+ * records the requested media manager type, cache it in dev->sb and then
+ * try to attach a matching media manager.
+ *
+ * Returns 0 on success, -EINVAL if the device is unknown, or the error
+ * from nvm_init_sysblock().
+ */
+static long __nvm_ioctl_dev_init(struct nvm_ioctl_dev_init *init)
+{
+	struct nvm_sb_info info;
+	struct nvm_dev *dev;
+	int err;
+
+	down_write(&nvm_lock);
+	dev = nvm_find_nvm_dev(init->dev);
+	up_write(&nvm_lock);
+
+	if (dev == NULL) {
+		pr_err("nvm: device not found\n");
+		return -EINVAL;
+	}
+
+	nvm_setup_nvm_sb_info(&info);
+	info.fs_ppa.ppa = -1;
+	strncpy(info.mmtype, init->mmtype, NVM_MMTYPE_LEN);
+
+	err = nvm_init_sysblock(dev, &info);
+	if (err)
+		return err;
+
+	memcpy(&dev->sb, &info, sizeof(info));
+
+	/* the manager is selected by comparing its name with dev->sb.mmtype */
+	down_write(&nvm_lock);
+	dev->mt = nvm_init_mgr(dev);
+	up_write(&nvm_lock);
+
+	return 0;
+}
+
+/*
+ * NVM_DEV_INIT ioctl entry point. Requires CAP_SYS_ADMIN, copies the
+ * request from user space, rejects any flags (none are supported) and
+ * forces the device name to be NUL-terminated before acting on it.
+ */
+static long nvm_ioctl_dev_init(struct file *file, void __user *arg)
+{
+	struct nvm_ioctl_dev_init init;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&init, arg, sizeof(init)))
+		return -EFAULT;
+
+	if (init.flags) {
+		pr_err("nvm: no flags supported\n");
+		return -EINVAL;
+	}
+
+	init.dev[DISK_NAME_LEN - 1] = '\0';
+
+	return __nvm_ioctl_dev_init(&init);
+}
+
+/*
+ * NVM_DEV_FACTORY ioctl entry point. Requires CAP_SYS_ADMIN. After the
+ * request has been validated, any media manager attached to the device is
+ * unregistered and the device is handed to nvm_dev_factory() together
+ * with the requested flags.
+ *
+ * Returns -EPERM, -EFAULT or -EINVAL for a rejected request, otherwise
+ * the result of nvm_dev_factory().
+ */
+static long nvm_ioctl_dev_factory(struct file *file, void __user *arg)
+{
+	struct nvm_ioctl_dev_factory fact;
+	struct nvm_dev *dev;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&fact, arg, sizeof(fact)))
+		return -EFAULT;
+
+	fact.dev[DISK_NAME_LEN - 1] = '\0';
+
+	/* only bits below NVM_FACTORY_NR_BITS are accepted */
+	if (fact.flags & ~(NVM_FACTORY_NR_BITS - 1))
+		return -EINVAL;
+
+	down_write(&nvm_lock);
+	dev = nvm_find_nvm_dev(fact.dev);
+	up_write(&nvm_lock);
+
+	if (dev == NULL) {
+		pr_err("nvm: device not found\n");
+		return -EINVAL;
+	}
+
+	if (dev->mt != NULL) {
+		dev->mt->unregister_mgr(dev);
+		dev->mt = NULL;
+	}
+
+	return nvm_dev_factory(dev, fact.flags);
+}
+
static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
return nvm_ioctl_dev_create(file, argp);
case NVM_DEV_REMOVE:
return nvm_ioctl_dev_remove(file, argp);
+ case NVM_DEV_INIT:
+ return nvm_ioctl_dev_init(file, argp);
+ case NVM_DEV_FACTORY:
+ return nvm_ioctl_dev_factory(file, argp);
}
return 0;
}
lun->vlun.lun_id = i % dev->luns_per_chnl;
lun->vlun.chnl_id = i / dev->luns_per_chnl;
lun->vlun.nr_free_blocks = dev->blks_per_lun;
- lun->vlun.nr_inuse_blocks = 0;
+ lun->vlun.nr_open_blocks = 0;
+ lun->vlun.nr_closed_blocks = 0;
lun->vlun.nr_bad_blocks = 0;
}
return 0;
list_move_tail(&blk->list, &lun->bb_list);
lun->vlun.nr_bad_blocks++;
+ lun->vlun.nr_free_blocks--;
}
return 0;
pba = pba - (dev->sec_per_lun * lun_id);
blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)];
- if (!blk->type) {
+ if (!blk->state) {
/* at this point, we don't know anything about the
* block. It's up to the FTL on top to re-etablish the
- * block state
+ * block state. The block is assumed to be open.
*/
list_move_tail(&blk->list, &lun->used_list);
- blk->type = 1;
+ blk->state = NVM_BLK_ST_OPEN;
lun->vlun.nr_free_blocks--;
- lun->vlun.nr_inuse_blocks++;
+ lun->vlun.nr_open_blocks++;
}
}
module_put(THIS_MODULE);
}
-static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
+static struct nvm_block *gennvm_get_blk_unlocked(struct nvm_dev *dev,
struct nvm_lun *vlun, unsigned long flags)
{
struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun);
struct nvm_block *blk = NULL;
int is_gc = flags & NVM_IOTYPE_GC;
- spin_lock(&vlun->lock);
+ assert_spin_locked(&vlun->lock);
if (list_empty(&lun->free_list)) {
pr_err_ratelimited("gennvm: lun %u have no free pages available",
blk = list_first_entry(&lun->free_list, struct nvm_block, list);
list_move_tail(&blk->list, &lun->used_list);
- blk->type = 1;
+ blk->state = NVM_BLK_ST_OPEN;
lun->vlun.nr_free_blocks--;
- lun->vlun.nr_inuse_blocks++;
+ lun->vlun.nr_open_blocks++;
out:
+ return blk;
+}
+
+static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev,
+ struct nvm_lun *vlun, unsigned long flags)
+{
+ struct nvm_block *blk;
+
+ spin_lock(&vlun->lock);
+ blk = gennvm_get_blk_unlocked(dev, vlun, flags);
spin_unlock(&vlun->lock);
return blk;
}
-static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
+static void gennvm_put_blk_unlocked(struct nvm_dev *dev, struct nvm_block *blk)
{
struct nvm_lun *vlun = blk->lun;
struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun);
- spin_lock(&vlun->lock);
+ assert_spin_locked(&vlun->lock);
- switch (blk->type) {
- case 1:
+ if (blk->state & NVM_BLK_ST_OPEN) {
list_move_tail(&blk->list, &lun->free_list);
+ lun->vlun.nr_open_blocks--;
lun->vlun.nr_free_blocks++;
- lun->vlun.nr_inuse_blocks--;
- blk->type = 0;
- break;
- case 2:
+ blk->state = NVM_BLK_ST_FREE;
+ } else if (blk->state & NVM_BLK_ST_CLOSED) {
+ list_move_tail(&blk->list, &lun->free_list);
+ lun->vlun.nr_closed_blocks--;
+ lun->vlun.nr_free_blocks++;
+ blk->state = NVM_BLK_ST_FREE;
+ } else if (blk->state & NVM_BLK_ST_BAD) {
list_move_tail(&blk->list, &lun->bb_list);
lun->vlun.nr_bad_blocks++;
- lun->vlun.nr_inuse_blocks--;
- break;
- default:
+ blk->state = NVM_BLK_ST_BAD;
+ } else {
WARN_ON_ONCE(1);
pr_err("gennvm: erroneous block type (%lu -> %u)\n",
- blk->id, blk->type);
+ blk->id, blk->state);
list_move_tail(&blk->list, &lun->bb_list);
lun->vlun.nr_bad_blocks++;
- lun->vlun.nr_inuse_blocks--;
- }
-
- spin_unlock(&vlun->lock);
-}
-
-static void gennvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
- int i;
-
- if (rqd->nr_pages > 1) {
- for (i = 0; i < rqd->nr_pages; i++)
- rqd->ppa_list[i] = dev_to_generic_addr(dev,
- rqd->ppa_list[i]);
- } else {
- rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr);
+ blk->state = NVM_BLK_ST_BAD;
}
}
-static void gennvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
- int i;
-
- if (rqd->nr_pages > 1) {
- for (i = 0; i < rqd->nr_pages; i++)
- rqd->ppa_list[i] = generic_to_dev_addr(dev,
- rqd->ppa_list[i]);
- } else {
- rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr);
- }
-}
-
-static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
+static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk)
{
- if (!dev->ops->submit_io)
- return 0;
-
- /* Convert address space */
- gennvm_generic_to_addr_mode(dev, rqd);
+ struct nvm_lun *vlun = blk->lun;
- rqd->dev = dev;
- return dev->ops->submit_io(dev, rqd);
+ spin_lock(&vlun->lock);
+ gennvm_put_blk_unlocked(dev, blk);
+ spin_unlock(&vlun->lock);
}
static void gennvm_blk_set_type(struct nvm_dev *dev, struct ppa_addr *ppa,
blk = &lun->vlun.blocks[ppa->g.blk];
/* will be moved to bb list on put_blk from target */
- blk->type = type;
+ blk->state = type;
}
/* mark block bad. It is expected the target recover from the error. */
if (dev->ops->set_bb_tbl(dev, rqd, 1))
return;
- gennvm_addr_to_generic_mode(dev, rqd);
+ nvm_addr_to_generic_mode(dev, rqd);
/* look up blocks and mark them as bad */
if (rqd->nr_pages > 1)
for (i = 0; i < rqd->nr_pages; i++)
- gennvm_blk_set_type(dev, &rqd->ppa_list[i], 2);
+ gennvm_blk_set_type(dev, &rqd->ppa_list[i],
+ NVM_BLK_ST_BAD);
else
- gennvm_blk_set_type(dev, &rqd->ppa_addr, 2);
+ gennvm_blk_set_type(dev, &rqd->ppa_addr, NVM_BLK_ST_BAD);
}
-static int gennvm_end_io(struct nvm_rq *rqd, int error)
+static void gennvm_end_io(struct nvm_rq *rqd)
{
struct nvm_tgt_instance *ins = rqd->ins;
- int ret = 0;
- switch (error) {
+ switch (rqd->error) {
case NVM_RSP_SUCCESS:
- break;
case NVM_RSP_ERR_EMPTYPAGE:
break;
case NVM_RSP_ERR_FAILWRITE:
gennvm_mark_blk_bad(rqd->dev, rqd);
- default:
- ret++;
}
- ret += ins->tt->end_io(rqd, error);
-
- return ret;
+ ins->tt->end_io(rqd);
}
-static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
- unsigned long flags)
+static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
- int plane_cnt = 0, pl_idx, ret;
- struct ppa_addr addr;
- struct nvm_rq rqd;
-
- if (!dev->ops->erase_block)
- return 0;
-
- addr = block_to_ppa(dev, blk);
-
- if (dev->plane_mode == NVM_PLANE_SINGLE) {
- rqd.nr_pages = 1;
- rqd.ppa_addr = addr;
- } else {
- plane_cnt = (1 << dev->plane_mode);
- rqd.nr_pages = plane_cnt;
-
- rqd.ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL,
- &rqd.dma_ppa_list);
- if (!rqd.ppa_list) {
- pr_err("gennvm: failed to allocate dma memory\n");
- return -ENOMEM;
- }
-
- for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) {
- addr.g.pl = pl_idx;
- rqd.ppa_list[pl_idx] = addr;
- }
- }
+ if (!dev->ops->submit_io)
+ return -ENODEV;
- gennvm_generic_to_addr_mode(dev, &rqd);
+ /* Convert address space */
+ nvm_generic_to_addr_mode(dev, rqd);
- ret = dev->ops->erase_block(dev, &rqd);
+ rqd->dev = dev;
+ rqd->end_io = gennvm_end_io;
+ return dev->ops->submit_io(dev, rqd);
+}
- if (plane_cnt)
- nvm_dev_dma_free(dev, rqd.ppa_list, rqd.dma_ppa_list);
+static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk,
+ unsigned long flags)
+{
+ struct ppa_addr addr = block_to_ppa(dev, blk);
- return ret;
+ return nvm_erase_ppa(dev, &addr, 1);
}
static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid)
gennvm_for_each_lun(gn, lun, i) {
spin_lock(&lun->vlun.lock);
- pr_info("%s: lun%8u\t%u\t%u\t%u\n",
+ pr_info("%s: lun%8u\t%u\t%u\t%u\t%u\n",
dev->name, i,
lun->vlun.nr_free_blocks,
- lun->vlun.nr_inuse_blocks,
+ lun->vlun.nr_open_blocks,
+ lun->vlun.nr_closed_blocks,
lun->vlun.nr_bad_blocks);
spin_unlock(&lun->vlun.lock);
}
static struct nvmm_type gennvm = {
- .name = "gennvm",
- .version = {0, 1, 0},
+ .name = "gennvm",
+ .version = {0, 1, 0},
+
+ .register_mgr = gennvm_register,
+ .unregister_mgr = gennvm_unregister,
- .register_mgr = gennvm_register,
- .unregister_mgr = gennvm_unregister,
+ .get_blk_unlocked = gennvm_get_blk_unlocked,
+ .put_blk_unlocked = gennvm_put_blk_unlocked,
- .get_blk = gennvm_get_blk,
- .put_blk = gennvm_put_blk,
+ .get_blk = gennvm_get_blk,
+ .put_blk = gennvm_put_blk,
- .submit_io = gennvm_submit_io,
- .end_io = gennvm_end_io,
- .erase_blk = gennvm_erase_blk,
+ .submit_io = gennvm_submit_io,
+ .erase_blk = gennvm_erase_blk,
- .get_lun = gennvm_get_lun,
- .lun_info_print = gennvm_lun_info_print,
+ .get_lun = gennvm_get_lun,
+ .lun_info_print = gennvm_lun_info_print,
};
static int __init gennvm_module_init(void)
static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
unsigned long flags)
{
+ struct nvm_lun *lun = rlun->parent;
struct nvm_block *blk;
struct rrpc_block *rblk;
- blk = nvm_get_blk(rrpc->dev, rlun->parent, flags);
- if (!blk)
+ spin_lock(&lun->lock);
+ blk = nvm_get_blk_unlocked(rrpc->dev, rlun->parent, flags);
+ if (!blk) {
+ pr_err("nvm: rrpc: cannot get new block from media manager\n");
+ spin_unlock(&lun->lock);
return NULL;
+ }
rblk = &rlun->blocks[blk->id];
- blk->priv = rblk;
+ list_add_tail(&rblk->list, &rlun->open_list);
+ spin_unlock(&lun->lock);
+ blk->priv = rblk;
bitmap_zero(rblk->invalid_pages, rrpc->dev->pgs_per_blk);
rblk->next_page = 0;
rblk->nr_invalid_pages = 0;
static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
{
- nvm_put_blk(rrpc->dev, rblk->parent);
+ struct rrpc_lun *rlun = rblk->rlun;
+ struct nvm_lun *lun = rlun->parent;
+
+ spin_lock(&lun->lock);
+ nvm_put_blk_unlocked(rrpc->dev, rblk->parent);
+ list_del(&rblk->list);
+ spin_unlock(&lun->lock);
}
static void rrpc_put_blks(struct rrpc *rrpc)
}
page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
+ if (!page)
+ return -ENOMEM;
while ((slot = find_first_zero_bit(rblk->invalid_pages,
nr_pgs_per_blk)) < nr_pgs_per_blk) {
goto finished;
}
wait_for_completion_io(&wait);
+ if (bio->bi_error) {
+ rrpc_inflight_laddr_release(rrpc, rqd);
+ goto finished;
+ }
bio_reset(bio);
reinit_completion(&wait);
wait_for_completion_io(&wait);
rrpc_inflight_laddr_release(rrpc, rqd);
+ if (bio->bi_error)
+ goto finished;
bio_reset(bio);
}
struct rrpc *rrpc = gcb->rrpc;
struct rrpc_block *rblk = gcb->rblk;
struct nvm_dev *dev = rrpc->dev;
+ struct nvm_lun *lun = rblk->parent->lun;
+ struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
+ mempool_free(gcb, rrpc->gcb_pool);
pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
if (rrpc_move_valid_pages(rrpc, rblk))
- goto done;
+ goto put_back;
+
+ if (nvm_erase_blk(dev, rblk->parent))
+ goto put_back;
- nvm_erase_blk(dev, rblk->parent);
rrpc_put_blk(rrpc, rblk);
-done:
- mempool_free(gcb, rrpc->gcb_pool);
+
+ return;
+
+put_back:
+ spin_lock(&rlun->lock);
+ list_add_tail(&rblk->prio, &rlun->prio_list);
+ spin_unlock(&rlun->lock);
}
/* the block with highest number of invalid pages, will be in the beginning
if (nr_blocks_need < rrpc->nr_luns)
nr_blocks_need = rrpc->nr_luns;
- spin_lock(&lun->lock);
+ spin_lock(&rlun->lock);
while (nr_blocks_need > lun->nr_free_blocks &&
!list_empty(&rlun->prio_list)) {
struct rrpc_block *rblock = block_prio_find_max(rlun);
if (!rblock->nr_invalid_pages)
break;
+ gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
+ if (!gcb)
+ break;
+
list_del_init(&rblock->prio);
BUG_ON(!block_is_full(rrpc, rblock));
pr_debug("rrpc: selected block '%lu' for GC\n", block->id);
- gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
- if (!gcb)
- break;
-
gcb->rrpc = rrpc;
gcb->rblk = rblock;
INIT_WORK(&gcb->ws_gc, rrpc_block_gc);
nr_blocks_need--;
}
- spin_unlock(&lun->lock);
+ spin_unlock(&rlun->lock);
/* TODO: Hint that request queue can be started again */
}
lun = rblk->parent->lun;
cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
- if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk))
+ if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) {
+ struct nvm_block *blk = rblk->parent;
+ struct rrpc_lun *rlun = rblk->rlun;
+
+ spin_lock(&lun->lock);
+ lun->nr_open_blocks--;
+ lun->nr_closed_blocks++;
+ blk->state &= ~NVM_BLK_ST_OPEN;
+ blk->state |= NVM_BLK_ST_CLOSED;
+ list_move_tail(&rblk->list, &rlun->closed_list);
+ spin_unlock(&lun->lock);
+
rrpc_run_gc(rrpc, rblk);
+ }
}
}
-static int rrpc_end_io(struct nvm_rq *rqd, int error)
+static void rrpc_end_io(struct nvm_rq *rqd)
{
struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);
struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
if (bio_data_dir(rqd->bio) == WRITE)
rrpc_end_io_write(rrpc, rrqd, laddr, npages);
+ bio_put(rqd->bio);
+
if (rrqd->flags & NVM_IOTYPE_GC)
- return 0;
+ return;
rrpc_unlock_rq(rrpc, rqd);
- bio_put(rqd->bio);
if (npages > 1)
nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
nvm_dev_dma_free(rrpc->dev, rqd->metadata, rqd->dma_metadata);
mempool_free(rqd, rrpc->rq_pool);
-
- return 0;
}
static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
err = nvm_submit_io(rrpc->dev, rqd);
if (err) {
pr_err("rrpc: I/O submission failed: %d\n", err);
+ bio_put(bio);
+ if (!(flags & NVM_IOTYPE_GC)) {
+ rrpc_unlock_rq(rrpc, rqd);
+ if (rqd->nr_pages > 1)
+ nvm_dev_dma_free(rrpc->dev,
+ rqd->ppa_list, rqd->dma_ppa_list);
+ }
return NVM_IO_ERR;
}
struct rrpc_lun *rlun;
int i, j;
+ if (dev->pgs_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
+ pr_err("rrpc: number of pages per block too high.");
+ return -EINVAL;
+ }
+
spin_lock_init(&rrpc->rev_lock);
rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
for (i = 0; i < rrpc->nr_luns; i++) {
struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
- if (dev->pgs_per_blk >
- MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
- pr_err("rrpc: number of pages per block too high.");
- goto err;
- }
-
rlun = &rrpc->luns[i];
rlun->rrpc = rrpc;
rlun->parent = lun;
INIT_LIST_HEAD(&rlun->prio_list);
+ INIT_LIST_HEAD(&rlun->open_list);
+ INIT_LIST_HEAD(&rlun->closed_list);
+
INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
spin_lock_init(&rlun->lock);
struct nvm_block *blk = &lun->blocks[j];
rblk->parent = blk;
+ rblk->rlun = rlun;
INIT_LIST_HEAD(&rblk->prio);
spin_lock_init(&rblk->lock);
}
struct rrpc_block {
struct nvm_block *parent;
+ struct rrpc_lun *rlun;
struct list_head prio;
+ struct list_head list;
#define MAX_INVALID_PAGES_STORAGE 8
/* Bitmap for invalid page intries */
struct nvm_lun *parent;
struct rrpc_block *cur, *gc_cur;
struct rrpc_block *blocks; /* Reference to block allocation */
- struct list_head prio_list; /* Blocks that may be GC'ed */
+
+ struct list_head prio_list; /* Blocks that may be GC'ed */
+ struct list_head open_list; /* In-use open blocks. These are blocks
+ * that can be both written to and read
+ * from
+ */
+ struct list_head closed_list; /* In-use closed blocks. These are
+ * blocks that can _only_ be read from
+ */
+
struct work_struct ws_gc;
spinlock_t lock;
--- /dev/null
+/*
+ * Copyright (C) 2015 Matias Bjorling. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/lightnvm.h>
+
+#define MAX_SYSBLKS 3 /* remember to update mapping scheme on change */
+#define MAX_BLKS_PR_SYSBLK 2 /* 2 blks with 256 pages and 3000 erases
+ * enables ~1.5M updates per sysblk unit
+ */
+
+/*
+ * Working state shared between the system block routines and the bad block
+ * table callbacks that collect candidate blocks.
+ */
+struct sysblk_scan {
+ /* A row is a collection of flash blocks for a system block. */
+ /* number of rows in use, as returned by nvm_setup_sysblks() */
+ int nr_rows;
+ /* row currently being filled in by a bad block table callback */
+ int row;
+ /* per row: index of the block within the row that is written to */
+ int act_blk[MAX_SYSBLKS];
+
+ /* number of entries stored in ppas[] so far */
+ int nr_ppas;
+ /* indexed with scan_ppa_idx(row, blkid) */
+ struct ppa_addr ppas[MAX_SYSBLKS * MAX_BLKS_PR_SYSBLK];/* all sysblks */
+};
+
+/*
+ * Map (row, block-within-row) to a flat index into sysblk_scan.ppas, which
+ * is laid out row by row with MAX_BLKS_PR_SYSBLK slots per row.
+ */
+static inline int scan_ppa_idx(int row, int blkid)
+{
+	int row_base = row * MAX_BLKS_PR_SYSBLK;
+
+	return row_base + blkid;
+}
+
+/*
+ * Decode the big-endian on-media system block @sb into the CPU-order
+ * representation @info.
+ */
+void nvm_sysblk_to_cpu(struct nvm_sb_info *info, struct nvm_system_block *sb)
+{
+	strncpy(info->mmtype, sb->mmtype, NVM_MMTYPE_LEN);
+
+	info->version = be16_to_cpu(sb->version);
+	info->seqnr = be32_to_cpu(sb->seqnr);
+	info->erase_cnt = be32_to_cpu(sb->erase_cnt);
+	info->fs_ppa.ppa = be64_to_cpu(sb->fs_ppa);
+}
+
+/*
+ * Encode the CPU-order @info into the big-endian on-media system block
+ * @sb and stamp it with the system block magic.
+ */
+void nvm_cpu_to_sysblk(struct nvm_system_block *sb, struct nvm_sb_info *info)
+{
+	strncpy(sb->mmtype, info->mmtype, NVM_MMTYPE_LEN);
+
+	sb->magic = cpu_to_be32(NVM_SYSBLK_MAGIC);
+	sb->version = cpu_to_be16(info->version);
+	sb->seqnr = cpu_to_be32(info->seqnr);
+	sb->erase_cnt = cpu_to_be32(info->erase_cnt);
+	sb->fs_ppa = cpu_to_be64(info->fs_ppa.ppa);
+}
+
+/*
+ * Choose the channels that hold system blocks and store one address per
+ * row in @sysblk_ppas.
+ *
+ * With three or more channels the first, middle and last channel are
+ * used; with one or two channels only that many rows are set up.
+ *
+ * Returns the number of rows, which is at most MAX_SYSBLKS.
+ */
+static int nvm_setup_sysblks(struct nvm_dev *dev, struct ppa_addr *sysblk_ppas)
+{
+	int nr_rows = min_t(int, MAX_SYSBLKS, dev->nr_chnls);
+	int row;
+
+	for (row = 0; row < nr_rows; row++)
+		sysblk_ppas[row].ppa = 0;
+
+	if (dev->nr_chnls == 1) {
+		sysblk_ppas[0].g.ch = 0;
+	} else if (dev->nr_chnls == 2) {
+		sysblk_ppas[1].g.ch = 1;
+		sysblk_ppas[0].g.ch = 0;
+	} else {
+		sysblk_ppas[0].g.ch = 0;
+		sysblk_ppas[1].g.ch = dev->nr_chnls / 2;
+		sysblk_ppas[2].g.ch = dev->nr_chnls - 1;
+	}
+
+	return nr_rows;
+}
+
+/*
+ * Reset the scan state @s and fill @sysblk_ppas with the system block
+ * locations for @dev; the resulting row count is recorded in @s.
+ */
+void nvm_setup_sysblk_scan(struct nvm_dev *dev, struct sysblk_scan *s,
+						struct ppa_addr *sysblk_ppas)
+{
+	memset(s, 0, sizeof(*s));
+
+	s->nr_rows = nvm_setup_sysblks(dev, sysblk_ppas);
+}
+
+/*
+ * Bad block table callback: collect every block marked NVM_BLK_T_HOST in
+ * @blks into the current row of the scan state passed in @private.
+ *
+ * A row only has MAX_BLKS_PR_SYSBLK slots, so finding more host blocks
+ * than that in one table is rejected. Otherwise the extra entries would
+ * be stored in the slots of the following rows, or beyond the end of the
+ * ppas array for the last row.
+ *
+ * Returns 0 on success or -EINVAL if too many host blocks are found.
+ */
+static int sysblk_get_host_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
+								void *private)
+{
+	struct sysblk_scan *s = private;
+	int i, nr_sysblk = 0;
+
+	for (i = 0; i < nr_blks; i++) {
+		if (blks[i] != NVM_BLK_T_HOST)
+			continue;
+
+		if (nr_sysblk == MAX_BLKS_PR_SYSBLK ||
+		    s->nr_ppas == MAX_BLKS_PR_SYSBLK * MAX_SYSBLKS) {
+			pr_err("nvm: too many host blks\n");
+			return -EINVAL;
+		}
+
+		ppa.g.blk = i;
+
+		s->ppas[scan_ppa_idx(s->row, nr_sysblk)] = ppa;
+		s->nr_ppas++;
+		nr_sysblk++;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the bad block table of every system block row and let @fn pick the
+ * blocks of interest into @s. s->row is set to the row being processed
+ * before each callback run, and s->nr_ppas is reset up front.
+ *
+ * Returns 0 on success (also when there are no rows to walk), or the
+ * error reported by the device's get_bb_tbl hook.
+ */
+static int nvm_get_all_sysblks(struct nvm_dev *dev, struct sysblk_scan *s,
+				struct ppa_addr *ppas, nvm_bb_update_fn *fn)
+{
+	struct ppa_addr dppa;
+	int i, ret;
+
+	s->nr_ppas = 0;
+
+	for (i = 0; i < s->nr_rows; i++) {
+		dppa = generic_to_dev_addr(dev, ppas[i]);
+		s->row = i;
+
+		ret = dev->ops->get_bb_tbl(dev, dppa, dev->blks_per_lun, fn, s);
+		if (ret) {
+			pr_err("nvm: failed bb tbl for ppa (%u %u)\n",
+							ppas[i].g.ch,
+							ppas[i].g.blk);
+			return ret;
+		}
+	}
+
+	/* every row succeeded; do not hand back an unset ret for zero rows */
+	return 0;
+}
+
+/*
+ * Scans a block, page by page, for the latest sysblk.
+ *
+ * @ppa addresses the block to scan; its page field is rewritten for every
+ * page that is read. @sblk holds the best system block found so far and
+ * is overwritten whenever a page with an equal or higher sequence number
+ * is read.
+ *
+ * The scan stops at the first page that is empty, fails to read or does
+ * not carry the sysblk magic.
+ *
+ * Returns:
+ * 0 - newer sysblk not found.
+ * 1 - newer sysblk found and stored in *sblk.
+ * <0- the page buffer could not be allocated (-ENOMEM). Read errors are
+ * logged and end the scan, but are not returned.
+ */
+static int nvm_scan_block(struct nvm_dev *dev, struct ppa_addr *ppa,
+ struct nvm_system_block *sblk)
+{
+ struct nvm_system_block *cur;
+ int pg, cursz, ret, found = 0;
+
+ /* the full buffer for a flash page is allocated. Only the first of it
+ * contains the system block information
+ */
+ cursz = dev->sec_size * dev->sec_per_pg * dev->nr_planes;
+ cur = kmalloc(cursz, GFP_KERNEL);
+ if (!cur)
+ return -ENOMEM;
+
+ /* perform linear scan through the block */
+ for (pg = 0; pg < dev->lps_per_blk; pg++) {
+ ppa->g.pg = ppa_to_slc(dev, pg);
+
+ ret = nvm_submit_ppa(dev, ppa, 1, NVM_OP_PREAD, NVM_IO_SLC_MODE,
+ cur, cursz);
+ if (ret) {
+ if (ret == NVM_RSP_ERR_EMPTYPAGE) {
+ pr_debug("nvm: sysblk scan empty ppa (%u %u %u %u)\n",
+ ppa->g.ch,
+ ppa->g.lun,
+ ppa->g.blk,
+ ppa->g.pg);
+ break;
+ }
+ pr_err("nvm: read failed (%x) for ppa (%u %u %u %u)",
+ ret,
+ ppa->g.ch,
+ ppa->g.lun,
+ ppa->g.blk,
+ ppa->g.pg);
+ break; /* if we can't read a page, continue to the
+ * next blk
+ */
+ }
+
+ if (be32_to_cpu(cur->magic) != NVM_SYSBLK_MAGIC) {
+ pr_debug("nvm: scan break for ppa (%u %u %u %u)\n",
+ ppa->g.ch,
+ ppa->g.lun,
+ ppa->g.blk,
+ ppa->g.pg);
+ break; /* last valid page already found */
+ }
+
+ /* older than the best candidate seen so far: keep scanning */
+ if (be32_to_cpu(cur->seqnr) < be32_to_cpu(sblk->seqnr))
+ continue;
+
+ memcpy(sblk, cur, sizeof(struct nvm_system_block));
+ found = 1;
+ }
+
+ kfree(cur);
+
+ return found;
+}
+
+/*
+ * Mark every block collected in @s with block type @type in the device's
+ * bad block table, using a single request.
+ *
+ * Returns 0 on success, -EINVAL if the blocks do not fit into one request
+ * or the device rejects the update, or the error from
+ * nvm_set_rqd_ppalist() if the request cannot be set up.
+ */
+static int nvm_set_bb_tbl(struct nvm_dev *dev, struct sysblk_scan *s, int type)
+{
+	struct nvm_rq rqd;
+	int ret;
+
+	if (s->nr_ppas > dev->ops->max_phys_sect) {
+		pr_err("nvm: unable to update all sysblocks atomically\n");
+		return -EINVAL;
+	}
+
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+
+	/*
+	 * Bail out if the address list could not be built: the conversion
+	 * below would otherwise walk a NULL ppa_list.
+	 */
+	ret = nvm_set_rqd_ppalist(dev, &rqd, s->ppas, s->nr_ppas);
+	if (ret)
+		return ret;
+
+	nvm_generic_to_addr_mode(dev, &rqd);
+
+	ret = dev->ops->set_bb_tbl(dev, &rqd, type);
+	nvm_free_rqd_ppalist(dev, &rqd);
+	if (ret) {
+		pr_err("nvm: sysblk failed bb mark\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Bad block table callback: pick the first MAX_BLKS_PR_SYSBLK blocks
+ * marked NVM_BLK_T_FREE in @blks and record them in the current row of
+ * the scan state passed in @private.
+ *
+ * Returns 0 once the row is full, -EEXIST as soon as a block marked
+ * NVM_BLK_T_HOST is encountered, or -EINVAL if the table does not contain
+ * enough free blocks.
+ */
+static int sysblk_get_free_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
+								void *private)
+{
+	struct sysblk_scan *scan = private;
+	struct ppa_addr *slot;
+	int blk, taken = 0;
+
+	for (blk = 0; blk < nr_blks; blk++) {
+		if (blks[blk] == NVM_BLK_T_HOST)
+			return -EEXIST;
+
+		if (blks[blk] != NVM_BLK_T_FREE)
+			continue;
+
+		slot = &scan->ppas[scan_ppa_idx(scan->row, taken)];
+		slot->g.ch = ppa.g.ch;
+		slot->g.lun = ppa.g.lun;
+		slot->g.blk = blk;
+		scan->nr_ppas++;
+		taken++;
+
+		pr_debug("nvm: use (%u %u %u) as sysblk\n",
+					slot->g.ch, slot->g.lun, slot->g.blk);
+		if (taken >= MAX_BLKS_PR_SYSBLK)
+			return 0;
+	}
+
+	pr_err("nvm: sysblk failed get sysblk\n");
+	return -EINVAL;
+}
+
+/*
+ * Program @info as a system block page into the active block of every row
+ * in @s, then read each page back and compare it with what was written.
+ *
+ * The address of the active block is expanded to cover every sector of
+ * the flash page, and both the write and the read are issued in SLC mode.
+ * Processing stops at the first row that fails.
+ *
+ * Returns 0 on success (also when @s has no rows), -ENOMEM if a buffer
+ * cannot be allocated, -EINVAL if the data read back differs, or the
+ * non-zero status of the failing nvm_submit_ppa() call.
+ */
+static int nvm_write_and_verify(struct nvm_dev *dev, struct nvm_sb_info *info,
+							struct sysblk_scan *s)
+{
+	struct nvm_system_block nvmsb;
+	void *buf;
+	int i, sect, bufsz;
+	int ret = 0;	/* result when there is no row to write */
+	struct ppa_addr *ppas;
+
+	nvm_cpu_to_sysblk(&nvmsb, info);
+
+	/* buffer for flash page */
+	bufsz = dev->sec_size * dev->sec_per_pg * dev->nr_planes;
+	buf = kzalloc(bufsz, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	memcpy(buf, &nvmsb, sizeof(struct nvm_system_block));
+
+	ppas = kcalloc(dev->sec_per_pg, sizeof(struct ppa_addr), GFP_KERNEL);
+	if (!ppas) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Write and verify */
+	for (i = 0; i < s->nr_rows; i++) {
+		ppas[0] = s->ppas[scan_ppa_idx(i, s->act_blk[i])];
+
+		pr_debug("nvm: writing sysblk to ppa (%u %u %u %u)\n",
+							ppas[0].g.ch,
+							ppas[0].g.lun,
+							ppas[0].g.blk,
+							ppas[0].g.pg);
+
+		/* Expand to all sectors within a flash page */
+		for (sect = 1; sect < dev->sec_per_pg; sect++) {
+			ppas[sect].ppa = ppas[0].ppa;
+			ppas[sect].g.sec = sect;
+		}
+
+		ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PWRITE,
+						NVM_IO_SLC_MODE, buf, bufsz);
+		if (ret) {
+			pr_err("nvm: sysblk failed program (%u %u %u)\n",
+							ppas[0].g.ch,
+							ppas[0].g.lun,
+							ppas[0].g.blk);
+			break;
+		}
+
+		ret = nvm_submit_ppa(dev, ppas, dev->sec_per_pg, NVM_OP_PREAD,
+						NVM_IO_SLC_MODE, buf, bufsz);
+		if (ret) {
+			pr_err("nvm: sysblk failed read (%u %u %u)\n",
+							ppas[0].g.ch,
+							ppas[0].g.lun,
+							ppas[0].g.blk);
+			break;
+		}
+
+		if (memcmp(buf, &nvmsb, sizeof(struct nvm_system_block))) {
+			pr_err("nvm: sysblk failed verify (%u %u %u)\n",
+							ppas[0].g.ch,
+							ppas[0].g.lun,
+							ppas[0].g.blk);
+			ret = -EINVAL;
+			break;
+		}
+	}
+
+	kfree(ppas);
+err:
+	kfree(buf);
+
+	return ret;
+}
+
+/*
+ * Advance every row of @s to its next system block (wrapping around after
+ * MAX_BLKS_PR_SYSBLK) and erase that block so it can be written from page 0.
+ *
+ * A row's active block index is only updated once its erase succeeded.
+ * Returns 0 on success or the first error reported by nvm_erase_ppa().
+ */
+static int nvm_prepare_new_sysblks(struct nvm_dev *dev, struct sysblk_scan *s)
+{
+	int row;
+
+	for (row = 0; row < s->nr_rows; row++) {
+		unsigned long next = (s->act_blk[row] + 1) % MAX_BLKS_PR_SYSBLK;
+		struct ppa_addr *target = &s->ppas[scan_ppa_idx(row, next)];
+		int err;
+
+		/* Point the address at the first SLC page of the new block. */
+		target->g.pg = ppa_to_slc(dev, 0);
+
+		err = nvm_erase_ppa(dev, target, 1);
+		if (err)
+			return err;
+
+		s->act_blk[row] = next;
+	}
+
+	return 0;
+}
+
+/*
+ * Scan the host-reserved system blocks of @dev and fill @info from the
+ * system block left in the scan buffer.
+ *
+ * Steps:
+ *  1. setup sysblk locations
+ *  2. get bad block list
+ *  3. filter on host-specific (type 3)
+ *  4. iterate through all and find the highest seq nr.
+ *  5. return superblock information
+ *
+ * Returns 1 when at least one scan reported a hit, 0 when no system blocks
+ * are present, -EINVAL when the device has no get_bb_tbl operation, -ENOMEM
+ * on allocation failure, or the error from the lookup/scan helpers.
+ */
+int nvm_get_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
+{
+	struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+	struct sysblk_scan s;
+	struct nvm_system_block *cur;
+	int i, j, found = 0;
+	int ret;
+
+	if (!dev->ops->get_bb_tbl)
+		return -EINVAL;
+
+	nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+
+	mutex_lock(&dev->mlock);
+	ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_host_blks);
+	if (ret)
+		goto err_sysblk;
+
+	/* no sysblocks initialized: ret is 0 here and is returned as such */
+	if (!s.nr_ppas)
+		goto err_sysblk;
+
+	cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
+	if (!cur) {
+		/* ret is 0 from the successful lookup; report the failure. */
+		ret = -ENOMEM;
+		goto err_sysblk;
+	}
+
+	/* find the latest block across all sysblocks */
+	for (i = 0; i < s.nr_rows; i++) {
+		for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
+			struct ppa_addr ppa = s.ppas[scan_ppa_idx(i, j)];
+
+			ret = nvm_scan_block(dev, &ppa, cur);
+			if (ret > 0)
+				found = 1;
+			else if (ret < 0)
+				break;	/* leaves this row only */
+		}
+	}
+
+	nvm_sysblk_to_cpu(info, cur);
+
+	kfree(cur);
+err_sysblk:
+	mutex_unlock(&dev->mlock);
+
+	if (found)
+		return 1;
+	return ret;
+}
+
+/*
+ * Write the updated system block @new to every system block row of @dev.
+ *
+ * 1. for each latest superblock
+ * 2. if room
+ *    a. write new flash page entry with the updated information
+ * 3. if no room
+ *    a. find next available block on lun (linear search)
+ *       if none, continue to next lun
+ *       if none at all, report error. also report that it wasn't
+ *       possible to write to all superblocks.
+ *    c. write data to block.
+ *
+ * Returns 0 on success, -EINVAL when the device has no get_bb_tbl operation,
+ * no valid system block is found, the rows disagree on the page, or
+ * @new->seqnr does not directly follow the scanned one; -ENOMEM on
+ * allocation failure; otherwise the error of the failing helper.
+ */
+int nvm_update_sysblock(struct nvm_dev *dev, struct nvm_sb_info *new)
+{
+	struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+	struct sysblk_scan s;
+	struct nvm_system_block *cur;
+	int i, j, ppaidx, found = 0;
+	int ret;
+
+	if (!dev->ops->get_bb_tbl)
+		return -EINVAL;
+
+	nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+
+	mutex_lock(&dev->mlock);
+	ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_host_blks);
+	if (ret)
+		goto err_sysblk;
+
+	cur = kzalloc(sizeof(struct nvm_system_block), GFP_KERNEL);
+	if (!cur) {
+		/*
+		 * ret is 0 from the successful lookup; without this the
+		 * caller would be told the update succeeded.
+		 */
+		ret = -ENOMEM;
+		goto err_sysblk;
+	}
+
+	/*
+	 * Get the latest sysblk for each sysblk row. found is reset per row,
+	 * so the check after the loop reflects the last row scanned.
+	 */
+	for (i = 0; i < s.nr_rows; i++) {
+		found = 0;
+		for (j = 0; j < MAX_BLKS_PR_SYSBLK; j++) {
+			ppaidx = scan_ppa_idx(i, j);
+			ret = nvm_scan_block(dev, &s.ppas[ppaidx], cur);
+			if (ret > 0) {
+				s.act_blk[i] = j;
+				found = 1;
+			} else if (ret < 0)
+				break;
+		}
+	}
+
+	if (!found) {
+		pr_err("nvm: no valid sysblks found to update\n");
+		ret = -EINVAL;
+		goto err_cur;
+	}
+
+	/*
+	 * All sysblocks found. Check that they have the same page id in their
+	 * flash blocks
+	 */
+	for (i = 1; i < s.nr_rows; i++) {
+		struct ppa_addr l = s.ppas[scan_ppa_idx(0, s.act_blk[0])];
+		struct ppa_addr r = s.ppas[scan_ppa_idx(i, s.act_blk[i])];
+
+		if (l.g.pg != r.g.pg) {
+			pr_err("nvm: sysblks not on same page. Previous update failed.\n");
+			ret = -EINVAL;
+			goto err_cur;
+		}
+	}
+
+	/*
+	 * Check that there hasn't been another update to the seqnr since we
+	 * began
+	 */
+	if ((new->seqnr - 1) != be32_to_cpu(cur->seqnr)) {
+		pr_err("nvm: seq is not sequential\n");
+		ret = -EINVAL;
+		goto err_cur;
+	}
+
+	/*
+	 * When all pages in a block have been written, a new block is selected
+	 * and writing is performed on the new block.
+	 */
+	if (s.ppas[scan_ppa_idx(0, s.act_blk[0])].g.pg ==
+						dev->lps_per_blk - 1) {
+		ret = nvm_prepare_new_sysblks(dev, &s);
+		if (ret)
+			goto err_cur;
+	}
+
+	ret = nvm_write_and_verify(dev, new, &s);
+err_cur:
+	kfree(cur);
+err_sysblk:
+	mutex_unlock(&dev->mlock);
+
+	return ret;
+}
+
+/*
+ * Create the initial system blocks on @dev and store @info in them.
+ *
+ *  1. select master blocks and select first available blks
+ *  2. get bad block list
+ *  3. mark MAX_SYSBLKS block as host-based device allocated.
+ *  4. write and verify data to block
+ *
+ * Returns 0 on success, -EINVAL when the device lacks the bad block table
+ * operations or SLC access, otherwise the error of the failing step.
+ */
+int nvm_init_sysblock(struct nvm_dev *dev, struct nvm_sb_info *info)
+{
+	struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+	struct sysblk_scan s;
+	int ret;
+
+	/* Both reading and updating the bad block table are required. */
+	if (!(dev->ops->get_bb_tbl && dev->ops->set_bb_tbl))
+		return -EINVAL;
+
+	if (!((dev->mccap & NVM_ID_CAP_SLC) && dev->lps_per_blk)) {
+		pr_err("nvm: memory does not support SLC access\n");
+		return -EINVAL;
+	}
+
+	/* Index all sysblocks and mark them as host-driven */
+	nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+
+	mutex_lock(&dev->mlock);
+
+	ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas, sysblk_get_free_blks);
+	if (!ret)
+		ret = nvm_set_bb_tbl(dev, &s, NVM_BLK_T_HOST);
+
+	/* Write to the first block of each row */
+	if (!ret)
+		ret = nvm_write_and_verify(dev, info, &s);
+
+	mutex_unlock(&dev->mlock);
+
+	return ret;
+}
+
+struct factory_blks { /* state shared by the factory-reset helpers below */
+ struct nvm_dev *dev; /* device being reset; set by nvm_dev_factory() */
+ int flags; /* NVM_FACTORY_* flags tested in nvm_factory_blks() */
+ unsigned long *blks; /* bitmap; clear bits are picked for erase by nvm_fact_get_blks() */
+};
+
+/* Round @nblks up to the nearest multiple of BITS_PER_LONG. */
+static int factory_nblks(int nblks)
+{
+	const int low_bits = BITS_PER_LONG - 1;
+
+	return (nblks + low_bits) & ~low_bits;
+}
+
+/*
+ * Index, in units of unsigned long, of the first bitmap word belonging to
+ * (@ch, @lun). Each LUN occupies factory_nblks(blks_per_lun) bits.
+ */
+static unsigned int factory_blk_offset(struct nvm_dev *dev, int ch, int lun)
+{
+	int bits_per_lun = factory_nblks(dev->blks_per_lun);
+
+	return ((ch * dev->luns_per_chnl * bits_per_lun) +
+		(lun * bits_per_lun)) / BITS_PER_LONG;
+}
+
+/*
+ * Bad block table callback: record in the factory bitmap which blocks of the
+ * LUN addressed by @ppa are to be left alone.
+ *
+ * A set bit excludes the block; bits left clear are the blocks that get
+ * erased. @private is the struct factory_blks of the reset in progress.
+ * Always returns 0.
+ */
+static int nvm_factory_blks(struct ppa_addr ppa, int nr_blks, u8 *blks,
+			    void *private)
+{
+	struct factory_blks *f = private;
+	unsigned long *map;
+	int blk;
+
+	map = &f->blks[factory_blk_offset(f->dev, ppa.g.ch, ppa.g.lun)];
+
+	for (blk = 0; blk < nr_blks; blk++) {
+		int keep;
+
+		if (blks[blk] == NVM_BLK_T_FREE)
+			keep = !!(f->flags & NVM_FACTORY_ERASE_ONLY_USER);
+		else if (blks[blk] == NVM_BLK_T_HOST)
+			keep = !(f->flags & NVM_FACTORY_RESET_HOST_BLKS);
+		else if (blks[blk] == NVM_BLK_T_GRWN_BAD)
+			keep = !(f->flags & NVM_FACTORY_RESET_GRWN_BBLKS);
+		else
+			keep = 1;	/* any other block type is never erased */
+
+		if (keep)
+			set_bit(blk, map);
+	}
+
+	return 0;
+}
+
+/*
+ * Collect up to @max_ppas block addresses that still have to be erased.
+ *
+ * Each pass over all channels/LUNs takes at most one block per LUN (the first
+ * clear bit in that LUN's bitmap) and sets its bit so it is not returned
+ * again. Passes repeat until one finds nothing or @erase_list is full.
+ *
+ * Returns the number of entries stored in @erase_list.
+ */
+static int nvm_fact_get_blks(struct nvm_dev *dev, struct ppa_addr *erase_list,
+			     int max_ppas, struct factory_blks *f)
+{
+	int ch, lun, nr_picked = 0;
+	int progress;
+
+	do {
+		progress = 0;
+
+		for (ch = 0; ch < dev->nr_chnls; ch++) {
+			for (lun = 0; lun < dev->luns_per_chnl; lun++) {
+				struct ppa_addr *slot;
+				unsigned long *map;
+				int idx, blkid;
+
+				idx = factory_blk_offset(dev, ch, lun);
+				map = &f->blks[idx];
+
+				blkid = find_first_zero_bit(map,
+							dev->blks_per_lun);
+				if (blkid >= dev->blks_per_lun)
+					continue;
+				set_bit(blkid, map);
+
+				/* Build the address directly in the list. */
+				slot = &erase_list[nr_picked++];
+				slot->ppa = 0;
+				slot->g.ch = ch;
+				slot->g.lun = lun;
+				slot->g.blk = blkid;
+				pr_debug("nvm: erase ppa (%u %u %u)\n",
+					 slot->g.ch,
+					 slot->g.lun,
+					 slot->g.blk);
+
+				progress = 1;
+
+				if (nr_picked == max_ppas)
+					return nr_picked;
+			}
+		}
+	} while (progress);
+
+	return nr_picked;
+}
+
+/*
+ * Fetch the bad block table of the LUN addressed by the generic address @ppa
+ * and hand it to @fn together with @priv.
+ *
+ * The address is converted to device format before the get_bb_tbl operation
+ * is called. Returns that operation's result; failures are logged.
+ */
+static int nvm_fact_get_bb_tbl(struct nvm_dev *dev, struct ppa_addr ppa,
+			       nvm_bb_update_fn *fn, void *priv)
+{
+	struct ppa_addr dev_ppa;
+	int ret;
+
+	dev_ppa = generic_to_dev_addr(dev, ppa);
+
+	ret = dev->ops->get_bb_tbl(dev, dev_ppa, dev->blks_per_lun, fn, priv);
+	if (ret)
+		/* Report the LUN, matching the "lun%u" label. */
+		pr_err("nvm: failed bb tbl for ch%u lun%u\n",
+		       ppa.g.ch, ppa.g.lun);
+	return ret;
+}
+
+/*
+ * Run nvm_factory_blks() over the bad block table of every channel/LUN of
+ * @dev so that @f ends up describing which blocks to erase.
+ *
+ * Returns 0 on success or the first error from nvm_fact_get_bb_tbl().
+ */
+static int nvm_fact_select_blks(struct nvm_dev *dev, struct factory_blks *f)
+{
+	int ch, lun;
+
+	for (ch = 0; ch < dev->nr_chnls; ch++) {
+		for (lun = 0; lun < dev->luns_per_chnl; lun++) {
+			struct ppa_addr ppa;
+			int err;
+
+			/* Only channel and LUN are set; the rest stays zero. */
+			ppa.ppa = 0;
+			ppa.g.ch = ch;
+			ppa.g.lun = lun;
+
+			err = nvm_fact_get_bb_tbl(dev, ppa, nvm_factory_blks,
+						  f);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Factory-reset @dev: erase the blocks selected by @flags (NVM_FACTORY_*)
+ * and, when NVM_FACTORY_RESET_HOST_BLKS is set, mark the host-reserved
+ * system blocks as free again.
+ *
+ * The results of the individual erase requests are not checked. Returns
+ * -ENOMEM on allocation failure, the error from block selection, or the
+ * result of releasing the host blocks; 0 otherwise.
+ */
+int nvm_dev_factory(struct nvm_dev *dev, int flags)
+{
+	struct ppa_addr sysblk_ppas[MAX_SYSBLKS];
+	struct sysblk_scan s;
+	struct factory_blks f;
+	struct ppa_addr *ppas;
+	int max_ppas = dev->ops->max_phys_sect / dev->nr_planes;
+	int nr, ret;
+
+	f.dev = dev;
+	f.flags = flags;
+	f.blks = kzalloc(factory_nblks(dev->blks_per_lun) * dev->nr_luns,
+			 GFP_KERNEL);
+	if (!f.blks)
+		return -ENOMEM;
+
+	ppas = kcalloc(max_ppas, sizeof(struct ppa_addr), GFP_KERNEL);
+	if (!ppas) {
+		ret = -ENOMEM;
+		goto err_blks;
+	}
+
+	/* create list of blks to be erased */
+	ret = nvm_fact_select_blks(dev, &f);
+	if (ret)
+		goto err_ppas;
+
+	/* keep erasing batches until the list of blks is empty */
+	for (;;) {
+		nr = nvm_fact_get_blks(dev, ppas, max_ppas, &f);
+		if (nr <= 0)
+			break;
+		nvm_erase_ppa(dev, ppas, nr);
+	}
+
+	/* mark host reserved blocks free */
+	if (flags & NVM_FACTORY_RESET_HOST_BLKS) {
+		nvm_setup_sysblk_scan(dev, &s, sysblk_ppas);
+		mutex_lock(&dev->mlock);
+		ret = nvm_get_all_sysblks(dev, &s, sysblk_ppas,
+					  sysblk_get_host_blks);
+		if (ret == 0)
+			ret = nvm_set_bb_tbl(dev, &s, NVM_BLK_T_FREE);
+		mutex_unlock(&dev->mlock);
+	}
+err_ppas:
+	kfree(ppas);
+err_blks:
+	kfree(f.blks);
+	return ret;
+}
+EXPORT_SYMBOL(nvm_dev_factory);
do {
ret = btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes);
+ cond_resched();
if (ret && ret != -EAGAIN)
pr_warn("gc failed!");
rw_lock(true, b, b->level);
if (b->key.ptr[0] != btree_ptr ||
- b->seq != seq + 1)
+ b->seq != seq + 1) {
+ op->lock = b->level;
goto out;
+ }
}
SET_KEY_PTRS(check_key, 1);
WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") ||
sysfs_create_link(&c->kobj, &d->kobj, d->name),
"Couldn't create device <-> cache set symlinks");
+
+ clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
}
static void bcache_device_detach(struct bcache_device *d)
buf[SB_LABEL_SIZE] = '\0';
env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
- if (atomic_xchg(&dc->running, 1))
+ if (atomic_xchg(&dc->running, 1)) {
+ kfree(env[1]);
+ kfree(env[2]);
return;
+ }
if (!d->c &&
BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) {
else
err = "device busy";
mutex_unlock(&bch_register_lock);
+ if (attr == &ksysfs_register_quiet)
+ goto out;
}
goto err;
}
err_close:
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
err:
- if (attr != &ksysfs_register_quiet)
- pr_info("error opening %s: %s", path, err);
+ pr_info("error opening %s: %s", path, err);
ret = -EINVAL;
goto out;
}
closure_debug_init();
bcache_major = register_blkdev(0, "bcache");
- if (bcache_major < 0)
+ if (bcache_major < 0) {
+ unregister_reboot_notifier(&reboot);
return bcache_major;
+ }
if (!(bcache_wq = create_workqueue("bcache")) ||
!(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) ||
static bool dirty_pred(struct keybuf *buf, struct bkey *k)
{
+ struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys);
+
+ BUG_ON(KEY_INODE(k) != dc->disk.id);
+
return KEY_DIRTY(k);
}
}
}
+/*
+ * Returns true if we scanned the entire disk
+ */
static bool refill_dirty(struct cached_dev *dc)
{
struct keybuf *buf = &dc->writeback_keys;
+ struct bkey start = KEY(dc->disk.id, 0, 0);
struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0);
- bool searched_from_start = false;
+ struct bkey start_pos;
+
+ /*
+ * make sure keybuf pos is inside the range for this disk - at bringup
+ * we might not be attached yet so this disk's inode nr isn't
+ * initialized then
+ */
+ if (bkey_cmp(&buf->last_scanned, &start) < 0 ||
+ bkey_cmp(&buf->last_scanned, &end) > 0)
+ buf->last_scanned = start;
if (dc->partial_stripes_expensive) {
refill_full_stripes(dc);
return false;
}
- if (bkey_cmp(&buf->last_scanned, &end) >= 0) {
- buf->last_scanned = KEY(dc->disk.id, 0, 0);
- searched_from_start = true;
- }
-
+ start_pos = buf->last_scanned;
bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
- return bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start;
+ if (bkey_cmp(&buf->last_scanned, &end) < 0)
+ return false;
+
+ /*
+ * If we get to the end start scanning again from the beginning, and
+ * only scan up to where we initially started scanning from:
+ */
+ buf->last_scanned = start;
+ bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred);
+
+ return bkey_cmp(&buf->last_scanned, &start_pos) >= 0;
}
static int bch_writeback_thread(void *arg)
static inline void bch_writeback_queue(struct cached_dev *dc)
{
- wake_up_process(dc->writeback_thread);
+ if (!IS_ERR_OR_NULL(dc->writeback_thread))
+ wake_up_process(dc->writeback_thread);
}
static inline void bch_writeback_add(struct cached_dev *dc)
str = "invalid";
break;
}
- seq_printf(s, "signal voltage:\t%u (%s)\n", ios->chip_select, str);
+ seq_printf(s, "signal voltage:\t%u (%s)\n", ios->signal_voltage, str);
switch (ios->drv_type) {
case MMC_SET_DRIVER_TYPE_A:
static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq,
int value)
{
- int i;
struct gpio_descs *reset_gpios = pwrseq->reset_gpios;
- int values[reset_gpios->ndescs];
- for (i = 0; i < reset_gpios->ndescs; i++)
- values[i] = value;
+ if (!IS_ERR(reset_gpios)) {
+ int i;
+ int values[reset_gpios->ndescs];
- gpiod_set_array_value_cansleep(reset_gpios->ndescs, reset_gpios->desc,
- values);
+ for (i = 0; i < reset_gpios->ndescs; i++)
+ values[i] = value;
+
+ gpiod_set_array_value_cansleep(
+ reset_gpios->ndescs, reset_gpios->desc, values);
+ }
}
static void mmc_pwrseq_simple_pre_power_on(struct mmc_host *host)
struct mmc_pwrseq_simple *pwrseq = container_of(host->pwrseq,
struct mmc_pwrseq_simple, pwrseq);
- gpiod_put_array(pwrseq->reset_gpios);
+ if (!IS_ERR(pwrseq->reset_gpios))
+ gpiod_put_array(pwrseq->reset_gpios);
if (!IS_ERR(pwrseq->ext_clk))
clk_put(pwrseq->ext_clk);
}
pwrseq->reset_gpios = gpiod_get_array(dev, "reset", GPIOD_OUT_HIGH);
- if (IS_ERR(pwrseq->reset_gpios)) {
+ if (IS_ERR(pwrseq->reset_gpios) &&
+ PTR_ERR(pwrseq->reset_gpios) != -ENOENT &&
+ PTR_ERR(pwrseq->reset_gpios) != -ENOSYS) {
ret = PTR_ERR(pwrseq->reset_gpios);
goto clk_put;
}
card->sw_caps.sd3_bus_mode = status[13];
/* Driver Strengths supported by the card */
card->sw_caps.sd3_drv_type = status[9];
+ card->sw_caps.sd3_curr_limit = status[7] | status[6] << 8;
}
out:
* when we set current limit to 200ma, the card will draw 200ma, and
* when we set current limit to 400/600/800ma, the card will draw its
* maximum 300ma from the host.
+ *
+ * The above is incorrect: if we try to set a current limit that is
+ * not supported by the card, the card can rightfully error out the
+ * attempt, and remain at the default current limit. This results
+ * in a 300mA card being limited to 200mA even though the host
+ * supports 800mA. Failures seen with SanDisk 8GB UHS cards with
+ * an iMX6 host. --rmk
*/
- if (max_current >= 800)
+ if (max_current >= 800 &&
+ card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_800)
current_limit = SD_SET_CURRENT_LIMIT_800;
- else if (max_current >= 600)
+ else if (max_current >= 600 &&
+ card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_600)
current_limit = SD_SET_CURRENT_LIMIT_600;
- else if (max_current >= 400)
+ else if (max_current >= 400 &&
+ card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_400)
current_limit = SD_SET_CURRENT_LIMIT_400;
- else if (max_current >= 200)
+ else if (max_current >= 200 &&
+ card->sw_caps.sd3_curr_limit & SD_MAX_CURRENT_200)
current_limit = SD_SET_CURRENT_LIMIT_200;
if (current_limit != SD_SET_CURRENT_NO_CHANGE) {
* SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
*/
if (!mmc_host_is_spi(card->host) &&
- (card->sd_bus_speed == UHS_SDR50_BUS_SPEED ||
- card->sd_bus_speed == UHS_DDR50_BUS_SPEED ||
- card->sd_bus_speed == UHS_SDR104_BUS_SPEED)) {
+ (card->host->ios.timing == MMC_TIMING_UHS_SDR50 ||
+ card->host->ios.timing == MMC_TIMING_UHS_DDR50 ||
+ card->host->ios.timing == MMC_TIMING_UHS_SDR104)) {
err = mmc_execute_tuning(card);
/*
* difference between v3.00 and 3.01 spec means that CMD19
* tuning is also available for DDR50 mode.
*/
- if (err && card->sd_bus_speed == UHS_DDR50_BUS_SPEED) {
+ if (err && card->host->ios.timing == MMC_TIMING_UHS_DDR50) {
pr_warn("%s: ddr50 tuning failed\n",
mmc_hostname(card->host));
err = 0;
* SDR104 mode SD-cards. Note that tuning is mandatory for SDR104.
*/
if (!mmc_host_is_spi(card->host) &&
- ((card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR50) ||
- (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)))
+ ((card->host->ios.timing == MMC_TIMING_UHS_SDR50) ||
+ (card->host->ios.timing == MMC_TIMING_UHS_SDR104)))
err = mmc_execute_tuning(card);
out:
return err;
{ 0x20, 4, cistpl_manfid },
{ 0x21, 2, /* cistpl_funcid */ },
{ 0x22, 0, cistpl_funce },
+ { 0x91, 2, /* cistpl_sdio_std */ },
};
static int sdio_read_cis(struct mmc_card *card, struct sdio_func *func)
.fifosize = 16 * 4,
.fifohalfsize = 8 * 4,
.clkreg = MCI_CLK_ENABLE,
+ .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
.datalength_bits = 24,
.datactrl_mask_sdio = MCI_ST_DPSM_SDIOEN,
.st_sdio = true,
{
.id = 0x00280180,
.mask = 0x00ffffff,
- .data = &variant_u300,
+ .data = &variant_nomadik,
},
{
.id = 0x00480180,
desc = NULL;
ret = cookie;
}
+ dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
+ __func__, host->sg_len, ret, cookie, host->mrq);
}
- dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
- __func__, host->sg_len, ret, cookie, host->mrq);
pio:
if (!desc) {
"DMA failed: %d, falling back to PIO\n", ret);
}
- dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d, sg[%d]\n", __func__,
- desc, cookie, host->sg_len);
+ dev_dbg(&host->pdev->dev, "%s(): desc %p, sg[%d]\n", __func__,
+ desc, host->sg_len);
}
static void tmio_mmc_start_dma_tx(struct tmio_mmc_host *host)
desc = NULL;
ret = cookie;
}
+ dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
+ __func__, host->sg_len, ret, cookie, host->mrq);
}
- dev_dbg(&host->pdev->dev, "%s(): mapped %d -> %d, cookie %d, rq %p\n",
- __func__, host->sg_len, ret, cookie, host->mrq);
pio:
if (!desc) {
"DMA failed: %d, falling back to PIO\n", ret);
}
- dev_dbg(&host->pdev->dev, "%s(): desc %p, cookie %d\n", __func__,
- desc, cookie);
+ dev_dbg(&host->pdev->dev, "%s(): desc %p\n", __func__, desc);
}
void tmio_mmc_start_dma(struct tmio_mmc_host *host,
struct ubi_device *ubi = desc->vol->ubi;
struct inode *inode = file_inode(file);
int err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = ubi_sync(ubi->ubi_num);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
[29] = "802.1ad offload support",
[31] = "Modifying loopback source checks using UPDATE_QP support",
[32] = "Loopback source checks support",
+ [33] = "RoCEv2 support"
};
int i;
return err;
}
+static void disable_unsupported_roce_caps(void *buf);
+
int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
struct mlx4_cmd_mailbox *mailbox;
if (err)
goto out;
+ if (mlx4_is_mfunc(dev))
+ disable_unsupported_roce_caps(outbox);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
dev_cap->reserved_qps = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
MLX4_GET(dev_cap->bmme_flags, outbox,
QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+ if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2;
if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
if (err)
return err;
+ disable_unsupported_roce_caps(outbox->buf);
/* add port mng change event capability and disable mw type 1
* unconditionally to slaves
*/
return 0;
}
+/*
+ * Clear three capability bits in the QUERY_DEV_CAP output held in @buf:
+ * bit 31 of the extended flags, bit 24 of the second extended flags and
+ * MLX4_FLAG_ROCE_V1_V2 in the BMME flags. Each word is read, masked and
+ * written back in place.
+ */
+static void disable_unsupported_roce_caps(void *buf)
+{
+	u32 ext_flags;
+	u32 ext2_flags;
+	u32 bmme_flags;
+
+	MLX4_GET(ext_flags, buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+	ext_flags &= ~(1UL << 31);
+	MLX4_PUT(buf, ext_flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+
+	MLX4_GET(ext2_flags, buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+	ext2_flags &= ~(1UL << 24);
+	MLX4_PUT(buf, ext2_flags, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+
+	MLX4_GET(bmme_flags, buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+	bmme_flags &= ~(MLX4_FLAG_ROCE_V1_V2);
+	MLX4_PUT(buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+}
+
int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
__be32 rsvd1[3];
__be16 vxlan_udp_dport;
__be16 rsvd2;
- __be32 rsvd3;
+ __be16 roce_v2_entropy;
+ __be16 roce_v2_udp_dport;
__be32 roce_flags;
__be32 rsvd4[25];
__be16 rsvd5;
};
#define MLX4_VXLAN_UDP_DPORT (1 << 0)
+#define MLX4_ROCE_V2_UDP_DPORT BIT(3)
#define MLX4_DISABLE_RX_PORT BIT(18)
static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev)
return mlx4_CONFIG_DEV_set(dev, &config_dev);
}
+/*
+ * Set the RoCE v2 UDP destination port of @dev to @udp_port.
+ *
+ * Only the MLX4_ROCE_V2_UDP_DPORT update flag is raised; every other field
+ * of the CONFIG_DEV request is zero. Returns the result of
+ * mlx4_CONFIG_DEV_set().
+ */
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port)
+{
+	struct mlx4_config_dev cfg;
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.roce_v2_udp_dport = cpu_to_be16(udp_port);
+	cfg.update_flags = cpu_to_be32(MLX4_ROCE_V2_UDP_DPORT);
+
+	return mlx4_CONFIG_DEV_set(dev, &cfg);
+}
+EXPORT_SYMBOL_GPL(mlx4_config_roce_v2_port);
+
int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2)
{
struct mlx4_cmd_mailbox *mailbox;
u16 reserved1;
u8 v_ignore_fcs;
u8 flags;
- u8 ignore_fcs;
+ union {
+ u8 ignore_fcs;
+ u8 roce_mode;
+ };
u8 reserved2;
__be16 mtu;
u8 pptx;
return err;
}
+#define SET_PORT_ROCE_2_FLAGS 0x10
+#define MLX4_SET_PORT_ROCE_V1_V2 0x2
int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
{
context->pprx = (pprx * (!pfcrx)) << 7;
context->pfcrx = pfcrx;
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ context->flags |= SET_PORT_ROCE_2_FLAGS;
+ context->roce_mode |=
+ MLX4_SET_PORT_ROCE_V1_V2 << 4;
+ }
in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
}
+ if ((cur_state == MLX4_QP_STATE_RTR) &&
+ (new_state == MLX4_QP_STATE_RTS) &&
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ context->roce_entropy =
+ cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn));
+
*(__be32 *) mailbox->buf = cpu_to_be32(optpar);
memcpy(mailbox->buf + 8, context, sizeof *context);
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_qp_to_ready);
+
+/*
+ * Compute the RoCE entropy value for QP number @qpn.
+ *
+ * The QP is queried for its remote QP number. The result is the folded local
+ * QPN, XORed with the folded remote QPN when the two differ, with the bits
+ * 0xC000 always set. Returns 0xdead when the query fails.
+ */
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn)
+{
+	struct mlx4_qp_context context;
+	struct mlx4_qp qp;
+	u32 remote_qpn;
+	u16 remote_fold, local_fold, mix;
+
+	qp.qpn = qpn;
+	if (mlx4_qp_query(dev, &qp, &context))
+		return 0xdead;
+
+	remote_qpn = be32_to_cpu(context.remote_qpn) & 0xffffff;
+	remote_fold = folded_qp(remote_qpn);
+	local_fold = folded_qp(qpn);
+
+	mix = local_fold;
+	if (remote_qpn != qpn)
+		mix ^= remote_fold;
+
+	return mix | 0xC000;
+}
#include <linux/mlx5/qp.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/vport.h>
+#include <linux/mlx5/transobj.h>
#include "wq.h"
-#include "transobj.h"
#include "mlx5_core.h"
#define MLX5E_MAX_NUM_TC 8
goto err_unmap_free_uar;
}
- err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
+ err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn);
if (err) {
mlx5_core_err(mdev, "alloc td failed, %d\n", err);
goto err_dealloc_pd;
mlx5_core_destroy_mkey(mdev, &priv->mr);
err_dealloc_transport_domain:
- mlx5_dealloc_transport_domain(mdev, priv->tdn);
+ mlx5_core_dealloc_transport_domain(mdev, priv->tdn);
err_dealloc_pd:
mlx5_core_dealloc_pd(mdev, priv->pdn);
mlx5e_close_drop_rq(priv);
mlx5e_destroy_tises(priv);
mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
- mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
+ mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
free_netdev(netdev);
case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
eqe_type_str(eqe->type), eqe->type, rsn);
mlx5_rsc_event(dev, rsn, eqe->type);
void *context;
};
+enum {
+ MLX5_ATOMIC_REQ_MODE_BE = 0x0,
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
+};
+
static struct mlx5_profile profile[] = {
[0] = {
.mask = 0,
return err;
}
-static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
+static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
{
u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
int err;
memset(out, 0, sizeof(out));
MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+ MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
if (err)
return err;
return err;
}
+/*
+ * Switch the 8-byte atomic requestor of @dev to host endianness when the
+ * device offers that mode.
+ *
+ * Returns 0 when the device has no atomic capability, when the supported
+ * mode read from the atomic caps is not host endianness, or when the
+ * capability was set; -ENOMEM on allocation failure; otherwise the error
+ * from querying or setting the capabilities.
+ */
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
+{
+	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	void *set_hca_cap;
+	void *set_ctx;
+	int supported_mode;
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, atomic))
+		return 0;
+
+	err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC, HCA_CAP_OPMOD_GET_CUR);
+	if (err)
+		return err;
+
+	supported_mode =
+		MLX5_CAP_ATOMIC(dev,
+				supported_atomic_req_8B_endianess_mode_1);
+	if (supported_mode != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
+		return 0;
+
+	set_ctx = kzalloc(set_sz, GFP_KERNEL);
+	if (!set_ctx)
+		return -ENOMEM;
+
+	/* Set requestor to host endianness */
+	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
+		 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
+
+	err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
+
+	kfree(set_ctx);
+	return err;
+}
+
static int handle_hca_cap(struct mlx5_core_dev *dev)
{
void *set_ctx = NULL;
MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
- err = set_caps(dev, set_ctx, set_sz);
+ err = set_caps(dev, set_ctx, set_sz,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
query_ex:
kfree(set_ctx);
return err;
}
-#ifdef CONFIG_MLX5_CORE_EN
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
u32 query_in[MLX5_ST_SZ_DW(query_issi_in)];
return -ENOTSUPP;
}
-#endif
static int map_bf_area(struct mlx5_core_dev *dev)
{
goto err_pagealloc_cleanup;
}
-#ifdef CONFIG_MLX5_CORE_EN
err = mlx5_core_set_issi(dev);
if (err) {
dev_err(&pdev->dev, "failed to set issi\n");
goto err_disable_hca;
}
-#endif
err = mlx5_satisfy_startup_pages(dev, 1);
if (err) {
goto reclaim_boot_pages;
}
+ err = handle_hca_cap_atomic(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+ goto reclaim_boot_pages;
+ }
+
err = mlx5_satisfy_startup_pages(dev, 0);
if (err) {
dev_err(&pdev->dev, "failed to allocate init pages\n");
#include <linux/mlx5/cmd.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/transobj.h>
#include "mlx5_core.h"
complete(&common->free);
}
+/* Bit mask (one bit per MLX5_EVENT_TYPE_*) of events accepted for a QP. */
+static u64 qp_allowed_event_types(void)
+{
+	u64 mask = 0;
+
+	mask |= BIT(MLX5_EVENT_TYPE_PATH_MIG);
+	mask |= BIT(MLX5_EVENT_TYPE_COMM_EST);
+	mask |= BIT(MLX5_EVENT_TYPE_SQ_DRAINED);
+	mask |= BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE);
+	mask |= BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+	mask |= BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED);
+	mask |= BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR);
+	mask |= BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
+
+	return mask;
+}
+
+/* Bit mask (one bit per MLX5_EVENT_TYPE_*) of events accepted for an RQ. */
+static u64 rq_allowed_event_types(void)
+{
+	return BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+	       BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+}
+
+/* Bit mask (one bit per MLX5_EVENT_TYPE_*) of events accepted for an SQ. */
+static u64 sq_allowed_event_types(void)
+{
+	u64 mask = BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+
+	return mask;
+}
+
+/*
+ * Tell whether @event_type may be delivered to a resource of @rsc_type
+ * (MLX5_EVENT_QUEUE_TYPE_*). An unknown resource type triggers a WARN and is
+ * rejected.
+ */
+static bool is_event_type_allowed(int rsc_type, int event_type)
+{
+	u64 allowed;
+
+	switch (rsc_type) {
+	case MLX5_EVENT_QUEUE_TYPE_QP:
+		allowed = qp_allowed_event_types();
+		break;
+	case MLX5_EVENT_QUEUE_TYPE_RQ:
+		allowed = rq_allowed_event_types();
+		break;
+	case MLX5_EVENT_QUEUE_TYPE_SQ:
+		allowed = sq_allowed_event_types();
+		break;
+	default:
+		WARN(1, "Event arrived for unknown resource type");
+		return false;
+	}
+
+	return BIT(event_type) & allowed;
+}
+
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
{
struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
if (!common)
return;
+ if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
+ mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
+ event_type, rsn);
+ return;
+ }
+
switch (common->res) {
case MLX5_RES_QP:
+ case MLX5_RES_RQ:
+ case MLX5_RES_SQ:
qp = (struct mlx5_core_qp *)common;
qp->event(qp, event_type);
break;
}
#endif
+/*
+ * Register @qp in the device's QP table as a resource of type @rsc_type.
+ *
+ * The table key is the queue number with @rsc_type placed above
+ * MLX5_USER_INDEX_LEN bits. After a successful insert the reference count
+ * is set to 1, the "free" completion is initialised and the current pid is
+ * recorded. Returns 0 or the radix_tree_insert() error.
+ */
+static int create_qprqsq_common(struct mlx5_core_dev *dev,
+				struct mlx5_core_qp *qp,
+				int rsc_type)
+{
+	struct mlx5_qp_table *qpt = &dev->priv.qp_table;
+	unsigned long key;
+	int err;
+
+	qp->common.res = rsc_type;
+	key = qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN);
+
+	spin_lock_irq(&qpt->lock);
+	err = radix_tree_insert(&qpt->tree, key, qp);
+	spin_unlock_irq(&qpt->lock);
+	if (err)
+		return err;
+
+	atomic_set(&qp->common.refcount, 1);
+	init_completion(&qp->common.free);
+	qp->pid = current->pid;
+
+	return 0;
+}
+
+/*
+ * Remove @qp from the device's QP table, drop one reference and wait for
+ * the "free" completion before returning.
+ */
+static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
+				  struct mlx5_core_qp *qp)
+{
+	struct mlx5_qp_table *qpt = &dev->priv.qp_table;
+	unsigned long irq_flags;
+	unsigned long key;
+
+	/* Same key layout as used on insertion: type above the queue number. */
+	key = qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN);
+
+	spin_lock_irqsave(&qpt->lock, irq_flags);
+	radix_tree_delete(&qpt->tree, key);
+	spin_unlock_irqrestore(&qpt->lock, irq_flags);
+
+	mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+	wait_for_completion(&qp->common.free);
+}
+
int mlx5_core_create_qp(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp,
struct mlx5_create_qp_mbox_in *in,
int inlen)
{
- struct mlx5_qp_table *table = &dev->priv.qp_table;
struct mlx5_create_qp_mbox_out out;
struct mlx5_destroy_qp_mbox_in din;
struct mlx5_destroy_qp_mbox_out dout;
int err;
- void *qpc;
memset(&out, 0, sizeof(out));
in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
- if (dev->issi) {
- qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
- /* 0xffffff means we ask to work with cqe version 0 */
- MLX5_SET(qpc, qpc, user_index, 0xffffff);
- }
-
err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
if (err) {
mlx5_core_warn(dev, "ret %d\n", err);
qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
- qp->common.res = MLX5_RES_QP;
- spin_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, qp->qpn, qp);
- spin_unlock_irq(&table->lock);
- if (err) {
- mlx5_core_warn(dev, "err %d\n", err);
+ err = create_qprqsq_common(dev, qp, MLX5_RES_QP);
+ if (err)
goto err_cmd;
- }
err = mlx5_debug_qp_add(dev, qp);
if (err)
mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
qp->qpn);
- qp->pid = current->pid;
- atomic_set(&qp->common.refcount, 1);
atomic_inc(&dev->num_qps);
- init_completion(&qp->common.free);
return 0;
{
struct mlx5_destroy_qp_mbox_in in;
struct mlx5_destroy_qp_mbox_out out;
- struct mlx5_qp_table *table = &dev->priv.qp_table;
- unsigned long flags;
int err;
mlx5_debug_qp_remove(dev, qp);
- spin_lock_irqsave(&table->lock, flags);
- radix_tree_delete(&table->tree, qp->qpn);
- spin_unlock_irqrestore(&table->lock, flags);
-
- mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
- wait_for_completion(&qp->common.free);
+ destroy_qprqsq_common(dev, qp);
memset(&in, 0, sizeof(in));
memset(&out, 0, sizeof(out));
}
EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
- enum mlx5_qp_state new_state,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
struct mlx5_modify_qp_mbox_in *in, int sqd_event,
struct mlx5_core_qp *qp)
{
- static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
- [MLX5_QP_STATE_RST] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
- },
- [MLX5_QP_STATE_INIT] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
- [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
- },
- [MLX5_QP_STATE_RTR] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
- },
- [MLX5_QP_STATE_RTS] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
- },
- [MLX5_QP_STATE_SQD] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- },
- [MLX5_QP_STATE_SQER] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
- },
- [MLX5_QP_STATE_ERR] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- }
- };
-
struct mlx5_modify_qp_mbox_out out;
int err = 0;
- u16 op;
-
- if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE ||
- !optab[cur_state][new_state])
- return -EINVAL;
memset(&out, 0, sizeof(out));
- op = optab[cur_state][new_state];
- in->hdr.opcode = cpu_to_be16(op);
+ in->hdr.opcode = cpu_to_be16(operation);
in->qpn = cpu_to_be32(qp->qpn);
err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
if (err)
}
EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
#endif
+
+/*
+ * Create an RQ with mlx5_core_create_rq() and hand it to
+ * create_qprqsq_common() as an MLX5_RES_RQ resource.
+ *
+ * The new RQ number is stored in rq->qpn.  If create_qprqsq_common()
+ * fails, the RQ that was just created is destroyed again.
+ *
+ * Returns 0 on success, otherwise the error of the step that failed.
+ */
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+				struct mlx5_core_qp *rq)
+{
+	u32 new_rqn;
+	int ret;
+
+	ret = mlx5_core_create_rq(dev, in, inlen, &new_rqn);
+	if (ret)
+		return ret;
+
+	rq->qpn = new_rqn;
+
+	ret = create_qprqsq_common(dev, rq, MLX5_RES_RQ);
+	if (!ret)
+		return 0;
+
+	/* Undo the RQ creation before reporting the failure. */
+	mlx5_core_destroy_rq(dev, rq->qpn);
+	return ret;
+}
+EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
+
+/*
+ * Tear down an RQ created by mlx5_core_create_rq_tracked(): first pass it to
+ * destroy_qprqsq_common(), then destroy the RQ identified by rq->qpn.
+ */
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *rq)
+{
+ destroy_qprqsq_common(dev, rq);
+ mlx5_core_destroy_rq(dev, rq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
+
+/*
+ * Create an SQ with mlx5_core_create_sq() and hand it to
+ * create_qprqsq_common() as an MLX5_RES_SQ resource.
+ *
+ * The new SQ number is stored in sq->qpn.  If create_qprqsq_common()
+ * fails, the SQ that was just created is destroyed again.
+ *
+ * Returns 0 on success, otherwise the error of the step that failed.
+ */
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+				struct mlx5_core_qp *sq)
+{
+	u32 new_sqn;
+	int ret;
+
+	ret = mlx5_core_create_sq(dev, in, inlen, &new_sqn);
+	if (ret)
+		return ret;
+
+	sq->qpn = new_sqn;
+
+	ret = create_qprqsq_common(dev, sq, MLX5_RES_SQ);
+	if (!ret)
+		return 0;
+
+	/* Undo the SQ creation before reporting the failure. */
+	mlx5_core_destroy_sq(dev, sq->qpn);
+	return ret;
+}
+EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
+
+/*
+ * Tear down an SQ created by mlx5_core_create_sq_tracked(): first pass it to
+ * destroy_qprqsq_common(), then destroy the SQ identified by sq->qpn.
+ */
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *sq)
+{
+ destroy_qprqsq_common(dev, sq);
+ mlx5_core_destroy_sq(dev, sq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
#include <linux/mlx5/srq.h>
#include <rdma/ib_verbs.h>
#include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
{
memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc));
memcpy(pas, in->pas, pas_size);
- /* 0xffffff means we ask to work with cqe version 0 */
- MLX5_SET(xrc_srqc, xrc_srqc, user_index, 0xffffff);
MLX5_SET(create_xrc_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_XRC_SRQ);
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
{
u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)];
u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)];
return err;
}
+EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
{
u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)];
u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)];
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
{
memset(out, 0, sizeof(out));
return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_modify_rq);
void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
{
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+/*
+ * Issue a QUERY_RQ command for @rqn.
+ *
+ * @out receives the command output; MLX5_ST_SZ_BYTES(query_rq_out) bytes are
+ * passed as its length, so the caller must supply a buffer at least that big.
+ * Returns the result of mlx5_cmd_exec_check_status().
+ */
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
+{
+	u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0};
+
+	MLX5_SET(query_rq_in, in, rqn, rqn);
+	MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+					  MLX5_ST_SZ_BYTES(query_rq_out));
+}
+EXPORT_SYMBOL(mlx5_core_query_rq);
+
int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
{
u32 out[MLX5_ST_SZ_DW(create_sq_out)];
memset(out, 0, sizeof(out));
return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_modify_sq);
void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
{
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+/*
+ * Issue a QUERY_SQ command for @sqn.
+ *
+ * @out receives the command output; MLX5_ST_SZ_BYTES(query_sq_out) bytes are
+ * passed as its length, so the caller must supply a buffer at least that big.
+ * Returns the result of mlx5_cmd_exec_check_status().
+ */
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
+{
+	u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0};
+
+	MLX5_SET(query_sq_in, in, sqn, sqn);
+	MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+					  MLX5_ST_SZ_BYTES(query_sq_out));
+}
+EXPORT_SYMBOL(mlx5_core_query_sq);
+
int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tirn)
{
return err;
}
+EXPORT_SYMBOL(mlx5_core_create_tir);
int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
int inlen)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_destroy_tir);
int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tisn)
return err;
}
+EXPORT_SYMBOL(mlx5_core_create_tis);
+
+/*
+ * Issue a MODIFY_TIS command for @tisn.
+ *
+ * @in is the caller-built command inbox of @inlen bytes; its tisn and opcode
+ * fields are filled in here before the command is executed.
+ * Returns the result of mlx5_cmd_exec_check_status().
+ */
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+			 int inlen)
+{
+	u32 outbox[MLX5_ST_SZ_DW(modify_tis_out)] = {0};
+
+	MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
+	MLX5_SET(modify_tis_in, in, tisn, tisn);
+
+	return mlx5_cmd_exec_check_status(dev, in, inlen, outbox,
+					  sizeof(outbox));
+}
+EXPORT_SYMBOL(mlx5_core_modify_tis);
void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
{
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_destroy_tis);
int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rmpn)
+++ /dev/null
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __TRANSOBJ_H__
-#define __TRANSOBJ_H__
-
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
-int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rqn);
-int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
-void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
-int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *sqn);
-int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
-void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
-int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *tirn);
-int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
- int inlen);
-void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
-int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *tisn);
-void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
-int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rmpn);
-int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
-int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rmpn);
-int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
-int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
-int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
-
-int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
- u32 *rqtn);
-int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
- int inlen);
-void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn);
-
-#endif /* __TRANSOBJ_H__ */
return MLX5_GET(query_vport_state_out, out, admin_state);
}
-EXPORT_SYMBOL(mlx5_query_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state);
int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
u16 vport, u8 state)
return err;
}
-EXPORT_SYMBOL(mlx5_modify_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_modify_vport_admin_state);
static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
u32 *out, int outlen)
nic_vport_context.permanent_address);
err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
- if (err)
- goto out;
-
- ether_addr_copy(addr, &out_addr[2]);
+ if (!err)
+ ether_addr_copy(addr, &out_addr[2]);
-out:
kvfree(out);
return err;
}
return err;
}
-EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address);
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address);
int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
u32 vport,
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
+/*
+ * Read the system image GUID from the NIC vport context of vport 0.
+ *
+ * On success 0 is returned and *system_image_guid holds the value.  If the
+ * buffer allocation or the query fails, the error is returned and
+ * *system_image_guid is left untouched.
+ */
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+					   u64 *system_image_guid)
+{
+	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+	u32 *out;
+	int err;
+
+	out = mlx5_vzalloc(outlen);
+	if (!out)
+		return -ENOMEM;
+
+	/* Only decode the output buffer when the query actually succeeded. */
+	err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+	if (!err)
+		*system_image_guid =
+			MLX5_GET64(query_nic_vport_context_out, out,
+				   nic_vport_context.system_image_guid);
+
+	/*
+	 * Buffers from mlx5_vzalloc() are released with kvfree() elsewhere in
+	 * this file; plain kfree() is not valid for a vmalloc-backed buffer.
+	 */
+	kvfree(out);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
+
+/*
+ * Read the node GUID from the NIC vport context of vport 0.
+ *
+ * On success 0 is returned and *node_guid holds the value.  If the buffer
+ * allocation or the query fails, the error is returned and *node_guid is
+ * left untouched.
+ */
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
+{
+	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+	u32 *out;
+	int err;
+
+	out = mlx5_vzalloc(outlen);
+	if (!out)
+		return -ENOMEM;
+
+	/* Only decode the output buffer when the query actually succeeded. */
+	err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+	if (!err)
+		*node_guid = MLX5_GET64(query_nic_vport_context_out, out,
+					nic_vport_context.node_guid);
+
+	/*
+	 * Buffers from mlx5_vzalloc() are released with kvfree() elsewhere in
+	 * this file; plain kfree() is not valid for a vmalloc-backed buffer.
+	 */
+	kvfree(out);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
+
+/*
+ * Read the Q_Key violation counter from the NIC vport context of vport 0.
+ *
+ * On success 0 is returned and *qkey_viol_cntr holds the value.  If the
+ * buffer allocation or the query fails, the error is returned and
+ * *qkey_viol_cntr is left untouched.
+ */
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+					u16 *qkey_viol_cntr)
+{
+	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+	u32 *out;
+	int err;
+
+	out = mlx5_vzalloc(outlen);
+	if (!out)
+		return -ENOMEM;
+
+	/* Only decode the output buffer when the query actually succeeded. */
+	err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+	if (!err)
+		*qkey_viol_cntr =
+			MLX5_GET(query_nic_vport_context_out, out,
+				 nic_vport_context.qkey_violation_counter);
+
+	/*
+	 * Buffers from mlx5_vzalloc() are released with kvfree() elsewhere in
+	 * this file; plain kfree() is not valid for a vmalloc-backed buffer.
+	 */
+	kvfree(out);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
+
int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
u8 port_num, u16 vf_num, u16 gid_index,
union ib_gid *gid)
return err;
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
+
+/* Values written to nic_vport_context.roce_en by mlx5_nic_vport_update_roce_state(). */
+enum mlx5_vport_roce_state {
+ MLX5_VPORT_ROCE_DISABLED = 0,
+ MLX5_VPORT_ROCE_ENABLED = 1,
+};
+
+/*
+ * Set the roce_en field of the NIC vport context to @state.
+ *
+ * Builds a MODIFY_NIC_VPORT_CONTEXT inbox that selects only the roce_en
+ * field and submits it through mlx5_modify_nic_vport_context().
+ * Returns -ENOMEM if the inbox cannot be allocated, otherwise the result of
+ * the modify call.
+ */
+static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
+					    enum mlx5_vport_roce_state state)
+{
+	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+	void *inbox = mlx5_vzalloc(inlen);
+	int ret;
+
+	if (!inbox) {
+		mlx5_core_warn(mdev, "failed to allocate inbox\n");
+		return -ENOMEM;
+	}
+
+	MLX5_SET(modify_nic_vport_context_in, inbox,
+		 nic_vport_context.roce_en, state);
+	MLX5_SET(modify_nic_vport_context_in, inbox, field_select.roce_en, 1);
+
+	ret = mlx5_modify_nic_vport_context(mdev, inbox, inlen);
+	kvfree(inbox);
+
+	return ret;
+}
+
+/*
+ * Set roce_en in the NIC vport context to MLX5_VPORT_ROCE_ENABLED.
+ * Returns the result of mlx5_nic_vport_update_roce_state().
+ */
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
+{
+ return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
+
+/*
+ * Set roce_en in the NIC vport context to MLX5_VPORT_ROCE_DISABLED.
+ * Returns the result of mlx5_nic_vport_update_roce_state().
+ */
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
+{
+ return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
+source "drivers/ntb/hw/amd/Kconfig"
source "drivers/ntb/hw/intel/Kconfig"
+obj-$(CONFIG_NTB_AMD) += amd/
obj-$(CONFIG_NTB_INTEL) += intel/
--- /dev/null
+config NTB_AMD
+ tristate "AMD Non-Transparent Bridge support"
+ depends on X86_64
+ help
+ This driver supports AMD NTB on capable Zeppelin hardware.
+
+ If unsure, say N.
--- /dev/null
+obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
--- /dev/null
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * BSD LICENSE
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copy
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of AMD Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_amd.h"
+
+#define NTB_NAME "ntb_hw_amd"
+#define NTB_DESC "AMD(R) PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER "1.0"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("AMD Inc.");
+
+static const struct file_operations amd_ntb_debugfs_info;
+static struct dentry *debugfs_dir;
+
+/*
+ * Map memory window index @idx to the BAR number used for it (1 << idx).
+ *
+ * Valid indexes are 0 .. mw_count - 1; anything else yields -EINVAL.  The
+ * upper bound is exclusive: idx == mw_count would name a window that does
+ * not exist.
+ */
+static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
+{
+	if (idx < 0 || idx >= ndev->mw_count)
+		return -EINVAL;
+
+	return 1 << idx;
+}
+
+/* ntb_dev_ops.mw_count: report the device's memory window count. */
+static int amd_ntb_mw_count(struct ntb_dev *ntb)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev->mw_count;
+}
+
+/*
+ * ntb_dev_ops.mw_get_range: describe memory window @idx.
+ *
+ * Each output pointer is optional.  @base and @size come from the PCI
+ * resource of the BAR backing the window; @align is fixed at SZ_4K and
+ * @align_size at 1.
+ *
+ * Returns 0 on success or the negative error from ndev_mw_to_bar().
+ */
+static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
+				phys_addr_t *base,
+				resource_size_t *size,
+				resource_size_t *align,
+				resource_size_t *align_size)
+{
+	struct amd_ntb_dev *dev = ntb_ndev(ntb);
+	int bar_no = ndev_mw_to_bar(dev, idx);
+
+	if (bar_no < 0)
+		return bar_no;
+
+	if (base)
+		*base = pci_resource_start(dev->ntb.pdev, bar_no);
+	if (size)
+		*size = pci_resource_len(dev->ntb.pdev, bar_no);
+	if (align)
+		*align = SZ_4K;
+	if (align_size)
+		*align_size = 1;
+
+	return 0;
+}
+
+/*
+ * ntb_dev_ops.mw_set_trans: program the translation address and limit
+ * registers for memory window @idx.
+ *
+ * The window index is mapped to a BAR by ndev_mw_to_bar().  @addr is written
+ * to the XLAT register through peer_mmio and the limit (BAR start + @size)
+ * to the LMT register through self_mmio.  Each write is read back; on a
+ * mismatch the registers touched so far are rewritten (XLAT to 0, LMT to the
+ * BAR start) and -EIO is returned.
+ *
+ * BAR 1 takes a separate path: @addr and @addr + @size must fit in 32 bits
+ * and the limit register is accessed with 32-bit reads/writes.
+ *
+ * Returns 0 on success, -EINVAL for a bad index, a @size larger than the BAR
+ * or a BAR 1 range that does not fit in 32 bits, and -EIO on a read-back
+ * mismatch.
+ */
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+ dma_addr_t addr, resource_size_t size)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ unsigned long xlat_reg, limit_reg = 0;
+ resource_size_t mw_size;
+ void __iomem *mmio, *peer_mmio;
+ u64 base_addr, limit, reg_val;
+ int bar;
+
+ bar = ndev_mw_to_bar(ndev, idx);
+ if (bar < 0)
+ return bar;
+
+ mw_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+ /* make sure the range fits in the usable mw size */
+ if (size > mw_size)
+ return -EINVAL;
+
+ mmio = ndev->self_mmio;
+ peer_mmio = ndev->peer_mmio;
+
+ base_addr = pci_resource_start(ndev->ntb.pdev, bar);
+
+ if (bar != 1) {
+ /* register offsets are 8 bytes apart per BAR, counted from BAR 2 */
+ xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
+ limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
+
+ /* Set the limit if supported */
+ limit = base_addr + size;
+
+ /* set and verify setting the translation address */
+ write64(addr, peer_mmio + xlat_reg);
+ reg_val = read64(peer_mmio + xlat_reg);
+ if (reg_val != addr) {
+ write64(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+
+ /* set and verify setting the limit */
+ write64(limit, mmio + limit_reg);
+ reg_val = read64(mmio + limit_reg);
+ if (reg_val != limit) {
+ write64(base_addr, mmio + limit_reg);
+ write64(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+ } else {
+ xlat_reg = AMD_BAR1XLAT_OFFSET;
+ limit_reg = AMD_BAR1LMT_OFFSET;
+
+ /* split bar addr range must all be 32 bit */
+ if (addr & (~0ull << 32))
+ return -EINVAL;
+ if ((addr + size) & (~0ull << 32))
+ return -EINVAL;
+
+ /* Set the limit if supported */
+ limit = base_addr + size;
+
+ /* set and verify setting the translation address */
+ write64(addr, peer_mmio + xlat_reg);
+ reg_val = read64(peer_mmio + xlat_reg);
+ if (reg_val != addr) {
+ write64(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+
+ /* set and verify setting the limit */
+ /*
+ * 32-bit access: readl() yields at most 32 bits, so a limit that
+ * does not fit in 32 bits can never match and ends in -EIO.
+ */
+ writel(limit, mmio + limit_reg);
+ reg_val = readl(mmio + limit_reg);
+ if (reg_val != limit) {
+ writel(base_addr, mmio + limit_reg);
+ writel(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Report whether the link is up, based on cached state.
+ *
+ * With no pending peer event (peer_sta == 0) the answer is
+ * NTB_LNK_STA_ACTIVE(cntl_sta).  With any peer event pending the link is
+ * reported as down (0).  Note that this function is not a pure query: it
+ * clears the RESET bit, or the whole peer_sta on a D0 event, as a side
+ * effect.
+ */
+static int amd_link_is_up(struct amd_ntb_dev *ndev)
+{
+ if (!ndev->peer_sta)
+ return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
+
+ /* If peer_sta holds a reset or D0 event, the ISR has already
+ * started a timer to check the hardware link status, so only
+ * the status bit is cleared here. If peer_sta holds a D3 or
+ * PME_TO event, a D0/reset event will follow when the system
+ * wakes up or powers on, so nothing is done here.
+ */
+ if (ndev->peer_sta & AMD_PEER_RESET_EVENT)
+ ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
+ else if (ndev->peer_sta & AMD_PEER_D0_EVENT)
+ ndev->peer_sta = 0;
+
+ return 0;
+}
+
+/*
+ * ntb_dev_ops.link_is_up: return 1 if the link is up, 0 otherwise.
+ *
+ * @speed and @width are optional.  When the link is up they are filled from
+ * the cached lnk_sta value; when it is down they are set to NTB_SPEED_NONE
+ * and NTB_WIDTH_NONE.
+ */
+static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+			      enum ntb_speed *speed,
+			      enum ntb_width *width)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+	if (!amd_link_is_up(ndev)) {
+		if (speed)
+			*speed = NTB_SPEED_NONE;
+		if (width)
+			*width = NTB_WIDTH_NONE;
+
+		dev_dbg(ndev_dev(ndev), "link is down.\n");
+		return 0;
+	}
+
+	if (speed)
+		*speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
+	if (width)
+		*width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
+
+	dev_dbg(ndev_dev(ndev), "link is up.\n");
+	return 1;
+}
+
+/*
+ * ntb_dev_ops.link_enable: unmask the event interrupts and set the
+ * PMM_REG_CTL and SMM_REG_CTL bits in the control register.
+ *
+ * @max_speed and @max_width are not used.  The event interrupt is unmasked
+ * before the topology check, so it takes effect even when -EINVAL is
+ * returned for NTB_TOPO_SEC.  Returns 0 otherwise.
+ */
+static int amd_ntb_link_enable(struct ntb_dev *ntb,
+			       enum ntb_speed max_speed,
+			       enum ntb_width max_width)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *regs = ndev->self_mmio;
+	u32 ctl;
+
+	/* Enable event interrupt */
+	ndev->int_mask &= ~AMD_EVENT_INTMASK;
+	writel(ndev->int_mask, regs + AMD_INTMASK_OFFSET);
+
+	if (ndev->ntb.topo == NTB_TOPO_SEC)
+		return -EINVAL;
+
+	dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+	ctl = readl(regs + AMD_CNTL_OFFSET) | (PMM_REG_CTL | SMM_REG_CTL);
+	writel(ctl, regs + AMD_CNTL_OFFSET);
+
+	return 0;
+}
+
+/*
+ * ntb_dev_ops.link_disable: mask the event interrupts and clear the
+ * PMM_REG_CTL and SMM_REG_CTL bits in the control register.
+ *
+ * The event interrupt is masked before the topology check, so it takes
+ * effect even when -EINVAL is returned for NTB_TOPO_SEC.  Returns 0
+ * otherwise.
+ */
+static int amd_ntb_link_disable(struct ntb_dev *ntb)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 ntb_ctl;
+
+	/* Disable event interrupt */
+	ndev->int_mask |= AMD_EVENT_INTMASK;
+	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+	if (ndev->ntb.topo == NTB_TOPO_SEC)
+		return -EINVAL;
+	/* Log what is really happening; this used to say "Enabling Link." */
+	dev_dbg(ndev_dev(ndev), "Disabling Link.\n");
+
+	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+	ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL);
+	writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+	return 0;
+}
+
+/* ntb_dev_ops.db_valid_mask: report the device's valid doorbell bit mask. */
+static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev->db_valid_mask;
+}
+
+/* ntb_dev_ops.db_vector_count: report the device's doorbell vector count. */
+static int amd_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev->db_count;
+}
+
+/*
+ * ntb_dev_ops.db_vector_mask: return the doorbell bit served by interrupt
+ * vector @db_vector, restricted to the valid doorbell mask.
+ *
+ * Valid vectors are 0 .. db_count - 1; any other value yields 0.
+ */
+static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+	/* Upper bound is exclusive: vector db_count does not exist. */
+	if (db_vector < 0 || db_vector >= ndev->db_count)
+		return 0;
+
+	/* Shift in 64 bits so the result matches the u64 mask width. */
+	return ndev->db_valid_mask & ((u64)1 << db_vector);
+}
+
+/* ntb_dev_ops.db_read: 16-bit read of the doorbell status register. */
+static u64 amd_ntb_db_read(struct ntb_dev *ntb)
+{
+	void __iomem *regs = ntb_ndev(ntb)->self_mmio;
+	u64 db_stat;
+
+	db_stat = (u64)readw(regs + AMD_DBSTAT_OFFSET);
+	return db_stat;
+}
+
+/*
+ * ntb_dev_ops.db_clear: write the low 16 bits of @db_bits to the doorbell
+ * status register.  Always returns 0.
+ */
+static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+	void __iomem *regs = ntb_ndev(ntb)->self_mmio;
+
+	writew((u16)db_bits, regs + AMD_DBSTAT_OFFSET);
+	return 0;
+}
+
+/*
+ * ntb_dev_ops.db_set_mask: add @db_bits to the cached doorbell mask and
+ * write the result to the doorbell mask register.
+ *
+ * Returns -EINVAL if @db_bits contains bits outside db_valid_mask, else 0.
+ * The cached mask and the register are updated under db_mask_lock.
+ */
+static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *regs = ndev->self_mmio;
+	unsigned long irq_flags;
+
+	if ((db_bits & ~ndev->db_valid_mask) != 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ndev->db_mask_lock, irq_flags);
+	ndev->db_mask = ndev->db_mask | db_bits;
+	writew((u16)ndev->db_mask, regs + AMD_DBMASK_OFFSET);
+	spin_unlock_irqrestore(&ndev->db_mask_lock, irq_flags);
+
+	return 0;
+}
+
+/*
+ * ntb_dev_ops.db_clear_mask: remove @db_bits from the cached doorbell mask
+ * and write the result to the doorbell mask register.
+ *
+ * Returns -EINVAL if @db_bits contains bits outside db_valid_mask, else 0.
+ * The cached mask and the register are updated under db_mask_lock.
+ */
+static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *regs = ndev->self_mmio;
+	unsigned long irq_flags;
+
+	if ((db_bits & ~ndev->db_valid_mask) != 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ndev->db_mask_lock, irq_flags);
+	ndev->db_mask = ndev->db_mask & ~db_bits;
+	writew((u16)ndev->db_mask, regs + AMD_DBMASK_OFFSET);
+	spin_unlock_irqrestore(&ndev->db_mask_lock, irq_flags);
+
+	return 0;
+}
+
+/*
+ * ntb_dev_ops.peer_db_addr: report the address and size of the doorbell
+ * request register.  Both output pointers are optional.  Always returns 0.
+ *
+ * NOTE(review): the value stored in *db_addr is the __iomem mapping pointer
+ * (peer_mmio + AMD_DBREQ_OFFSET) cast to phys_addr_t, i.e. not obviously a
+ * physical/bus address -- confirm this is what callers expect.
+ */
+static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
+ phys_addr_t *db_addr,
+ resource_size_t *db_size)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+ if (db_addr)
+ *db_addr = (phys_addr_t)(ndev->peer_mmio + AMD_DBREQ_OFFSET);
+ if (db_size)
+ *db_size = sizeof(u32);
+
+ return 0;
+}
+
+/*
+ * ntb_dev_ops.peer_db_set: write the low 16 bits of @db_bits to the
+ * doorbell request register (accessed through self_mmio).  Always returns 0.
+ */
+static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+	void __iomem *regs = ntb_ndev(ntb)->self_mmio;
+
+	writew((u16)db_bits, regs + AMD_DBREQ_OFFSET);
+	return 0;
+}
+
+/* ntb_dev_ops.spad_count: report the device's scratchpad count. */
+static int amd_ntb_spad_count(struct ntb_dev *ntb)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+	return ndev->spad_count;
+}
+
+/*
+ * ntb_dev_ops.spad_read: read local scratchpad @idx.
+ *
+ * Scratchpads are 4 bytes apart, starting at AMD_SPAD_OFFSET + self_spad.
+ * An out-of-range @idx reads as 0.
+ */
+static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *regs = ndev->self_mmio;
+	u32 spad_off;
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return 0;
+
+	spad_off = ndev->self_spad + (idx << 2);
+
+	return readl(regs + AMD_SPAD_OFFSET + spad_off);
+}
+
+/*
+ * ntb_dev_ops.spad_write: write @val to local scratchpad @idx.
+ *
+ * Scratchpads are 4 bytes apart, starting at AMD_SPAD_OFFSET + self_spad.
+ * Returns -EINVAL for an out-of-range @idx, else 0.
+ */
+static int amd_ntb_spad_write(struct ntb_dev *ntb,
+			      int idx, u32 val)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *regs = ndev->self_mmio;
+	u32 spad_off;
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return -EINVAL;
+
+	spad_off = ndev->self_spad + (idx << 2);
+	writel(val, regs + AMD_SPAD_OFFSET + spad_off);
+
+	return 0;
+}
+
+/*
+ * ntb_dev_ops.peer_spad_addr: report the address of peer scratchpad @idx.
+ * @spad_addr is optional.  Returns -EINVAL for an out-of-range @idx, else 0.
+ *
+ * NOTE(review): the value stored in *spad_addr is the __iomem mapping
+ * pointer (self_mmio + AMD_SPAD_OFFSET + peer_spad + idx * 4) cast to
+ * phys_addr_t, i.e. not obviously a physical/bus address -- confirm this is
+ * what callers expect.
+ */
+static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+ phys_addr_t *spad_addr)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+ if (idx < 0 || idx >= ndev->spad_count)
+ return -EINVAL;
+
+ if (spad_addr)
+ *spad_addr = (phys_addr_t)(ndev->self_mmio + AMD_SPAD_OFFSET +
+ ndev->peer_spad + (idx << 2));
+ return 0;
+}
+
+/*
+ * ntb_dev_ops.peer_spad_read: read peer scratchpad @idx.
+ *
+ * Scratchpads are 4 bytes apart, starting at AMD_SPAD_OFFSET + peer_spad.
+ * An out-of-range @idx reads as 0, matching amd_ntb_spad_read(); the return
+ * type is u32, so a negative errno cannot be reported here and would be
+ * seen by the caller as a large bogus scratchpad value.
+ */
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 offset;
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return 0;
+
+	offset = ndev->peer_spad + (idx << 2);
+	return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+/*
+ * ntb_dev_ops.peer_spad_write: write @val to peer scratchpad @idx.
+ *
+ * Scratchpads are 4 bytes apart, starting at AMD_SPAD_OFFSET + peer_spad.
+ * Returns -EINVAL for an out-of-range @idx, else 0.
+ */
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
+				   int idx, u32 val)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *regs = ndev->self_mmio;
+	u32 spad_off;
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return -EINVAL;
+
+	spad_off = ndev->peer_spad + (idx << 2);
+	writel(val, regs + AMD_SPAD_OFFSET + spad_off);
+
+	return 0;
+}
+
+/* NTB device operations implemented by this driver. */
+static const struct ntb_dev_ops amd_ntb_ops = {
+ .mw_count = amd_ntb_mw_count,
+ .mw_get_range = amd_ntb_mw_get_range,
+ .mw_set_trans = amd_ntb_mw_set_trans,
+ .link_is_up = amd_ntb_link_is_up,
+ .link_enable = amd_ntb_link_enable,
+ .link_disable = amd_ntb_link_disable,
+ .db_valid_mask = amd_ntb_db_valid_mask,
+ .db_vector_count = amd_ntb_db_vector_count,
+ .db_vector_mask = amd_ntb_db_vector_mask,
+ .db_read = amd_ntb_db_read,
+ .db_clear = amd_ntb_db_clear,
+ .db_set_mask = amd_ntb_db_set_mask,
+ .db_clear_mask = amd_ntb_db_clear_mask,
+ .peer_db_addr = amd_ntb_peer_db_addr,
+ .peer_db_set = amd_ntb_peer_db_set,
+ .spad_count = amd_ntb_spad_count,
+ .spad_read = amd_ntb_spad_read,
+ .spad_write = amd_ntb_spad_write,
+ .peer_spad_addr = amd_ntb_peer_spad_addr,
+ .peer_spad_read = amd_ntb_peer_spad_read,
+ .peer_spad_write = amd_ntb_peer_spad_write,
+};
+
+/*
+ * Set @bit in the SMU acknowledge register (read-modify-write) and record
+ * the same bit in the cached peer_sta.
+ */
+static void amd_ack_smu(struct amd_ntb_dev *ndev, u32 bit)
+{
+	void __iomem *ack_reg = ndev->self_mmio + AMD_SMUACK_OFFSET;
+	int ack;
+
+	ack = readl(ack_reg);
+	ack |= bit;
+	writel(ack, ack_reg);
+
+	ndev->peer_sta |= bit;
+}
+
+/*
+ * Handle a peer event interrupt.
+ *
+ * Reads the interrupt status register and returns immediately if no bit of
+ * AMD_EVENT_INTMASK is set.  Otherwise the masked status is dispatched:
+ *  - FLUSH: logged only.
+ *  - RESET: acknowledged via amd_ack_smu(), a link event is raised and the
+ *    hb_timer delayed work is scheduled.
+ *  - D3 / PME_TO: acknowledged and a link event is raised.
+ *  - D0: the PME status is read through peer_mmio, the event is
+ *    acknowledged and the hb_timer delayed work is scheduled.
+ *  - anything else: the status value is logged.
+ *
+ * The switch compares the whole masked status against each event constant,
+ * so a status with more than one event bit set lands in the default case
+ * (presumably each AMD_PEER_*_EVENT is a single bit -- confirm in the
+ * header).
+ */
+static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
+{
+ void __iomem *mmio = ndev->self_mmio;
+ u32 status;
+
+ status = readl(mmio + AMD_INTSTAT_OFFSET);
+ if (!(status & AMD_EVENT_INTMASK))
+ return;
+
+ dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status, vec);
+
+ status &= AMD_EVENT_INTMASK;
+ switch (status) {
+ case AMD_PEER_FLUSH_EVENT:
+ dev_info(ndev_dev(ndev), "Flush is done.\n");
+ break;
+ case AMD_PEER_RESET_EVENT:
+ amd_ack_smu(ndev, AMD_PEER_RESET_EVENT);
+
+ /* link down first */
+ ntb_link_event(&ndev->ntb);
+ /* polling peer status */
+ schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+ break;
+ case AMD_PEER_D3_EVENT:
+ case AMD_PEER_PMETO_EVENT:
+ amd_ack_smu(ndev, status);
+
+ /* link down */
+ ntb_link_event(&ndev->ntb);
+
+ break;
+ case AMD_PEER_D0_EVENT:
+ mmio = ndev->peer_mmio;
+ status = readl(mmio + AMD_PMESTAT_OFFSET);
+ /* check if this is WAKEUP event */
+ if (status & 0x1)
+ dev_info(ndev_dev(ndev), "Wakeup is done.\n");
+
+ amd_ack_smu(ndev, AMD_PEER_D0_EVENT);
+
+ /* start a timer to poll link status */
+ schedule_delayed_work(&ndev->hb_timer,
+ AMD_LINK_HB_TIMEOUT);
+ break;
+ default:
+ dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
+ break;
+ }
+}
+
+/*
+ * Common interrupt body for all interrupt modes.
+ *
+ * Vectors at or above AMD_DB_CNT are treated as event vectors.  When only a
+ * single vector is in use (msix_vec_count == 1) that vector serves both
+ * purposes, so the event handler runs and a doorbell event is raised as
+ * well.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
+{
+ dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
+
+ if (vec > (AMD_DB_CNT - 1) || (ndev->msix_vec_count == 1))
+ amd_handle_event(ndev, vec);
+
+ if (vec < AMD_DB_CNT)
+ ntb_db_event(&ndev->ntb, vec);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * MSI-X handler: @dev is the struct amd_ntb_vec registered for this vector,
+ * which carries the owning device and the vector number.
+ */
+static irqreturn_t ndev_vec_isr(int irq, void *dev)
+{
+ struct amd_ntb_vec *nvec = dev;
+
+ return ndev_interrupt(nvec->ndev, nvec->num);
+}
+
+/*
+ * MSI / INTx handler: @dev is the struct amd_ntb_dev itself.  The vector
+ * number passed on is @irq relative to the PCI device's base irq.
+ */
+static irqreturn_t ndev_irq_isr(int irq, void *dev)
+{
+ struct amd_ntb_dev *ndev = dev;
+
+ return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq);
+}
+
+/*
+ * Set up interrupt delivery, trying MSI-X first, then MSI, then INTx.
+ *
+ * @msix_min / @msix_max bound the number of MSI-X vectors requested.  On
+ * MSI-X success db_count is @msix_min and msix_vec_count is the number of
+ * vectors actually requested; on MSI or INTx both are 1.  All doorbells
+ * start out masked in the cached db_mask.
+ *
+ * If MSI-X setup fails at any step its resources are released, ndev->msix
+ * and ndev->vec are reset to NULL and the next mode is tried.
+ *
+ * Returns 0 on success or the request_irq() error of the final INTx attempt.
+ */
+static int ndev_init_isr(struct amd_ntb_dev *ndev,
+			 int msix_min, int msix_max)
+{
+	struct pci_dev *pdev;
+	int rc, i, msix_count, node;
+
+	pdev = ndev_pdev(ndev);
+
+	node = dev_to_node(&pdev->dev);
+
+	ndev->db_mask = ndev->db_valid_mask;
+
+	/* Try to set up msix irq */
+	ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
+				 GFP_KERNEL, node);
+	if (!ndev->vec)
+		goto err_msix_vec_alloc;
+
+	ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
+				  GFP_KERNEL, node);
+	if (!ndev->msix)
+		goto err_msix_alloc;
+
+	for (i = 0; i < msix_max; ++i)
+		ndev->msix[i].entry = i;
+
+	msix_count = pci_enable_msix_range(pdev, ndev->msix,
+					   msix_min, msix_max);
+	if (msix_count < 0)
+		goto err_msix_enable;
+
+	/* NOTE: Disable MSIX if msix count is less than 16 because of
+	 * hardware limitation.
+	 */
+	if (msix_count < msix_min) {
+		pci_disable_msix(pdev);
+		goto err_msix_enable;
+	}
+
+	for (i = 0; i < msix_count; ++i) {
+		ndev->vec[i].ndev = ndev;
+		ndev->vec[i].num = i;
+		rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
+				 "ndev_vec_isr", &ndev->vec[i]);
+		if (rc)
+			goto err_msix_request;
+	}
+
+	dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
+	ndev->db_count = msix_min;
+	/*
+	 * Record the number of vectors that were really requested:
+	 * ndev_deinit_isr() calls free_irq() on exactly msix_vec_count
+	 * entries, so this must not exceed what request_irq() was called for.
+	 */
+	ndev->msix_vec_count = msix_count;
+	return 0;
+
+err_msix_request:
+	/* free_irq() needs the same dev_id that was given to request_irq() */
+	while (i-- > 0)
+		free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+	pci_disable_msix(pdev);
+err_msix_enable:
+	kfree(ndev->msix);
+err_msix_alloc:
+	kfree(ndev->vec);
+err_msix_vec_alloc:
+	ndev->msix = NULL;
+	ndev->vec = NULL;
+
+	/* Try to set up msi irq */
+	rc = pci_enable_msi(pdev);
+	if (rc)
+		goto err_msi_enable;
+
+	rc = request_irq(pdev->irq, ndev_irq_isr, 0,
+			 "ndev_irq_isr", ndev);
+	if (rc)
+		goto err_msi_request;
+
+	dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+	ndev->db_count = 1;
+	ndev->msix_vec_count = 1;
+	return 0;
+
+err_msi_request:
+	pci_disable_msi(pdev);
+err_msi_enable:
+
+	/* Try to set up intx irq */
+	pci_intx(pdev, 1);
+
+	rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
+			 "ndev_irq_isr", ndev);
+	if (rc)
+		goto err_intx_request;
+
+	dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+	ndev->db_count = 1;
+	ndev->msix_vec_count = 1;
+	return 0;
+
+err_intx_request:
+	return rc;
+}
+
+/*
+ * Undo ndev_init_isr(): mask all doorbells, then release the interrupts of
+ * whichever mode is in use.
+ *
+ * A non-NULL ndev->msix means MSI-X: msix_vec_count vectors are freed (with
+ * the per-vector struct as dev_id), MSI-X is disabled and both arrays are
+ * freed.  Otherwise the single irq is freed and MSI is disabled or INTx is
+ * turned off, depending on pci_dev_msi_enabled().
+ */
+static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
+{
+ struct pci_dev *pdev;
+ void __iomem *mmio = ndev->self_mmio;
+ int i;
+
+ pdev = ndev_pdev(ndev);
+
+ /* Mask all doorbell interrupts */
+ ndev->db_mask = ndev->db_valid_mask;
+ /* NOTE(review): 32-bit write here; db_set_mask/db_clear_mask use writew */
+ writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+
+ if (ndev->msix) {
+ i = ndev->msix_vec_count;
+ while (i--)
+ free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+ pci_disable_msix(pdev);
+ kfree(ndev->msix);
+ kfree(ndev->vec);
+ } else {
+ free_irq(pdev->irq, ndev);
+ if (pci_dev_msi_enabled(pdev))
+ pci_disable_msi(pdev);
+ else
+ pci_intx(pdev, 0);
+ }
+}
+
+/*
+ * debugfs read handler: format a text report of the device state.
+ *
+ * The device comes from filp->private_data.  The report is built in a
+ * temporary buffer of min(count, 0x800) bytes with scnprintf(), so it is
+ * truncated rather than overrun when it does not fit, and is then copied to
+ * user space with simple_read_from_buffer().
+ *
+ * The report covers topology, cached link status, window/scratchpad/
+ * doorbell/vector counts, and live reads of the doorbell mask/status and the
+ * XLAT/LMT registers.
+ *
+ * Note that amd_link_is_up() may clear bits in ndev->peer_sta, so reading
+ * this file is not entirely side-effect free.
+ *
+ * Returns the number of bytes copied, or -ENOMEM if the buffer cannot be
+ * allocated.
+ */
+static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+ struct amd_ntb_dev *ndev;
+ void __iomem *mmio;
+ char *buf;
+ size_t buf_size;
+ ssize_t ret, off;
+ union { u64 v64; u32 v32; u16 v16; } u;
+
+ ndev = filp->private_data;
+ mmio = ndev->self_mmio;
+
+ buf_size = min(count, 0x800ul);
+
+ buf = kmalloc(buf_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ off = 0;
+
+ off += scnprintf(buf + off, buf_size - off,
+ "NTB Device Information:\n");
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Connection Topology -\t%s\n",
+ ntb_topo_string(ndev->ntb.topo));
+
+ off += scnprintf(buf + off, buf_size - off,
+ "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
+
+ if (!amd_link_is_up(ndev)) {
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Status -\t\tDown\n");
+ } else {
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Status -\t\tUp\n");
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Speed -\t\tPCI-E Gen %u\n",
+ NTB_LNK_STA_SPEED(ndev->lnk_sta));
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Width -\t\tx%u\n",
+ NTB_LNK_STA_WIDTH(ndev->lnk_sta));
+ }
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Memory Window Count -\t%u\n", ndev->mw_count);
+ off += scnprintf(buf + off, buf_size - off,
+ "Scratchpad Count -\t%u\n", ndev->spad_count);
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Count -\t%u\n", ndev->db_count);
+ off += scnprintf(buf + off, buf_size - off,
+ "MSIX Vector Count -\t%u\n", ndev->msix_vec_count);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
+
+ /* live register reads from here on (mmio == ndev->self_mmio) */
+ u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Mask -\t\t\t%#06x\n", u.v32);
+
+ u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Bell -\t\t\t%#06x\n", u.v32);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "\nNTB Incoming XLAT:\n");
+
+ u.v64 = read64(mmio + AMD_BAR1XLAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "XLAT1 -\t\t%#018llx\n", u.v64);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "XLAT23 -\t\t%#018llx\n", u.v64);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "XLAT45 -\t\t%#018llx\n", u.v64);
+
+ u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "LMT1 -\t\t\t%#06x\n", u.v32);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "LMT23 -\t\t\t%#018llx\n", u.v64);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "LMT45 -\t\t\t%#018llx\n", u.v64);
+
+ /* copy out only the bytes actually formatted (off), honouring *offp */
+ ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
+ kfree(buf);
+ return ret;
+}
+
+/*
+ * Create the per-device debugfs directory (named after the PCI device)
+ * and its read-only "info" file.  Both dentries stay NULL when the
+ * driver-wide debugfs_dir does not exist; debugfs_info also stays NULL
+ * when the per-device directory could not be created.
+ */
+static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
+{
+	ndev->debugfs_info = NULL;
+
+	if (!debugfs_dir) {
+		ndev->debugfs_dir = NULL;
+		return;
+	}
+
+	ndev->debugfs_dir = debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+	if (!ndev->debugfs_dir)
+		return;
+
+	ndev->debugfs_info = debugfs_create_file("info", S_IRUSR,
+						 ndev->debugfs_dir, ndev,
+						 &amd_ntb_debugfs_info);
+}
+
+/* Remove the per-device debugfs directory together with everything in it. */
+static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev)
+{
+ debugfs_remove_recursive(ndev->debugfs_dir);
+}
+
+/*
+ * Fill in the software-only defaults of a freshly allocated @ndev: bind
+ * it to @pdev, mark the topology as not yet known, install the AMD ops
+ * table, preset the event interrupt mask and initialise the doorbell
+ * mask lock.  No hardware is touched here.
+ */
+static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
+				    struct pci_dev *pdev)
+{
+	struct ntb_dev *ntb = &ndev->ntb;
+
+	ntb->pdev = pdev;
+	ntb->topo = NTB_TOPO_NONE;
+	ntb->ops = &amd_ntb_ops;
+
+	ndev->int_mask = AMD_EVENT_INTMASK;
+	spin_lock_init(&ndev->db_mask_lock);
+}
+
+/*
+ * Sample the link-active bit from the peer side-info register and, when
+ * it differs from the cached value, store it and refresh lnk_sta from
+ * PCI config space.
+ *
+ * Returns 1 when lnk_sta was refreshed, 0 when the bit did not change or
+ * the config-space read failed (cntl_sta is still updated in that case).
+ */
+static int amd_poll_link(struct amd_ntb_dev *ndev)
+{
+	u32 active, lnk;
+
+	active = readl(ndev->peer_mmio + AMD_SIDEINFO_OFFSET) &
+		 NTB_LIN_STA_ACTIVE_BIT;
+
+	dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, active);
+
+	if (active == ndev->cntl_sta)
+		return 0;
+
+	ndev->cntl_sta = active;
+
+	if (pci_read_config_dword(ndev->ntb.pdev, AMD_LINK_STATUS_OFFSET,
+				  &lnk))
+		return 0;
+
+	ndev->lnk_sta = lnk;
+	return 1;
+}
+
+/*
+ * Heartbeat work item: poll the link, raise a link event when its state
+ * changed, and re-arm itself for as long as the link is not up.
+ */
+static void amd_link_hb(struct work_struct *work)
+{
+	struct amd_ntb_dev *ndev = hb_ndev(work);
+	int changed = amd_poll_link(ndev);
+
+	if (changed)
+		ntb_link_event(&ndev->ntb);
+
+	if (amd_link_is_up(ndev))
+		return;
+
+	schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+}
+
+/*
+ * Set up interrupts for the AMD NTB by passing the AMD doorbell count and
+ * MSI-X vector count to ndev_init_isr(); returns whatever that call returns.
+ */
+static int amd_init_isr(struct amd_ntb_dev *ndev)
+{
+ return ndev_init_isr(ndev, AMD_DB_CNT, AMD_MSIX_VECTOR_CNT);
+}
+
+/*
+ * Set AMD_SIDE_READY in the local side-info register.  The register is
+ * only written when the bit is not already set.
+ */
+static void amd_init_side_info(struct amd_ntb_dev *ndev)
+{
+	void __iomem *sideinfo = ndev->self_mmio + AMD_SIDEINFO_OFFSET;
+	unsigned int val = readl(sideinfo);
+
+	if (val & AMD_SIDE_READY)
+		return;
+
+	writel(val | AMD_SIDE_READY, sideinfo);
+}
+
+/*
+ * Clear AMD_SIDE_READY in the local side-info register.  Nothing is
+ * written when the bit is already clear.
+ */
+static void amd_deinit_side_info(struct amd_ntb_dev *ndev)
+{
+	void __iomem *sideinfo = ndev->self_mmio + AMD_SIDEINFO_OFFSET;
+	unsigned int val = readl(sideinfo);
+
+	if (!(val & AMD_SIDE_READY))
+		return;
+
+	writel(val & ~AMD_SIDE_READY, sideinfo);
+	/* read back; presumably flushes the posted write - TODO confirm */
+	readl(sideinfo);
+}
+
+/*
+ * Apply the topology-dependent software setup and start link polling.
+ *
+ * Only the primary and secondary topologies are accepted.  The
+ * scratchpad count is halved and the two sides get opposite
+ * self/peer scratchpad offsets (0 and 0x20).  The heartbeat work is
+ * initialised and scheduled, the valid doorbell mask is derived from
+ * db_count, and the event interrupt mask is written to hardware.
+ *
+ * Returns 0 on success, -EINVAL for any other topology.
+ */
+static int amd_init_ntb(struct amd_ntb_dev *ndev)
+{
+	const bool primary = ndev->ntb.topo == NTB_TOPO_PRI;
+
+	ndev->mw_count = AMD_MW_CNT;
+	ndev->spad_count = AMD_SPADS_CNT;
+	ndev->db_count = AMD_DB_CNT;
+
+	if (!primary && ndev->ntb.topo != NTB_TOPO_SEC) {
+		dev_err(ndev_dev(ndev), "AMD NTB does not support B2B mode.\n");
+		return -EINVAL;
+	}
+
+	/* each side owns one half of the scratchpads */
+	ndev->spad_count >>= 1;
+	ndev->self_spad = primary ? 0 : 0x20;
+	ndev->peer_spad = primary ? 0x20 : 0;
+
+	INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
+	schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+	/* Mask event interrupts */
+	writel(ndev->int_mask, ndev->self_mmio + AMD_INTMASK_OFFSET);
+
+	return 0;
+}
+
+/*
+ * Read the side-info register to find out which side of the bridge this
+ * function is on: AMD_SIDE_MASK set means secondary, clear means primary.
+ */
+static enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)
+{
+	u32 sideinfo = readl(ndev->self_mmio + AMD_SIDEINFO_OFFSET);
+
+	return (sideinfo & AMD_SIDE_MASK) ? NTB_TOPO_SEC : NTB_TOPO_PRI;
+}
+
+/*
+ * Device-level initialisation: detect the topology, run the
+ * topology-dependent setup (which also starts the link heartbeat work)
+ * and set up interrupts.
+ *
+ * db_valid_mask is recomputed at the end because interrupt setup may
+ * change db_count (the INTx fallback reduces it to 1).
+ *
+ * Returns 0 on success or the negative errno of the failing step.  On
+ * failure nothing started here is left running.
+ */
+static int amd_init_dev(struct amd_ntb_dev *ndev)
+{
+	int rc;
+
+	ndev->ntb.topo = amd_get_topo(ndev);
+	dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
+		ntb_topo_string(ndev->ntb.topo));
+
+	rc = amd_init_ntb(ndev);
+	if (rc)
+		return rc;
+
+	rc = amd_init_isr(ndev);
+	if (rc) {
+		dev_err(ndev_dev(ndev), "fail to init isr.\n");
+		/*
+		 * amd_init_ntb() already armed the self-rescheduling
+		 * heartbeat work.  Stop it here: the probe error path
+		 * frees ndev without calling amd_deinit_dev().
+		 */
+		cancel_delayed_work_sync(&ndev->hb_timer);
+		return rc;
+	}
+
+	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+	return 0;
+}
+
+/*
+ * Undo amd_init_dev(): stop the link heartbeat work (waiting for a running
+ * instance to finish) before tearing down the interrupts.
+ */
+static void amd_deinit_dev(struct amd_ntb_dev *ndev)
+{
+ cancel_delayed_work_sync(&ndev->hb_timer);
+
+ ndev_deinit_isr(ndev);
+}
+
+/*
+ * Bring up the PCI side of the device: enable it, claim its regions,
+ * enable bus mastering, select DMA masks (64-bit preferred, 32-bit
+ * fallback with a warning) and map BAR 0.  peer_mmio is the same mapping
+ * offset by AMD_PEER_OFFSET.
+ *
+ * Returns 0 on success or a negative errno.  On failure every step that
+ * succeeded is undone, including the region reservation, and drvdata is
+ * cleared.
+ */
+static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
+			    struct pci_dev *pdev)
+{
+	int rc;
+
+	pci_set_drvdata(pdev, ndev);
+
+	rc = pci_enable_device(pdev);
+	if (rc)
+		goto err_pci_enable;
+
+	rc = pci_request_regions(pdev, NTB_NAME);
+	if (rc)
+		goto err_pci_regions;
+
+	pci_set_master(pdev);
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc)
+			goto err_dma_mask;
+		dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+	}
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc)
+			goto err_dma_mask;
+		dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+	}
+
+	ndev->self_mmio = pci_iomap(pdev, 0, 0);
+	if (!ndev->self_mmio) {
+		rc = -EIO;
+		goto err_dma_mask;
+	}
+	ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
+
+	return 0;
+
+err_dma_mask:
+	pci_clear_master(pdev);
+	/* the regions were claimed before any step that jumps here */
+	pci_release_regions(pdev);
+err_pci_regions:
+	pci_disable_device(pdev);
+err_pci_enable:
+	pci_set_drvdata(pdev, NULL);
+	return rc;
+}
+
+/*
+ * Undo amd_ntb_init_pci(): unmap the register BAR, then drop bus mastering,
+ * release the PCI regions, disable the device and clear drvdata.
+ */
+static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
+{
+ struct pci_dev *pdev = ndev_pdev(ndev);
+
+ pci_iounmap(pdev, ndev->self_mmio);
+
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+/*
+ * PCI probe: allocate the device context on the device's NUMA node,
+ * initialise PCI resources and the NTB hardware, publish the READY bit,
+ * take an initial link sample, create the debugfs entries and register
+ * with the NTB core.
+ *
+ * Returns 0 on success or a negative errno.  The error labels unwind in
+ * reverse order of setup; a registration failure also withdraws the
+ * READY bit, as amd_ntb_pci_remove() does.
+ */
+static int amd_ntb_pci_probe(struct pci_dev *pdev,
+			     const struct pci_device_id *id)
+{
+	struct amd_ntb_dev *ndev;
+	int rc, node;
+
+	node = dev_to_node(&pdev->dev);
+
+	ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
+	if (!ndev) {
+		rc = -ENOMEM;
+		goto err_ndev;
+	}
+
+	ndev_init_struct(ndev, pdev);
+
+	rc = amd_ntb_init_pci(ndev, pdev);
+	if (rc)
+		goto err_init_pci;
+
+	rc = amd_init_dev(ndev);
+	if (rc)
+		goto err_init_dev;
+
+	/* write side info */
+	amd_init_side_info(ndev);
+
+	amd_poll_link(ndev);
+
+	ndev_init_debugfs(ndev);
+
+	rc = ntb_register_device(&ndev->ntb);
+	if (rc)
+		goto err_register;
+
+	dev_info(&pdev->dev, "NTB device registered.\n");
+
+	return 0;
+
+err_register:
+	ndev_deinit_debugfs(ndev);
+	/* do not keep advertising READY for a device that failed to probe */
+	amd_deinit_side_info(ndev);
+	amd_deinit_dev(ndev);
+err_init_dev:
+	amd_ntb_deinit_pci(ndev);
+err_init_pci:
+	kfree(ndev);
+err_ndev:
+	return rc;
+}
+
+/*
+ * PCI remove: unregister from the NTB core first, then undo probe in
+ * reverse order (debugfs, READY bit, device/interrupt state, PCI resources)
+ * and free the device context.
+ */
+static void amd_ntb_pci_remove(struct pci_dev *pdev)
+{
+ struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+ ntb_unregister_device(&ndev->ntb);
+ ndev_deinit_debugfs(ndev);
+ amd_deinit_side_info(ndev);
+ amd_deinit_dev(ndev);
+ amd_ntb_deinit_pci(ndev);
+ kfree(ndev);
+}
+
+/* File operations of the read-only per-device debugfs "info" file. */
+static const struct file_operations amd_ntb_debugfs_info = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = ndev_debugfs_read,
+};
+
+/* PCI IDs handled by this driver; the zeroed entry terminates the table. */
+static const struct pci_device_id amd_ntb_pci_tbl[] = {
+ {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
+ {0}
+};
+MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
+
+/* PCI driver descriptor tying the ID table to the probe/remove callbacks. */
+static struct pci_driver amd_ntb_pci_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = amd_ntb_pci_tbl,
+ .probe = amd_ntb_pci_probe,
+ .remove = amd_ntb_pci_remove,
+};
+
+/*
+ * Module init: announce the driver, create the driver-wide debugfs
+ * directory when debugfs is available, and register the PCI driver.
+ *
+ * Returns the result of pci_register_driver().  When registration fails
+ * the debugfs directory is removed again, because the module exit handler
+ * will not run for a module whose init failed.
+ */
+static int __init amd_ntb_pci_driver_init(void)
+{
+	int rc;
+
+	pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+	if (debugfs_initialized())
+		debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	rc = pci_register_driver(&amd_ntb_pci_driver);
+	if (rc) {
+		debugfs_remove_recursive(debugfs_dir);
+		debugfs_dir = NULL;
+	}
+
+	return rc;
+}
+module_init(amd_ntb_pci_driver_init);
+
+/*
+ * Module exit: unregister the PCI driver, then remove the driver-wide
+ * debugfs directory and anything still left beneath it.
+ */
+static void __exit amd_ntb_pci_driver_exit(void)
+{
+ pci_unregister_driver(&amd_ntb_pci_driver);
+ debugfs_remove_recursive(debugfs_dir);
+}
+module_exit(amd_ntb_pci_driver_exit);
--- /dev/null
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * BSD LICENSE
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copy
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of AMD Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#ifndef NTB_HW_AMD_H
+#define NTB_HW_AMD_H
+
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+/* PCI device ID matched by the driver's ID table */
+#define PCI_DEVICE_ID_AMD_NTB 0x145B
+/* period of the link heartbeat work, in jiffies (1 second) */
+#define AMD_LINK_HB_TIMEOUT msecs_to_jiffies(1000)
+/* PCI config-space offset of the dword cached in lnk_sta */
+#define AMD_LINK_STATUS_OFFSET 0x68
+/* link-active bit; the driver also masks the peer side-info register with it */
+#define NTB_LIN_STA_ACTIVE_BIT 0x00000002
+/* speed is bits 19:16 and width is bits 25:20 of the link status dword */
+#define NTB_LNK_STA_SPEED_MASK 0x000F0000
+#define NTB_LNK_STA_WIDTH_MASK 0x03F00000
+/* field extractors for a link status value x */
+#define NTB_LNK_STA_ACTIVE(x) (!!((x) & NTB_LIN_STA_ACTIVE_BIT))
+#define NTB_LNK_STA_SPEED(x) (((x) & NTB_LNK_STA_SPEED_MASK) >> 16)
+#define NTB_LNK_STA_WIDTH(x) (((x) & NTB_LNK_STA_WIDTH_MASK) >> 20)
+
+/*
+ * read64(): 64-bit MMIO read.  Uses readq when it is defined; otherwise
+ * falls back to two 32-bit reads, low word first and then the word at
+ * offset 4, combined into one u64.  The fallback is two separate accesses.
+ */
+#ifndef read64
+#ifdef readq
+#define read64 readq
+#else
+#define read64 _read64
+static inline u64 _read64(void __iomem *mmio)
+{
+ u64 low, high;
+
+ low = readl(mmio);
+ high = readl(mmio + sizeof(u32));
+ return low | (high << 32);
+}
+#endif
+#endif
+
+/*
+ * write64(): 64-bit MMIO write.  Uses writeq when it is defined; otherwise
+ * falls back to two 32-bit writes, the low word first and then the high
+ * word at offset 4.  The fallback is two separate accesses.
+ */
+#ifndef write64
+#ifdef writeq
+#define write64 writeq
+#else
+#define write64 _write64
+static inline void _write64(u64 val, void __iomem *mmio)
+{
+ writel(val, mmio);
+ writel(val >> 32, mmio + sizeof(u32));
+}
+#endif
+#endif
+
+/*
+ * Hardware constants for the AMD NTB: resource counts, register offsets
+ * (added to the mapped register base by the driver) and register bit
+ * definitions.
+ */
+enum {
+ /* AMD NTB Capability */
+ AMD_MW_CNT = 3,
+ AMD_DB_CNT = 16,
+ AMD_MSIX_VECTOR_CNT = 24,
+ AMD_SPADS_CNT = 16,
+
+ /* AMD NTB register offset */
+ AMD_CNTL_OFFSET = 0x200,
+
+ /* NTB control register bits */
+ PMM_REG_CTL = BIT(21),
+ SMM_REG_CTL = BIT(20),
+ SMM_REG_ACC_PATH = BIT(18),
+ PMM_REG_ACC_PATH = BIT(17),
+ NTB_CLK_EN = BIT(16),
+
+ AMD_STA_OFFSET = 0x204,
+ AMD_PGSLV_OFFSET = 0x208,
+ AMD_SPAD_MUX_OFFSET = 0x20C,
+ AMD_SPAD_OFFSET = 0x210,
+ AMD_RSMU_HCID = 0x250,
+ AMD_RSMU_SIID = 0x254,
+ AMD_PSION_OFFSET = 0x300,
+ AMD_SSION_OFFSET = 0x330,
+ AMD_MMINDEX_OFFSET = 0x400,
+ AMD_MMDATA_OFFSET = 0x404,
+ AMD_SIDEINFO_OFFSET = 0x408,
+
+ /* side-info bits: MASK set means secondary side; READY is set by the driver */
+ AMD_SIDE_MASK = BIT(0),
+ AMD_SIDE_READY = BIT(1),
+
+ /* limit register */
+ AMD_ROMBARLMT_OFFSET = 0x410,
+ AMD_BAR1LMT_OFFSET = 0x414,
+ AMD_BAR23LMT_OFFSET = 0x418,
+ AMD_BAR45LMT_OFFSET = 0x420,
+ /* xlat address */
+ AMD_POMBARXLAT_OFFSET = 0x428,
+ AMD_BAR1XLAT_OFFSET = 0x430,
+ AMD_BAR23XLAT_OFFSET = 0x438,
+ AMD_BAR45XLAT_OFFSET = 0x440,
+ /* doorbell and interrupt */
+ AMD_DBFM_OFFSET = 0x450,
+ AMD_DBREQ_OFFSET = 0x454,
+ AMD_MIRRDBSTAT_OFFSET = 0x458,
+ AMD_DBMASK_OFFSET = 0x45C,
+ AMD_DBSTAT_OFFSET = 0x460,
+ AMD_INTMASK_OFFSET = 0x470,
+ AMD_INTSTAT_OFFSET = 0x474,
+
+ /* event type */
+ AMD_PEER_FLUSH_EVENT = BIT(0),
+ AMD_PEER_RESET_EVENT = BIT(1),
+ AMD_PEER_D3_EVENT = BIT(2),
+ AMD_PEER_PMETO_EVENT = BIT(3),
+ AMD_PEER_D0_EVENT = BIT(4),
+ /* all event bits; used as the initial int_mask of a device */
+ AMD_EVENT_INTMASK = (AMD_PEER_FLUSH_EVENT |
+ AMD_PEER_RESET_EVENT | AMD_PEER_D3_EVENT |
+ AMD_PEER_PMETO_EVENT | AMD_PEER_D0_EVENT),
+
+ AMD_PMESTAT_OFFSET = 0x480,
+ AMD_PMSGTRIG_OFFSET = 0x490,
+ AMD_LTRLATENCY_OFFSET = 0x494,
+ AMD_FLUSHTRIG_OFFSET = 0x498,
+
+ /* SMU register*/
+ AMD_SMUACK_OFFSET = 0x4A0,
+ AMD_SINRST_OFFSET = 0x4A4,
+ AMD_RSPNUM_OFFSET = 0x4A8,
+ AMD_SMU_SPADMUTEX = 0x4B0,
+ AMD_SMU_SPADOFFSET = 0x4B4,
+
+ /* added to the local register base to obtain peer_mmio */
+ AMD_PEER_OFFSET = 0x400,
+};
+
+struct amd_ntb_dev;
+
+/*
+ * Per-MSI-X-vector context; the address of entry i is the dev_id passed to
+ * free_irq() for vector i.
+ */
+struct amd_ntb_vec {
+ /* owning device */
+ struct amd_ntb_dev *ndev;
+ /* presumably the vector index - TODO confirm against ndev_init_isr() */
+ int num;
+};
+
+/* Driver context for one AMD NTB PCI function. */
+struct amd_ntb_dev {
+ /* embedded generic NTB device; ntb_ndev() maps it back to this struct */
+ struct ntb_dev ntb;
+
+ u32 ntb_side;
+ /* link status dword last read from PCI config space */
+ u32 lnk_sta;
+ /* last link-active bit sampled from the peer side-info register */
+ u32 cntl_sta;
+ u32 peer_sta;
+
+ /* resource counts exposed by this device */
+ unsigned char mw_count;
+ unsigned char spad_count;
+ unsigned char db_count;
+ unsigned char msix_vec_count;
+
+ /* one bit per usable doorbell: BIT_ULL(db_count) - 1 */
+ u64 db_valid_mask;
+ /* software copy of the doorbell mask register */
+ u64 db_mask;
+ /* event interrupt mask written to AMD_INTMASK_OFFSET */
+ u32 int_mask;
+
+ /* MSI-X bookkeeping; msix is NULL when MSI or INTx is used instead */
+ struct msix_entry *msix;
+ struct amd_ntb_vec *vec;
+
+ /* synchronize rmw access of db_mask and hw reg */
+ spinlock_t db_mask_lock;
+
+ /* BAR 0 mapping, and the same mapping offset by AMD_PEER_OFFSET */
+ void __iomem *self_mmio;
+ void __iomem *peer_mmio;
+ /* scratchpad offsets (0 or 0x20) assigned according to topology */
+ unsigned int self_spad;
+ unsigned int peer_spad;
+
+ /* delayed work that polls the link state */
+ struct delayed_work hb_timer;
+
+ /* per-device debugfs directory and its "info" file */
+ struct dentry *debugfs_dir;
+ struct dentry *debugfs_info;
+};
+
+/* accessors from the driver context to its PCI device, name and struct device */
+#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
+#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
+#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
+/* recover the driver context from an embedded ntb_dev or heartbeat work item */
+#define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
+
+#endif
limit_reg = bar2_off(ndev->xlat_reg->bar2_limit, bar);
if (bar < 4 || !ndev->bar4_split) {
- base = ioread64(mmio + base_reg);
+ base = ioread64(mmio + base_reg) & NTB_BAR_MASK_64;
/* Set the limit if supported, if size is not mw_size */
if (limit_reg && size != mw_size)
if ((addr + size) & (~0ull << 32))
return -EINVAL;
- base = ioread32(mmio + base_reg);
+ base = ioread32(mmio + base_reg) & NTB_BAR_MASK_32;
/* Set the limit if supported, if size is not mw_size */
if (limit_reg && size != mw_size)
#define NTB_UNSAFE_DB BIT_ULL(0)
#define NTB_UNSAFE_SPAD BIT_ULL(1)
+#define NTB_BAR_MASK_64 ~(0xfull)
+#define NTB_BAR_MASK_32 ~(0xfu)
+
struct intel_ntb_dev;
struct intel_ntb_reg {
#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
-#define ntb_ndev(ntb) container_of(ntb, struct intel_ntb_dev, ntb)
-#define hb_ndev(work) container_of(work, struct intel_ntb_dev, hb_timer.work)
+#define ntb_ndev(__ntb) container_of(__ntb, struct intel_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct intel_ntb_dev, \
+ hb_timer.work)
#endif
u64 rx_err_ver;
u64 rx_memcpy;
u64 rx_async;
+ u64 dma_rx_prep_err;
u64 tx_bytes;
u64 tx_pkts;
u64 tx_ring_full;
u64 tx_err_no_buf;
u64 tx_memcpy;
u64 tx_async;
+ u64 dma_tx_prep_err;
};
struct ntb_transport_mw {
#define QP_TO_MW(nt, qp) ((qp) % nt->mw_count)
#define NTB_QP_DEF_NUM_ENTRIES 100
#define NTB_LINK_DOWN_TIMEOUT 10
+#define DMA_RETRIES 20
+#define DMA_OUT_RESOURCE_TO 50
static void ntb_transport_rxc_db(unsigned long data);
static const struct ntb_ctx_ops ntb_transport_ops;
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"free tx - \t%u\n",
ntb_transport_tx_free_entry(qp));
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "DMA tx prep err - \t%llu\n",
+ qp->dma_tx_prep_err);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "DMA rx prep err - \t%llu\n",
+ qp->dma_rx_prep_err);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"\n");
qp->tx_err_no_buf = 0;
qp->tx_memcpy = 0;
qp->tx_async = 0;
+ qp->dma_tx_prep_err = 0;
+ qp->dma_rx_prep_err = 0;
}
static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
struct dmaengine_unmap_data *unmap;
dma_cookie_t cookie;
void *buf = entry->buf;
+ int retries = 0;
len = entry->len;
unmap->from_cnt = 1;
- txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
- unmap->addr[0], len,
- DMA_PREP_INTERRUPT);
- if (!txd)
+ for (retries = 0; retries < DMA_RETRIES; retries++) {
+ txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+ unmap->addr[0], len,
+ DMA_PREP_INTERRUPT);
+ if (txd)
+ break;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(DMA_OUT_RESOURCE_TO);
+ }
+
+ if (!txd) {
+ qp->dma_rx_prep_err++;
goto err_get_unmap;
+ }
txd->callback = ntb_rx_copy_callback;
txd->callback_param = entry;
void __iomem *offset;
size_t len = entry->len;
void *buf = entry->buf;
+ int retries = 0;
offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
unmap->to_cnt = 1;
- txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
- DMA_PREP_INTERRUPT);
- if (!txd)
+ for (retries = 0; retries < DMA_RETRIES; retries++) {
+ txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0],
+ len, DMA_PREP_INTERRUPT);
+ if (txd)
+ break;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(DMA_OUT_RESOURCE_TO);
+ }
+
+ if (!txd) {
+ qp->dma_tx_prep_err++;
goto err_get_unmap;
+ }
txd->callback = ntb_tx_copy_callback;
txd->callback_param = entry;
if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
if (qp->tx_handler)
- qp->tx_handler(qp->cb_data, qp, NULL, -EIO);
+ qp->tx_handler(qp, qp->cb_data, NULL, -EIO);
ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
&qp->tx_free_q);
functioning at a basic level.
If unsure, say N.
+
+config NTB_PERF
+ tristate "NTB RAW Perf Measuring Tool"
+ help
+ This is a tool to measure raw NTB performance by transferring data
+ to and from the window without additional software interaction.
+
+ If unsure, say N.
obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
+obj-$(CONFIG_NTB_PERF) += ntb_perf.o
--- /dev/null
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copy
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * PCIe NTB Perf Linux driver
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/ntb.h>
+
+#define DRIVER_NAME "ntb_perf"
+#define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool"
+
+#define DRIVER_LICENSE "Dual BSD/GPL"
+#define DRIVER_VERSION "1.0"
+#define DRIVER_AUTHOR "Dave Jiang <dave.jiang@intel.com>"
+
+/* retry delay of the link work, in milliseconds */
+#define PERF_LINK_DOWN_TIMEOUT 10
+/* value exchanged through the VERSION scratchpad; both sides must match */
+#define PERF_VERSION 0xffff0001
+/* capacity of the per-device thread context array */
+#define MAX_THREADS 32
+/* size of each source buffer and upper bound of one copy chunk */
+#define MAX_TEST_SIZE SZ_1M
+/* number of source buffers per thread; used as a mask, so a power of two */
+#define MAX_SRCS 32
+/* sleep between DMA prep retries (jiffies) and the maximum number of tries */
+#define DMA_OUT_RESOURCE_TO 50
+#define DMA_RETRIES 20
+#define SZ_4G (1ULL << 32)
+#define MAX_SEG_ORDER 20 /* no larger than 1M for kmalloc buffer */
+
+MODULE_LICENSE(DRIVER_LICENSE);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+static struct dentry *perf_debugfs_dir;
+
+/*
+ * Module parameters.  Both orders are exponents of two: the test copies
+ * (1 << run_order) bytes in total, in chunks of (1 << seg_order) bytes.
+ */
+static unsigned int seg_order = 19; /* 512K */
+module_param(seg_order, uint, 0644);
+MODULE_PARM_DESC(seg_order, "size order [2^n] of buffer segment for testing");
+
+static unsigned int run_order = 32; /* 4G */
+module_param(run_order, uint, 0644);
+MODULE_PARM_DESC(run_order, "size order [2^n] of total data to transfer");
+
+/* selects DMA-engine copies instead of CPU memcpy_toio() */
+static bool use_dma; /* default to 0 */
+module_param(use_dma, bool, 0644);
+MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");
+
+/* State of the single memory window (index 0) used by the test. */
+struct perf_mw {
+ /* outbound window as reported by ntb_mw_get_range() */
+ phys_addr_t phys_addr;
+ resource_size_t phys_size;
+ resource_size_t xlat_align;
+ resource_size_t xlat_align_size;
+ /* write-combining mapping of [phys_addr, phys_addr + phys_size) */
+ void __iomem *vbase;
+ /* requested size rounded up to xlat_align_size / xlat_align respectively */
+ size_t xlat_size;
+ size_t buf_size;
+ /* coherent buffer of buf_size bytes and its DMA address */
+ void *virt_addr;
+ dma_addr_t dma_addr;
+};
+
+struct perf_ctx;
+
+/* Per-thread test context. */
+struct pthr_ctx {
+ struct task_struct *thread;
+ /* owning device context */
+ struct perf_ctx *perf;
+ /* DMA copies submitted but not yet completed */
+ atomic_t dma_sync;
+ /* DMA channel held by this thread; NULL until one is acquired */
+ struct dma_chan *dma_chan;
+ /* number of copies abandoned because descriptor prep kept failing */
+ int dma_prep_err;
+ /* index of the next source buffer to use, wraps at MAX_SRCS */
+ int src_idx;
+ /* source buffers, allocated for the duration of one run */
+ void *srcs[MAX_SRCS];
+};
+
+/* Per-NTB-device context of the perf tool. */
+struct perf_ctx {
+ struct ntb_dev *ntb;
+ spinlock_t db_lock;
+ struct perf_mw mw;
+ /* set once the peer handshake and window setup succeeded */
+ bool link_is_up;
+ /* scheduled on link-down events */
+ struct work_struct link_cleanup;
+ /* scratchpad handshake with the peer; re-armed until it succeeds */
+ struct delayed_work link_work;
+ struct dentry *debugfs_node_dir;
+ struct dentry *debugfs_run;
+ struct dentry *debugfs_threads;
+ /* number of test threads to run */
+ u8 perf_threads;
+ /* value reported by the debugfs run file */
+ bool run;
+ struct pthr_ctx pthr_ctx[MAX_THREADS];
+ /* threads that reached the start rendezvous and have not finished yet */
+ atomic_t tsync;
+};
+
+/* Scratchpad register indices used for the handshake with the peer. */
+enum {
+ VERSION = 0,
+ /* upper and lower 32 bits of the memory window size */
+ MW_SZ_HIGH,
+ MW_SZ_LOW,
+ SPAD_MSG,
+ SPAD_ACK,
+ MAX_SPAD
+};
+
+/*
+ * NTB link event callback.  Link up: start the peer handshake after a
+ * two second delay.  Anything else: schedule the link cleanup work.
+ */
+static void perf_link_event(void *ctx)
+{
+	struct perf_ctx *perf = ctx;
+
+	if (ntb_link_is_up(perf->ntb, NULL, NULL) != 1) {
+		schedule_work(&perf->link_cleanup);
+		return;
+	}
+
+	schedule_delayed_work(&perf->link_work, 2*HZ);
+}
+
+/*
+ * NTB doorbell callback.  The tool does not act on doorbells; it only
+ * reads the vector mask and doorbell bits and logs them at debug level.
+ */
+static void perf_db_event(void *ctx, int vec)
+{
+	struct perf_ctx *perf = ctx;
+	u64 db_mask = ntb_db_vector_mask(perf->ntb, vec);
+	u64 db_bits = ntb_db_read(perf->ntb);
+
+	dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
+		vec, db_mask, db_bits);
+}
+
+/* NTB context callbacks of the perf tool: link state and doorbell events. */
+static const struct ntb_ctx_ops perf_ops = {
+ .link_event = perf_link_event,
+ .db_event = perf_db_event,
+};
+
+/*
+ * DMA completion callback: one copy submitted on behalf of the thread
+ * context passed as @data has finished, so drop its in-flight count.
+ */
+static void perf_copy_callback(void *data)
+{
+	struct pthr_ctx *thread_ctx = data;
+
+	atomic_dec(&thread_ctx->dma_sync);
+}
+
+/*
+ * Copy @size bytes from @src into the memory window at @dst.
+ *
+ * Without use_dma this is a synchronous memcpy_toio().  With use_dma the
+ * copy is queued on the thread's DMA channel and completes asynchronously:
+ * pctx->dma_sync is incremented on submission and decremented by
+ * perf_copy_callback().
+ *
+ * Returns @size when the copy was done or queued, -EINVAL when DMA is
+ * requested but no channel is available, -ENODEV when the DMA device
+ * rejects the alignment, -ENOMEM when no unmap data could be allocated,
+ * and 0 when mapping, descriptor preparation or submission failed.
+ */
+static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
+ char *src, size_t size)
+{
+ struct perf_ctx *perf = pctx->perf;
+ struct dma_async_tx_descriptor *txd;
+ struct dma_chan *chan = pctx->dma_chan;
+ struct dma_device *device;
+ struct dmaengine_unmap_data *unmap;
+ dma_cookie_t cookie;
+ size_t src_off, dst_off;
+ struct perf_mw *mw = &perf->mw;
+ u64 vbase, dst_vaddr;
+ dma_addr_t dst_phys;
+ int retries = 0;
+
+ if (!use_dma) {
+ memcpy_toio(dst, src, size);
+ return size;
+ }
+
+ if (!chan) {
+ dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
+ return -EINVAL;
+ }
+
+ device = chan->device;
+ /* offsets of both buffers within their pages */
+ src_off = (size_t)src & ~PAGE_MASK;
+ dst_off = (size_t)dst & ~PAGE_MASK;
+
+ if (!is_dma_copy_aligned(device, src_off, dst_off, size))
+ return -ENODEV;
+
+ /* translate dst from the ioremapped window to its bus address */
+ vbase = (u64)(u64 *)mw->vbase;
+ dst_vaddr = (u64)(u64 *)dst;
+ dst_phys = mw->phys_addr + (dst_vaddr - vbase);
+
+ unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+ if (!unmap)
+ return -ENOMEM;
+
+ unmap->len = size;
+ unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
+ src_off, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(device->dev, unmap->addr[0]))
+ goto err_get_unmap;
+
+ unmap->to_cnt = 1;
+
+ /* retry descriptor prep up to DMA_RETRIES times, sleeping in between */
+ do {
+ txd = device->device_prep_dma_memcpy(chan, dst_phys,
+ unmap->addr[0],
+ size, DMA_PREP_INTERRUPT);
+ if (!txd) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(DMA_OUT_RESOURCE_TO);
+ }
+ } while (!txd && (++retries < DMA_RETRIES));
+
+ if (!txd) {
+ pctx->dma_prep_err++;
+ goto err_get_unmap;
+ }
+
+ txd->callback = perf_copy_callback;
+ txd->callback_param = pctx;
+ dma_set_unmap(txd, unmap);
+
+ cookie = dmaengine_submit(txd);
+ if (dma_submit_error(cookie))
+ goto err_set_unmap;
+
+ atomic_inc(&pctx->dma_sync);
+ dma_async_issue_pending(chan);
+
+ return size;
+
+ /*
+ * NOTE(review): the extra put presumably drops the reference taken by
+ * dma_set_unmap() before the common put below - confirm.
+ */
+err_set_unmap:
+ dmaengine_unmap_put(unmap);
+err_get_unmap:
+ dmaengine_unmap_put(unmap);
+ return 0;
+}
+
+/*
+ * Push @total bytes from @src through the memory window at @dst in
+ * chunks of @buf_size bytes, wrapping back to the start of the window
+ * every @win_size / @buf_size chunks, then log the byte count, the
+ * elapsed time and the throughput.
+ *
+ * Returns 0 on success, -EINVAL when the window cannot hold one chunk,
+ * or the negative errno reported by perf_copy().  With use_dma the
+ * function does not return, even on error, before every queued copy has
+ * completed, because the caller frees the source buffers afterwards.
+ */
+static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
+			  u64 buf_size, u64 win_size, u64 total)
+{
+	int chunks, total_chunks, i;
+	int copied_chunks = 0;
+	int rc = 0;
+	u64 copied = 0;
+	ssize_t result;
+	char *tmp = dst;
+	u64 perf, diff_us;
+	ktime_t kstart, kstop, kdiff;
+
+	/* a window smaller than one chunk would be overrun by the copy */
+	if (!buf_size || win_size < buf_size)
+		return -EINVAL;
+
+	chunks = div64_u64(win_size, buf_size);
+	total_chunks = div64_u64(total, buf_size);
+	kstart = ktime_get();
+
+	for (i = 0; i < total_chunks; i++) {
+		result = perf_copy(pctx, tmp, src, buf_size);
+		if (result < 0) {
+			/* an errno, not a byte count: stop submitting */
+			rc = result;
+			break;
+		}
+		copied += result;
+		copied_chunks++;
+		if (copied_chunks == chunks) {
+			tmp = dst;
+			copied_chunks = 0;
+		} else {
+			tmp += buf_size;
+		}
+
+		/*
+		 * Probably should schedule every 4GB to prevent soft hang.
+		 * SZ_4G is a power of two, so masking equals the modulo and
+		 * avoids a 64-bit division on 32-bit builds.
+		 */
+		if (((copied & (SZ_4G - 1)) == 0) && !use_dma) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(1);
+		}
+	}
+
+	if (use_dma) {
+		if (!rc)
+			pr_info("%s: All DMA descriptors submitted\n",
+				current->comm);
+		/* wait for the completion callbacks to drain the counter */
+		while (atomic_read(&pctx->dma_sync) != 0)
+			msleep(20);
+	}
+
+	if (rc)
+		return rc;
+
+	kstop = ktime_get();
+	kdiff = ktime_sub(kstop, kstart);
+	diff_us = ktime_to_us(kdiff);
+
+	pr_info("%s: copied %llu bytes\n", current->comm, copied);
+
+	pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);
+
+	/* bytes per microsecond; guard against a zero-length interval */
+	perf = diff_us ? div64_u64(copied, diff_us) : 0;
+
+	pr_info("%s: MBytes/s: %llu\n", current->comm, perf);
+
+	return 0;
+}
+
+/*
+ * dma_request_channel() filter: accept only channels whose device sits on
+ * the NUMA node that was passed, encoded in the pointer value, as @node.
+ */
+static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
+{
+	int wanted_node = (int)(unsigned long)node;
+
+	return dev_to_node(&chan->dev->device) == wanted_node;
+}
+
+/*
+ * Body of one test kthread.
+ *
+ * Acquires a DMA_MEMCPY channel on the NTB device's NUMA node when use_dma
+ * is set and the thread has none yet, allocates MAX_SRCS source buffers of
+ * MAX_TEST_SIZE bytes, waits until all perf_threads threads have reached
+ * the rendezvous, and then runs perf_move_data() with one source buffer.
+ *
+ * Returns 0 on success, -ENODEV when no DMA channel could be acquired,
+ * -ENOMEM when a source buffer allocation fails, or -ENXIO when the
+ * transfer reports an error.  The source buffers are freed on every path
+ * that allocated them.
+ */
+static int ntb_perf_thread(void *data)
+{
+ struct pthr_ctx *pctx = data;
+ struct perf_ctx *perf = pctx->perf;
+ struct pci_dev *pdev = perf->ntb->pdev;
+ struct perf_mw *mw = &perf->mw;
+ char *dst;
+ u64 win_size, buf_size, total;
+ void *src;
+ int rc, node, i;
+ struct dma_chan *dma_chan = NULL;
+
+ pr_info("kthread %s starting...\n", current->comm);
+
+ node = dev_to_node(&pdev->dev);
+
+ /* a channel kept in pctx from an earlier run is reused */
+ if (use_dma && !pctx->dma_chan) {
+ dma_cap_mask_t dma_mask;
+
+ dma_cap_zero(dma_mask);
+ dma_cap_set(DMA_MEMCPY, dma_mask);
+ dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
+ (void *)(unsigned long)node);
+ if (!dma_chan) {
+ pr_warn("%s: cannot acquire DMA channel, quitting\n",
+ current->comm);
+ return -ENODEV;
+ }
+ pctx->dma_chan = dma_chan;
+ }
+
+ for (i = 0; i < MAX_SRCS; i++) {
+ pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
+ if (!pctx->srcs[i]) {
+ rc = -ENOMEM;
+ goto err;
+ }
+ }
+
+ win_size = mw->phys_size;
+ buf_size = 1ULL << seg_order;
+ total = 1ULL << run_order;
+
+ /* a chunk can never exceed the size of one source buffer */
+ if (buf_size > MAX_TEST_SIZE)
+ buf_size = MAX_TEST_SIZE;
+
+ dst = (char *)mw->vbase;
+
+ /* rendezvous: spin until every configured thread has checked in */
+ atomic_inc(&perf->tsync);
+ while (atomic_read(&perf->tsync) != perf->perf_threads)
+ schedule();
+
+ src = pctx->srcs[pctx->src_idx];
+ pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);
+
+ rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);
+
+ atomic_dec(&perf->tsync);
+
+ if (rc < 0) {
+ pr_err("%s: failed\n", current->comm);
+ rc = -ENXIO;
+ goto err;
+ }
+
+ for (i = 0; i < MAX_SRCS; i++) {
+ kfree(pctx->srcs[i]);
+ pctx->srcs[i] = NULL;
+ }
+
+ return 0;
+
+err:
+ for (i = 0; i < MAX_SRCS; i++) {
+ kfree(pctx->srcs[i]);
+ pctx->srcs[i] = NULL;
+ }
+
+ /* only a channel acquired by this invocation is released here */
+ if (dma_chan) {
+ dma_release_channel(dma_chan);
+ pctx->dma_chan = NULL;
+ }
+
+ return rc;
+}
+
+/*
+ * Release the inbound buffer of memory window 0: clear the translation,
+ * free the coherent allocation and reset the bookkeeping.  Does nothing
+ * when no buffer is allocated.
+ */
+static void perf_free_mw(struct perf_ctx *perf)
+{
+	struct pci_dev *pdev = perf->ntb->pdev;
+	struct perf_mw *mw = &perf->mw;
+
+	if (mw->virt_addr) {
+		ntb_mw_clear_trans(perf->ntb, 0);
+		dma_free_coherent(&pdev->dev, mw->buf_size,
+				  mw->virt_addr, mw->dma_addr);
+		mw->virt_addr = NULL;
+		mw->buf_size = 0;
+		mw->xlat_size = 0;
+	}
+}
+
+/*
+ * Allocate the inbound buffer for memory window 0 and program the
+ * window translation to point at it.
+ *
+ * @size is rounded up to the window's alignment requirements.  When the
+ * rounded translation size equals the current one, the existing setup is
+ * kept.  An existing buffer of a different size is released first.
+ *
+ * Returns 0 on success, -EINVAL for a zero @size, -ENOMEM when the
+ * coherent buffer cannot be allocated and -EIO when the translation
+ * cannot be programmed.  On failure no buffer is left allocated.
+ */
+static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
+{
+	struct perf_mw *mw = &perf->mw;
+	size_t xlat_size, buf_size;
+	int rc;
+
+	if (!size)
+		return -EINVAL;
+
+	xlat_size = round_up(size, mw->xlat_align_size);
+	buf_size = round_up(size, mw->xlat_align);
+
+	if (mw->xlat_size == xlat_size)
+		return 0;
+
+	if (mw->buf_size)
+		perf_free_mw(perf);
+
+	mw->xlat_size = xlat_size;
+	mw->buf_size = buf_size;
+
+	mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
+					   &mw->dma_addr, GFP_KERNEL);
+	if (!mw->virt_addr) {
+		mw->xlat_size = 0;
+		mw->buf_size = 0;
+		/* report the failure so the link is not marked up */
+		return -ENOMEM;
+	}
+
+	/* point the window at the new buffer; perf_free_mw() undoes this */
+	rc = ntb_mw_set_trans(perf->ntb, 0, mw->dma_addr, mw->xlat_size);
+	if (rc) {
+		dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
+		perf_free_mw(perf);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Delayed work performing the handshake with the peer: publish the local
+ * window size and tool version through the peer's scratchpads, read back
+ * what the peer published, and size the inbound window from it.
+ *
+ * link_is_up is set on success.  On a version mismatch or a window setup
+ * failure the work re-arms itself, as long as the NTB link is still up.
+ */
+static void perf_link_work(struct work_struct *work)
+{
+	struct perf_ctx *perf =
+		container_of(work, struct perf_ctx, link_work.work);
+	struct ntb_dev *ndev = perf->ntb;
+	struct device *dev = &ndev->pdev->dev;
+	u64 local_size = perf->mw.phys_size;
+	u64 peer_size;
+	u32 peer_ver;
+
+	dev_dbg(dev, "%s called\n", __func__);
+
+	ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(local_size));
+	ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(local_size));
+	ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
+
+	/* now read what peer wrote */
+	peer_ver = ntb_spad_read(ndev, VERSION);
+	if (peer_ver != PERF_VERSION) {
+		dev_dbg(dev, "Remote version = %#x\n", peer_ver);
+		goto retry;
+	}
+
+	peer_size = (u64)ntb_spad_read(ndev, MW_SZ_HIGH) << 32;
+	peer_size |= ntb_spad_read(ndev, MW_SZ_LOW);
+
+	dev_dbg(dev, "Remote MW size = %#llx\n", peer_size);
+
+	if (perf_set_mw(perf, peer_size)) {
+		perf_free_mw(perf);
+		goto retry;
+	}
+
+	perf->link_is_up = true;
+	return;
+
+retry:
+	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
+		schedule_delayed_work(&perf->link_work,
+				      msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
+}
+
+/*
+ * Work item run for link cleanup: if the link never came up, make sure the
+ * pending link negotiation work is cancelled.
+ */
+static void perf_link_cleanup(struct work_struct *work)
+{
+	struct perf_ctx *perf;
+
+	perf = container_of(work, struct perf_ctx, link_cleanup);
+
+	dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+	if (perf->link_is_up)
+		return;
+
+	cancel_delayed_work_sync(&perf->link_work);
+}
+
+/*
+ * Query the range and alignment of memory window 0 and map it
+ * write-combined into perf->mw.vbase.
+ *
+ * Return: 0 on success, the ntb_mw_get_range() error, or -ENOMEM when the
+ * mapping fails.
+ */
+static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
+{
+	struct perf_mw *mw = &perf->mw;
+	int rc;
+
+	rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
+			      &mw->xlat_align, &mw->xlat_align_size);
+	if (rc)
+		return rc;
+
+	mw->vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
+
+	return mw->vbase ? 0 : -ENOMEM;
+}
+
+/*
+ * debugfs "run" read handler: report the current value of perf->run as a
+ * decimal number followed by a newline.
+ *
+ * The text is formatted into a small on-stack buffer, so no allocation can
+ * fail here.
+ *
+ * Return: number of bytes copied to @ubuf (0 when there is no context), or a
+ * negative errno from simple_read_from_buffer().
+ */
+static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
+				size_t count, loff_t *offp)
+{
+	struct perf_ctx *perf = filp->private_data;
+	char buf[16];
+	ssize_t out_offset;
+
+	if (!perf)
+		return 0;
+
+	out_offset = snprintf(buf, sizeof(buf), "%d\n", perf->run);
+
+	return simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+}
+
+/*
+ * debugfs "run" write handler: toggle the test.
+ *
+ * If a run is in progress the worker threads are stopped; otherwise the
+ * thread count, seg_order and run_order are clamped and perf->perf_threads
+ * worker threads are created and woken.  The written data itself is not
+ * inspected.
+ *
+ * Return: @count when the request was handled, 0 when the link is down or no
+ * threads are configured, -ENXIO when a worker thread cannot be created (the
+ * threads already started by this call are stopped again).
+ */
+static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
+				 size_t count, loff_t *offp)
+{
+	struct perf_ctx *perf = filp->private_data;
+	int node, i;
+
+	if (!perf->link_is_up)
+		return 0;
+
+	if (perf->perf_threads == 0)
+		return 0;
+
+	if (atomic_read(&perf->tsync) == 0)
+		perf->run = false;
+
+	if (perf->run) {
+		/* lets stop the threads */
+		perf->run = false;
+		for (i = 0; i < MAX_THREADS; i++) {
+			if (!perf->pthr_ctx[i].thread)
+				break;
+			kthread_stop(perf->pthr_ctx[i].thread);
+			perf->pthr_ctx[i].thread = NULL;
+		}
+		return count;
+	}
+
+	perf->run = true;
+
+	if (perf->perf_threads > MAX_THREADS) {
+		perf->perf_threads = MAX_THREADS;
+		pr_info("Reset total threads to: %u\n", MAX_THREADS);
+	}
+
+	/* no greater than 1M */
+	if (seg_order > MAX_SEG_ORDER) {
+		seg_order = MAX_SEG_ORDER;
+		pr_info("Fix seg_order to %u\n", seg_order);
+	}
+
+	if (run_order < seg_order) {
+		run_order = seg_order;
+		pr_info("Fix run_order to %u\n", run_order);
+	}
+
+	node = dev_to_node(&perf->ntb->pdev->dev);
+	/* launch kernel thread */
+	for (i = 0; i < perf->perf_threads; i++) {
+		struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+
+		atomic_set(&pctx->dma_sync, 0);
+		pctx->perf = perf;
+		pctx->thread = kthread_create_on_node(ntb_perf_thread,
+						      (void *)pctx,
+						      node, "ntb_perf %d", i);
+		if (IS_ERR(pctx->thread)) {
+			/*
+			 * Creation failure is reported as an ERR_PTR, never
+			 * NULL.  Drop the bogus pointer and unwind only the
+			 * threads started by this call (indices below i).
+			 */
+			pctx->thread = NULL;
+			perf->run = false;
+			while (i--) {
+				kthread_stop(perf->pthr_ctx[i].thread);
+				perf->pthr_ctx[i].thread = NULL;
+			}
+			return -ENXIO;
+		}
+
+		wake_up_process(pctx->thread);
+	}
+
+	return count;
+}
+
+/*
+ * File operations for the debugfs "run" file: reads go to debugfs_run_read(),
+ * writes go to debugfs_run_write().
+ */
+static const struct file_operations ntb_perf_debugfs_run = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = debugfs_run_read,
+ .write = debugfs_run_write,
+};
+
+/*
+ * Create the debugfs hierarchy for one device: the module-wide directory (on
+ * first use), a per-device directory named after the PCI device, and the
+ * "run" and "threads" entries inside it.
+ *
+ * Return: 0 on success, -ENODEV when debugfs is unavailable or any entry
+ * cannot be created.
+ */
+static int perf_debugfs_setup(struct perf_ctx *perf)
+{
+	struct pci_dev *pdev = perf->ntb->pdev;
+
+	if (!debugfs_initialized())
+		goto fail;
+
+	if (!perf_debugfs_dir)
+		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!perf_debugfs_dir)
+		goto fail;
+
+	perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+						    perf_debugfs_dir);
+	if (!perf->debugfs_node_dir)
+		goto fail;
+
+	perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
+						perf->debugfs_node_dir, perf,
+						&ntb_perf_debugfs_run);
+	if (!perf->debugfs_run)
+		goto fail;
+
+	perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
+						  perf->debugfs_node_dir,
+						  &perf->perf_threads);
+	if (!perf->debugfs_threads)
+		goto fail;
+
+	return 0;
+
+fail:
+	return -ENODEV;
+}
+
+/*
+ * perf_probe() - NTB client probe callback
+ * @client: the NTB client being bound (unused)
+ * @ntb: the NTB device to attach to
+ *
+ * Allocates the per-device context on the device's NUMA node, maps memory
+ * window 0, registers the context with the NTB core, enables the link and
+ * creates the debugfs entries.
+ *
+ * Every failure unwinds exactly what was set up before it.  In particular the
+ * context is unregistered and the link disabled before the context is freed,
+ * so no link event can reach freed memory.
+ *
+ * Return: 0 on success or a negative errno.
+ */
+static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct pci_dev *pdev = ntb->pdev;
+	struct perf_ctx *perf;
+	int node;
+	int rc;
+
+	node = dev_to_node(&pdev->dev);
+
+	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
+	if (!perf)
+		return -ENOMEM;
+
+	perf->ntb = ntb;
+	perf->perf_threads = 1;
+	atomic_set(&perf->tsync, 0);
+	perf->run = false;
+	perf->link_is_up = false;
+	spin_lock_init(&perf->db_lock);
+	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
+	INIT_WORK(&perf->link_cleanup, perf_link_cleanup);
+
+	/* Without a mapped window there is nothing to test against. */
+	rc = perf_setup_mw(ntb, perf);
+	if (rc)
+		goto err_free;
+
+	rc = ntb_set_ctx(ntb, perf, &perf_ops);
+	if (rc)
+		goto err_unmap;
+
+	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+	ntb_link_event(ntb);
+
+	rc = perf_debugfs_setup(perf);
+	if (rc)
+		goto err_link;
+
+	return 0;
+
+err_link:
+	/* Remove whatever part of the per-device debugfs tree was created. */
+	debugfs_remove_recursive(perf->debugfs_node_dir);
+	ntb_link_disable(ntb);
+	ntb_clear_ctx(ntb);
+	cancel_delayed_work_sync(&perf->link_work);
+	cancel_work_sync(&perf->link_cleanup);
+	/* The link work may already have allocated the buffer. */
+	perf_free_mw(perf);
+err_unmap:
+	iounmap(perf->mw.vbase);
+err_free:
+	kfree(perf);
+	return rc;
+}
+
+/*
+ * perf_remove() - NTB client remove callback
+ * @client: the NTB client being unbound (unused)
+ * @ntb: the NTB device being detached
+ *
+ * Cancels the link work items, unregisters the context, disables the link,
+ * removes the debugfs tree, releases any DMA channels held by the thread
+ * contexts, and finally releases the memory window resources (the coherent
+ * buffer and the write-combined mapping) before freeing the context.
+ */
+static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+	struct perf_ctx *perf = ntb->ctx;
+	int i;
+
+	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
+
+	cancel_delayed_work_sync(&perf->link_work);
+	cancel_work_sync(&perf->link_cleanup);
+
+	ntb_clear_ctx(ntb);
+	ntb_link_disable(ntb);
+
+	debugfs_remove_recursive(perf_debugfs_dir);
+	perf_debugfs_dir = NULL;
+
+	if (use_dma) {
+		for (i = 0; i < MAX_THREADS; i++) {
+			struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+
+			if (pctx->dma_chan)
+				dma_release_channel(pctx->dma_chan);
+		}
+	}
+
+	/* perf_free_mw() is a no-op when no buffer was ever allocated. */
+	perf_free_mw(perf);
+	if (perf->mw.vbase)
+		iounmap(perf->mw.vbase);
+
+	kfree(perf);
+}
+
+/* NTB client descriptor: perf_probe()/perf_remove() are the bind/unbind hooks. */
+static struct ntb_client perf_client = {
+ .ops = {
+ .probe = perf_probe,
+ .remove = perf_remove,
+ },
+};
+module_ntb_client(perf_client);
To compile this driver as a module, choose M here: the
module will be called nvme.
+
+config BLK_DEV_NVME_SCSI
+ bool "SCSI emulation for NVMe device nodes"
+ depends on BLK_DEV_NVME
+ ---help---
+ This adds support for the SG_IO ioctl on the NVMe character
+ and block device nodes, as well as a translation for a small
+ number of selected SCSI commands to NVMe commands to the NVMe
+ driver. If you don't know what this means you probably want
+ to say N here, and if you know what it means you probably
+ want to say N as well.
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
-lightnvm-$(CONFIG_NVM) := lightnvm.o
-nvme-y += pci.o scsi.o $(lightnvm-y)
+lightnvm-$(CONFIG_NVM) := lightnvm.o
+nvme-y += core.o pci.o $(lightnvm-y)
+nvme-$(CONFIG_BLK_DEV_NVME_SCSI) += scsi.o
--- /dev/null
+/*
+ * NVM Express device driver
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/hdreg.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/pr.h>
+#include <linux/ptrace.h>
+#include <linux/nvme_ioctl.h>
+#include <linux/t10-pi.h>
+#include <scsi/sg.h>
+#include <asm/unaligned.h>
+
+#include "nvme.h"
+
+/* 1U << MINORBITS */
+#define NVME_MINORS (1U << MINORBITS)
+
+/* Module parameter; presumably the block device major - its use is outside this section. */
+static int nvme_major;
+module_param(nvme_major, int, 0);
+
+/* Module parameter; presumably the character device major - its use is outside this section. */
+static int nvme_char_major;
+module_param(nvme_char_major, int, 0);
+
+/* List of controllers, searched by instance number in nvme_dev_open(). */
+static LIST_HEAD(nvme_ctrl_list);
+/* Held while walking nvme_ctrl_list and while reading/clearing disk->private_data. */
+DEFINE_SPINLOCK(dev_list_lock);
+
+static struct class *nvme_class;
+
+/*
+ * kref release callback for a namespace: unregister it from LightNVM if it
+ * was registered there, detach it from its gendisk under dev_list_lock, then
+ * drop the controller and disk references and free the namespace.
+ */
+static void nvme_free_ns(struct kref *kref)
+{
+	struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
+	struct gendisk *disk = ns->disk;
+
+	if (ns->type == NVME_NS_LIGHTNVM)
+		nvme_nvm_unregister(ns->queue, disk->disk_name);
+
+	/* Pairs with the lookup in nvme_get_ns_from_disk(). */
+	spin_lock(&dev_list_lock);
+	disk->private_data = NULL;
+	spin_unlock(&dev_list_lock);
+
+	nvme_put_ctrl(ns->ctrl);
+	put_disk(disk);
+	kfree(ns);
+}
+
+/* Drop one reference on @ns; nvme_free_ns() runs when the last one goes away. */
+static void nvme_put_ns(struct nvme_ns *ns)
+{
+ kref_put(&ns->kref, nvme_free_ns);
+}
+
+/*
+ * Look up the namespace attached to @disk and take a reference on it.
+ *
+ * Return: the namespace with its refcount raised, or NULL when the disk has
+ * no namespace or the namespace's refcount has already dropped to zero.
+ */
+static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
+{
+	struct nvme_ns *ns;
+
+	spin_lock(&dev_list_lock);
+	ns = disk->private_data;
+	if (ns) {
+		if (!kref_get_unless_zero(&ns->kref))
+			ns = NULL;
+	}
+	spin_unlock(&dev_list_lock);
+
+	return ns;
+}
+
+/*
+ * Requeue @req through blk_mq_requeue_request() and, unless its queue is
+ * currently stopped, kick the requeue list.  The stopped check and the kick
+ * are done with the queue lock held and interrupts saved/disabled.
+ */
+void nvme_requeue_req(struct request *req)
+{
+ unsigned long flags;
+
+ blk_mq_requeue_request(req);
+ spin_lock_irqsave(req->q->queue_lock, flags);
+ if (!blk_queue_stopped(req->q))
+ blk_mq_kick_requeue_list(req->q);
+ spin_unlock_irqrestore(req->q->queue_lock, flags);
+}
+
+/*
+ * Allocate a blk-mq request on @q that carries the NVMe command @cmd as a
+ * driver-private request.  Bit 0 of the opcode is passed to the allocator as
+ * the data direction.
+ *
+ * Return: the prepared request, or the ERR_PTR from blk_mq_alloc_request().
+ */
+struct request *nvme_alloc_request(struct request_queue *q,
+		struct nvme_command *cmd, unsigned int flags)
+{
+	struct request *rq;
+	bool is_write = cmd->common.opcode & 1;
+
+	rq = blk_mq_alloc_request(q, is_write, flags);
+	if (IS_ERR(rq))
+		return rq;
+
+	/* The request carries no bio and no block-layer position. */
+	rq->bio = rq->biotail = NULL;
+	rq->__sector = (sector_t) -1;
+	rq->__data_len = 0;
+
+	rq->cmd_type = REQ_TYPE_DRV_PRIV;
+	rq->cmd_flags |= REQ_FAILFAST_DRIVER;
+
+	rq->cmd = (unsigned char *)cmd;
+	rq->cmd_len = sizeof(struct nvme_command);
+	rq->special = NULL;
+
+	return rq;
+}
+
+/*
+ * Submit @cmd on @q and wait for it to complete, optionally mapping the
+ * kernel buffer @buffer of @bufflen bytes as its data.  A @timeout of zero
+ * selects ADMIN_TIMEOUT.  When @result is non-NULL it receives the value the
+ * completion stored in req->special.
+ *
+ * Returns 0 on success.  If the result is negative, it's a Linux error code;
+ * if the result is positive, it's an NVM Express status code
+ */
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
+{
+	struct request *rq;
+	int ret;
+
+	rq = nvme_alloc_request(q, cmd, 0);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	if (timeout)
+		rq->timeout = timeout;
+	else
+		rq->timeout = ADMIN_TIMEOUT;
+
+	if (buffer && bufflen) {
+		ret = blk_rq_map_kern(q, rq, buffer, bufflen, GFP_KERNEL);
+		if (ret) {
+			blk_mq_free_request(rq);
+			return ret;
+		}
+	}
+
+	blk_execute_rq(rq->q, NULL, rq, 0);
+	if (result)
+		*result = (u32)(uintptr_t)rq->special;
+	ret = rq->errors;
+
+	blk_mq_free_request(rq);
+	return ret;
+}
+
+/*
+ * Convenience wrapper around __nvme_submit_sync_cmd() that discards the
+ * command result and passes a timeout of 0.
+ */
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *buffer, unsigned bufflen)
+{
+ return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
+}
+
+/*
+ * __nvme_submit_user_cmd() - execute a command on behalf of user space
+ * @q: queue to submit on; q->queuedata, when set, is the owning namespace
+ * @cmd: the NVMe command to execute
+ * @ubuffer: user data buffer (may be NULL)
+ * @bufflen: length of @ubuffer in bytes
+ * @meta_buffer: user metadata buffer (may be NULL)
+ * @meta_len: length of @meta_buffer in bytes
+ * @meta_seed: value stored as the integrity payload's starting sector
+ * @result: if non-NULL, receives the value the completion left in req->special
+ * @timeout: timeout in jiffies, or 0 for ADMIN_TIMEOUT
+ *
+ * The data buffer is mapped directly from user space.  Metadata is bounced
+ * through a kernel buffer attached to the bio as an integrity payload; it is
+ * copied in before submission for writes (opcode bit 0 set) and copied back
+ * afterwards for successful non-writes.  Metadata is only set up when both a
+ * metadata buffer and a non-zero metadata length are supplied, so that a
+ * zero-length allocation is never turned into a page reference.
+ *
+ * Return: 0 on success, a negative errno, or a positive NVMe status code.
+ */
+int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void __user *ubuffer, unsigned bufflen,
+		void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
+		u32 *result, unsigned timeout)
+{
+	bool write = cmd->common.opcode & 1;
+	struct nvme_ns *ns = q->queuedata;
+	struct gendisk *disk = ns ? ns->disk : NULL;
+	struct request *req;
+	struct bio *bio = NULL;
+	void *meta = NULL;
+	int ret;
+
+	req = nvme_alloc_request(q, cmd, 0);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
+
+	if (ubuffer && bufflen) {
+		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
+				GFP_KERNEL);
+		if (ret)
+			goto out;
+		bio = req->bio;
+
+		if (!disk)
+			goto submit;
+		bio->bi_bdev = bdget_disk(disk, 0);
+		if (!bio->bi_bdev) {
+			ret = -ENODEV;
+			goto out_unmap;
+		}
+
+		if (meta_buffer && meta_len) {
+			struct bio_integrity_payload *bip;
+
+			meta = kmalloc(meta_len, GFP_KERNEL);
+			if (!meta) {
+				ret = -ENOMEM;
+				goto out_unmap;
+			}
+
+			if (write) {
+				if (copy_from_user(meta, meta_buffer,
+						meta_len)) {
+					ret = -EFAULT;
+					goto out_free_meta;
+				}
+			}
+
+			bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
+			if (IS_ERR(bip)) {
+				ret = PTR_ERR(bip);
+				goto out_free_meta;
+			}
+
+			bip->bip_iter.bi_size = meta_len;
+			bip->bip_iter.bi_sector = meta_seed;
+
+			ret = bio_integrity_add_page(bio, virt_to_page(meta),
+					meta_len, offset_in_page(meta));
+			if (ret != meta_len) {
+				ret = -ENOMEM;
+				goto out_free_meta;
+			}
+		}
+	}
+ submit:
+	blk_execute_rq(req->q, disk, req, 0);
+	ret = req->errors;
+	if (result)
+		*result = (u32)(uintptr_t)req->special;
+	if (meta && !ret && !write) {
+		if (copy_to_user(meta_buffer, meta, meta_len))
+			ret = -EFAULT;
+	}
+ out_free_meta:
+	kfree(meta);
+ out_unmap:
+	if (bio) {
+		if (disk && bio->bi_bdev)
+			bdput(bio->bi_bdev);
+		blk_rq_unmap_user(bio);
+	}
+ out:
+	blk_mq_free_request(req);
+	return ret;
+}
+
+/*
+ * Wrapper around __nvme_submit_user_cmd() for commands without a separate
+ * metadata buffer: metadata pointer, length and seed are passed as NULL/0/0.
+ */
+int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void __user *ubuffer, unsigned bufflen, u32 *result,
+ unsigned timeout)
+{
+ return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
+ result, timeout);
+}
+
+/*
+ * Issue an Identify command with CNS 1 on the admin queue.
+ *
+ * On success *@id points to a freshly allocated result buffer that the caller
+ * must kfree().  On failure the buffer has already been freed and *@id must
+ * not be used.
+ *
+ * Return: 0, a negative errno, or a positive NVMe status code.
+ */
+int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+{
+	struct nvme_command c = { };
+	struct nvme_id_ctrl *buf;
+	int error;
+
+	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.cns = cpu_to_le32(1);
+
+	buf = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+	*id = buf;
+	if (!buf)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, buf,
+			sizeof(struct nvme_id_ctrl));
+	if (error)
+		kfree(buf);
+	return error;
+}
+
+/*
+ * Issue an Identify command with CNS 2 for @nsid on the admin queue, placing
+ * the 0x1000-byte response in @ns_list.
+ *
+ * Return: 0, a negative errno, or a positive NVMe status code.
+ */
+static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
+{
+	struct nvme_command c = { };
+	int status;
+
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.nsid = cpu_to_le32(nsid);
+	c.identify.cns = cpu_to_le32(2);
+
+	status = nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
+	return status;
+}
+
+/*
+ * Issue an Identify command for namespace @nsid on the admin queue.
+ *
+ * On success *@id points to a freshly allocated result buffer that the caller
+ * must kfree().  On failure the buffer has already been freed and *@id must
+ * not be used.
+ *
+ * Return: 0, a negative errno, or a positive NVMe status code.
+ */
+int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
+		struct nvme_id_ns **id)
+{
+	struct nvme_command c = { };
+	int error;
+
+	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.nsid = cpu_to_le32(nsid);
+
+	*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
+	if (!*id)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+			sizeof(struct nvme_id_ns));
+	if (error)
+		kfree(*id);
+	return error;
+}
+
+/*
+ * Issue a Get Features admin command for feature @fid on namespace @nsid,
+ * with @dma_addr as PRP1.  The command result is stored in *@result.
+ *
+ * Return: 0, a negative errno, or a positive NVMe status code.
+ */
+int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
+					dma_addr_t dma_addr, u32 *result)
+{
+	struct request_queue *admin_q = dev->admin_q;
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode = nvme_admin_get_features;
+	c.features.fid = cpu_to_le32(fid);
+	c.features.nsid = cpu_to_le32(nsid);
+	c.features.prp1 = cpu_to_le64(dma_addr);
+
+	return __nvme_submit_sync_cmd(admin_q, &c, NULL, 0, result, 0);
+}
+
+/*
+ * Issue a Set Features admin command for feature @fid with @dword11 as the
+ * feature value and @dma_addr as PRP1.  The command result is stored in
+ * *@result.
+ *
+ * Return: 0, a negative errno, or a positive NVMe status code.
+ */
+int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+					dma_addr_t dma_addr, u32 *result)
+{
+	struct request_queue *admin_q = dev->admin_q;
+	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.features.opcode = nvme_admin_set_features;
+	c.features.fid = cpu_to_le32(fid);
+	c.features.dword11 = cpu_to_le32(dword11);
+	c.features.prp1 = cpu_to_le64(dma_addr);
+
+	return __nvme_submit_sync_cmd(admin_q, &c, NULL, 0, result, 0);
+}
+
+/*
+ * Fetch the SMART log page (NVME_LOG_SMART) for namespace 0xFFFFFFFF.
+ *
+ * cdw10 carries the log identifier in its low bits and, shifted left by 16,
+ * the transfer length expressed as a zero-based count of dwords.
+ *
+ * On success *@log points to a freshly allocated buffer that the caller must
+ * kfree().  On failure the buffer has already been freed and *@log must not
+ * be used.
+ *
+ * Return: 0, a negative errno, or a positive NVMe status code.
+ */
+int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
+{
+	struct nvme_command c = { };
+	int error;
+
+	c.common.opcode = nvme_admin_get_log_page;
+	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
+	c.common.cdw10[0] = cpu_to_le32(
+			(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+			 NVME_LOG_SMART);
+
+	*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+	if (!*log)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
+			sizeof(struct nvme_smart_log));
+	if (error)
+		kfree(*log);
+	return error;
+}
+
+/*
+ * Request *@count I/O queues via the NVME_FEAT_NUM_QUEUES feature.  The same
+ * zero-based count is sent in both halves of dword 11; the reply holds two
+ * zero-based 16-bit counts, and the smaller of them (plus one) caps *@count.
+ *
+ * Return: 0 on success with *@count possibly lowered, otherwise the status
+ * from nvme_set_features().
+ */
+int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
+{
+	u32 q_count = (*count - 1) | ((*count - 1) << 16);
+	u32 result, granted_lo, granted_hi;
+	int status, nr_io_queues;
+
+	status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
+			&result);
+	if (status)
+		return status;
+
+	granted_lo = result & 0xffff;
+	granted_hi = result >> 16;
+	nr_io_queues = min(granted_lo, granted_hi) + 1;
+
+	if (nr_io_queues < *count)
+		*count = nr_io_queues;
+	return 0;
+}
+
+/*
+ * NVME_IOCTL_SUBMIT_IO handler: build a read, write or compare command from
+ * the user's struct nvme_user_io and execute it on the namespace queue.
+ *
+ * With extended LBAs (ns->ext) the metadata travels inline, so its size is
+ * added to the data length and no separate metadata buffer is used.
+ * Otherwise a non-zero metadata length requires a non-NULL, 4-byte aligned
+ * user metadata pointer.
+ *
+ * Return: 0, a negative errno, or a positive NVMe status code.
+ */
+static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
+{
+	struct nvme_user_io io;
+	struct nvme_command c;
+	unsigned length, meta_len;
+	void __user *metadata;
+
+	if (copy_from_user(&io, uio, sizeof(io)))
+		return -EFAULT;
+
+	if (io.opcode != nvme_cmd_write &&
+	    io.opcode != nvme_cmd_read &&
+	    io.opcode != nvme_cmd_compare)
+		return -EINVAL;
+
+	/* nblocks is zero-based. */
+	length = (io.nblocks + 1) << ns->lba_shift;
+	meta_len = (io.nblocks + 1) * ns->ms;
+	metadata = (void __user *)(uintptr_t)io.metadata;
+
+	if (ns->ext) {
+		length += meta_len;
+		meta_len = 0;
+	} else if (meta_len) {
+		if (!io.metadata || (io.metadata & 3))
+			return -EINVAL;
+	}
+
+	memset(&c, 0, sizeof(c));
+	c.rw.opcode = io.opcode;
+	c.rw.flags = io.flags;
+	c.rw.nsid = cpu_to_le32(ns->ns_id);
+	c.rw.slba = cpu_to_le64(io.slba);
+	c.rw.length = cpu_to_le16(io.nblocks);
+	c.rw.control = cpu_to_le16(io.control);
+	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
+	c.rw.reftag = cpu_to_le32(io.reftag);
+	c.rw.apptag = cpu_to_le16(io.apptag);
+	c.rw.appmask = cpu_to_le16(io.appmask);
+
+	return __nvme_submit_user_cmd(ns->queue, &c,
+			(void __user *)(uintptr_t)io.addr, length,
+			metadata, meta_len, io.slba, NULL, 0);
+}
+
+/*
+ * Passthrough handler: translate the user's struct nvme_passthru_cmd into an
+ * NVMe command and execute it on the namespace queue when @ns is given, or on
+ * the controller's admin queue otherwise.  Requires CAP_SYS_ADMIN.
+ *
+ * Whenever the command was actually executed (status >= 0) its result is
+ * written back to ucmd->result.
+ *
+ * Return: 0, a negative errno, or a positive NVMe status code.
+ */
+static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+			struct nvme_passthru_cmd __user *ucmd)
+{
+	struct nvme_passthru_cmd cmd;
+	struct nvme_command c;
+	struct request_queue *q;
+	unsigned timeout = 0;
+	int status;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+		return -EFAULT;
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = cmd.opcode;
+	c.common.flags = cmd.flags;
+	c.common.nsid = cpu_to_le32(cmd.nsid);
+	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+	c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
+	c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
+	c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
+	c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
+	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
+	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
+
+	if (cmd.timeout_ms)
+		timeout = msecs_to_jiffies(cmd.timeout_ms);
+
+	if (ns)
+		q = ns->queue;
+	else
+		q = ctrl->admin_q;
+
+	status = nvme_submit_user_cmd(q, &c,
+			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
+			&cmd.result, timeout);
+	if (status < 0)
+		return status;
+
+	if (put_user(cmd.result, &ucmd->result))
+		return -EFAULT;
+
+	return status;
+}
+
+/*
+ * Block device ioctl entry point.  Dispatches the NVMe ioctls and, when
+ * CONFIG_BLK_DEV_NVME_SCSI is enabled, the SG_GET_VERSION_NUM and SG_IO
+ * ioctls; anything else yields -ENOTTY.
+ */
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+		unsigned int cmd, unsigned long arg)
+{
+	struct nvme_ns *ns = bdev->bd_disk->private_data;
+	void __user *argp = (void __user *)arg;
+	int ret;
+
+	switch (cmd) {
+	case NVME_IOCTL_ID:
+		force_successful_syscall_return();
+		ret = ns->ns_id;
+		break;
+	case NVME_IOCTL_ADMIN_CMD:
+		ret = nvme_user_cmd(ns->ctrl, NULL, argp);
+		break;
+	case NVME_IOCTL_IO_CMD:
+		ret = nvme_user_cmd(ns->ctrl, ns, argp);
+		break;
+	case NVME_IOCTL_SUBMIT_IO:
+		ret = nvme_submit_io(ns, argp);
+		break;
+#ifdef CONFIG_BLK_DEV_NVME_SCSI
+	case SG_GET_VERSION_NUM:
+		ret = nvme_sg_get_version_num(argp);
+		break;
+	case SG_IO:
+		ret = nvme_sg_io(ns, argp);
+		break;
+#endif
+	default:
+		ret = -ENOTTY;
+		break;
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * Compat ioctl entry point: SG_IO is answered with -ENOIOCTLCMD, everything
+ * else is forwarded unchanged to nvme_ioctl().
+ */
+static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
+			unsigned int cmd, unsigned long arg)
+{
+	if (cmd == SG_IO)
+		return -ENOIOCTLCMD;
+
+	return nvme_ioctl(bdev, mode, cmd, arg);
+}
+#else
+#define nvme_compat_ioctl	NULL
+#endif
+
+/*
+ * Block device open: take a namespace reference for the lifetime of the open.
+ * Fails with -ENXIO when the disk no longer has a live namespace.
+ */
+static int nvme_open(struct block_device *bdev, fmode_t mode)
+{
+	if (!nvme_get_ns_from_disk(bdev->bd_disk))
+		return -ENXIO;
+
+	return 0;
+}
+
+/* Block device release: drop a reference on the namespace attached to @disk. */
+static void nvme_release(struct gendisk *disk, fmode_t mode)
+{
+ nvme_put_ns(disk->private_data);
+}
+
+/*
+ * Report a made-up geometry: 64 heads, 32 sectors per track, and a cylinder
+ * count derived from the capacity (64 * 32 = 2^11 sectors per cylinder).
+ */
+static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+	sector_t capacity = get_capacity(bdev->bd_disk);
+
+	/* some standard values */
+	geo->heads = 1 << 6;
+	geo->sectors = 1 << 5;
+	geo->cylinders = capacity >> 11;
+
+	return 0;
+}
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+/*
+ * Register a block integrity profile for the namespace that matches its
+ * protection information type, with the namespace metadata size as the tuple
+ * size, and limit the queue to a single integrity segment.
+ *
+ * The template is zero-initialised so that fields this function does not set
+ * are handed to blk_integrity_register() as zero rather than stack garbage.
+ */
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+	struct blk_integrity integrity = { };
+
+	switch (ns->pi_type) {
+	case NVME_NS_DPS_PI_TYPE3:
+		integrity.profile = &t10_pi_type3_crc;
+		break;
+	case NVME_NS_DPS_PI_TYPE1:
+	case NVME_NS_DPS_PI_TYPE2:
+		integrity.profile = &t10_pi_type1_crc;
+		break;
+	default:
+		integrity.profile = NULL;
+		break;
+	}
+	integrity.tuple_size = ns->ms;
+	blk_integrity_register(ns->disk, &integrity);
+	blk_queue_max_integrity_segments(ns->queue, 1);
+}
+#else
+/* Block integrity support is compiled out: nothing to register. */
+static void nvme_init_integrity(struct nvme_ns *ns)
+{
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
+/*
+ * Enable discard on the namespace queue: alignment and granularity are one
+ * logical block, the maximum discard size is 0xffffffff sectors, and
+ * discarded blocks are not advertised as reading back zeroes.
+ */
+static void nvme_config_discard(struct nvme_ns *ns)
+{
+	struct request_queue *q = ns->queue;
+	u32 lbs = queue_logical_block_size(q);
+
+	q->limits.discard_zeroes_data = 0;
+	q->limits.discard_alignment = lbs;
+	q->limits.discard_granularity = lbs;
+	blk_queue_max_discard_sectors(q, 0xffffffff);
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+}
+
+/*
+ * Re-read the Identify Namespace data for @disk and update the namespace and
+ * queue to match: LightNVM registration, EUI/UUID, LBA format, metadata size,
+ * protection information type, integrity profile, capacity and discard.
+ *
+ * Returns 0 on success, or -ENODEV when identify fails, the namespace reports
+ * zero capacity, or LightNVM registration fails.
+ */
+static int nvme_revalidate_disk(struct gendisk *disk)
+{
+ struct nvme_ns *ns = disk->private_data;
+ struct nvme_id_ns *id;
+ u8 lbaf, pi_type;
+ u16 old_ms;
+ unsigned short bs;
+
+ if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
+ dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
+ __func__, ns->ctrl->instance, ns->ns_id);
+ return -ENODEV;
+ }
+ if (id->ncap == 0) {
+ kfree(id);
+ return -ENODEV;
+ }
+
+ /* Register with LightNVM once, the first time the namespace qualifies. */
+ if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
+ if (nvme_nvm_register(ns->queue, disk->disk_name)) {
+ dev_warn(ns->ctrl->dev,
+ "%s: LightNVM init failure\n", __func__);
+ kfree(id);
+ return -ENODEV;
+ }
+ ns->type = NVME_NS_LIGHTNVM;
+ }
+
+ /* The identifiers are only copied for controllers reporting 1.1 / 1.2 or later. */
+ if (ns->ctrl->vs >= NVME_VS(1, 1))
+ memcpy(ns->eui, id->eui64, sizeof(ns->eui));
+ if (ns->ctrl->vs >= NVME_VS(1, 2))
+ memcpy(ns->uuid, id->nguid, sizeof(ns->uuid));
+
+ /* Remember the previous metadata size to detect a format change below. */
+ old_ms = ns->ms;
+ lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
+ ns->lba_shift = id->lbaf[lbaf].ds;
+ ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
+ ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+
+ /*
+ * If identify namespace failed, use default 512 byte block size so
+ * block layer can use before failing read/write for 0 capacity.
+ */
+ if (ns->lba_shift == 0)
+ ns->lba_shift = 9;
+ bs = 1 << ns->lba_shift;
+ /* XXX: PI implementation requires metadata equal t10 pi tuple size */
+ pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
+ id->dps & NVME_NS_DPS_PI_MASK : 0;
+
+ blk_mq_freeze_queue(disk->queue);
+ /* Drop an existing integrity profile if the format it was built for changed. */
+ if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
+ ns->ms != old_ms ||
+ bs != queue_logical_block_size(disk->queue) ||
+ (ns->ms && ns->ext)))
+ blk_integrity_unregister(disk);
+
+ ns->pi_type = pi_type;
+ blk_queue_logical_block_size(ns->queue, bs);
+
+ if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
+ nvme_init_integrity(ns);
+ /*
+ * Metadata is present but no integrity profile is registered and the
+ * format is not the 8-byte PI one: expose the disk with zero capacity.
+ */
+ if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
+ set_capacity(disk, 0);
+ else
+ set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
+
+ if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
+ nvme_config_discard(ns);
+ blk_mq_unfreeze_queue(disk->queue);
+
+ kfree(id);
+ return 0;
+}
+
+/*
+ * Map a block-layer persistent reservation type to the numeric code used in
+ * the reservation commands built in this file.  Unknown types map to 0.
+ */
+static char nvme_pr_type(enum pr_type type)
+{
+	switch (type) {
+	case PR_WRITE_EXCLUSIVE:
+		return 1;
+	case PR_EXCLUSIVE_ACCESS:
+		return 2;
+	case PR_WRITE_EXCLUSIVE_REG_ONLY:
+		return 3;
+	case PR_EXCLUSIVE_ACCESS_REG_ONLY:
+		return 4;
+	case PR_WRITE_EXCLUSIVE_ALL_REGS:
+		return 5;
+	case PR_EXCLUSIVE_ACCESS_ALL_REGS:
+		return 6;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Build and synchronously execute a reservation command with opcode @op on
+ * the namespace behind @bdev.  The 16-byte payload holds @key followed by
+ * @sa_key, both little-endian; @cdw10 is placed in command dword 10.
+ *
+ * Return: 0, a negative errno, or a positive NVMe status code.
+ */
+static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
+				u64 key, u64 sa_key, u8 op)
+{
+	struct nvme_ns *ns = bdev->bd_disk->private_data;
+	struct nvme_command c;
+	u8 payload[16] = { 0, };
+
+	put_unaligned_le64(key, &payload[0]);
+	put_unaligned_le64(sa_key, &payload[8]);
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = op;
+	c.common.nsid = cpu_to_le32(ns->ns_id);
+	c.common.cdw10[0] = cpu_to_le32(cdw10);
+
+	return nvme_submit_sync_cmd(ns->queue, &c, payload, 16);
+}
+
+/*
+ * pr_ops register: issue a reservation register command.  cdw10 is 2 when an
+ * old key is supplied and 0 otherwise, bit 3 is set when PR_FL_IGNORE_KEY is
+ * requested, and bits 31:30 are always set (PTPL=1).
+ *
+ * Unsigned constants are used for the shifts so that setting bit 31 does not
+ * shift into the sign bit of a signed int.
+ *
+ * Return: -EOPNOTSUPP for flags other than PR_FL_IGNORE_KEY, otherwise the
+ * result of nvme_pr_command().
+ */
+static int nvme_pr_register(struct block_device *bdev, u64 old,
+		u64 new, unsigned flags)
+{
+	u32 cdw10;
+
+	if (flags & ~PR_FL_IGNORE_KEY)
+		return -EOPNOTSUPP;
+
+	cdw10 = old ? 2 : 0;
+	cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1U << 3 : 0;
+	cdw10 |= (1U << 30) | (1U << 31); /* PTPL=1 */
+	return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
+}
+
+/*
+ * pr_ops reserve: issue a reservation acquire command with the reservation
+ * type in bits 15:8 of cdw10 and bit 3 set when PR_FL_IGNORE_KEY is given.
+ *
+ * Return: -EOPNOTSUPP for flags other than PR_FL_IGNORE_KEY, otherwise the
+ * result of nvme_pr_command().
+ */
+static int nvme_pr_reserve(struct block_device *bdev, u64 key,
+		enum pr_type type, unsigned flags)
+{
+	u32 cdw10;
+
+	if (flags & ~PR_FL_IGNORE_KEY)
+		return -EOPNOTSUPP;
+
+	cdw10 = nvme_pr_type(type) << 8;
+	if (flags & PR_FL_IGNORE_KEY)
+		cdw10 |= 1 << 3;
+
+	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
+}
+
+/*
+ * pr_ops preempt: issue a reservation acquire command whose low cdw10 bits
+ * are 2 when @abort is set and 1 otherwise, with the reservation type in
+ * bits 15:8.
+ *
+ * The conditional is parenthesised: ?: binds more loosely than |, so without
+ * the parentheses the whole "type << 8 | abort" expression became the
+ * condition and the reservation type was lost.
+ */
+static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
+		enum pr_type type, bool abort)
+{
+	u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1);
+
+	return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
+}
+
+/*
+ * pr_ops clear: issue a reservation register command with cdw10 bit 0 set,
+ * plus bit 3 when a non-zero @key is supplied.
+ */
+static int nvme_pr_clear(struct block_device *bdev, u64 key)
+{
+	u32 cdw10 = 1;
+
+	if (key)
+		cdw10 |= 1 << 3;
+
+	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
+}
+
+/*
+ * pr_ops release: issue a reservation release command with the reservation
+ * type in bits 15:8 of cdw10 and bit 3 set when a non-zero @key is supplied.
+ *
+ * The conditional is parenthesised: ?: binds more loosely than |, so without
+ * the parentheses the whole "type << 8 | key" expression became the condition
+ * and the reservation type was lost.
+ */
+static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
+{
+	u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0);
+
+	return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
+}
+
+/* Persistent reservation operations, hooked up through nvme_fops.pr_ops. */
+static const struct pr_ops nvme_pr_ops = {
+ .pr_register = nvme_pr_register,
+ .pr_reserve = nvme_pr_reserve,
+ .pr_release = nvme_pr_release,
+ .pr_preempt = nvme_pr_preempt,
+ .pr_clear = nvme_pr_clear,
+};
+
+/* Block device operations for NVMe namespaces. */
+static const struct block_device_operations nvme_fops = {
+ .owner = THIS_MODULE,
+ .ioctl = nvme_ioctl,
+ .compat_ioctl = nvme_compat_ioctl,
+ .open = nvme_open,
+ .release = nvme_release,
+ .getgeo = nvme_getgeo,
+ .revalidate_disk= nvme_revalidate_disk,
+ .pr_ops = &nvme_pr_ops,
+};
+
+/*
+ * Poll CSTS every 100ms until the RDY bit matches @enabled.  The deadline is
+ * derived from the timeout field of @cap, in units of HZ / 2 jiffies.
+ *
+ * Return: 0 once the bit matches, the register read error if a read fails,
+ * -EINTR on a fatal signal, or -ENODEV on timeout.
+ */
+static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
+{
+	unsigned long timeout =
+		((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
+	u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
+	int ret;
+
+	for (;;) {
+		ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts);
+		if (ret)
+			return ret;
+
+		if ((csts & NVME_CSTS_RDY) == bit)
+			return 0;
+
+		msleep(100);
+		if (fatal_signal_pending(current))
+			return -EINTR;
+		if (time_after(jiffies, timeout)) {
+			dev_err(ctrl->dev,
+				"Device not ready; aborting %s\n", enabled ?
+						"initialisation" : "reset");
+			return -ENODEV;
+		}
+	}
+}
+
+/*
+ * If the device has been passed off to us in an enabled state, just clear
+ * the enabled bit.  The spec says we should set the 'shutdown notification
+ * bits', but doing so may cause the device to complete commands to the
+ * admin queue ... and we don't know what memory that might be pointing at!
+ *
+ * Returns the register write error, or the result of waiting for the ready
+ * bit to clear.
+ */
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+{
+	int ret;
+
+	ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+	ctrl->ctrl_config &= ~NVME_CC_ENABLE;
+
+	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+	if (!ret)
+		ret = nvme_wait_ready(ctrl, cap, false);
+
+	return ret;
+}
+
+/*
+ * Program the controller configuration register (NVM command set, page size,
+ * round-robin arbitration, no shutdown notification, queue entry sizes,
+ * enable bit) and wait for the controller to report ready.
+ *
+ * Return: -ENODEV when the device's minimum page size exceeds the 4K the host
+ * uses, the register write error, or the result of nvme_wait_ready().
+ */
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+{
+	/*
+	 * Default to a 4K page size, with the intention to update this
+	 * path in the future to accommodate architectures with differing
+	 * kernel and IO page sizes.
+	 */
+	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
+	int ret;
+
+	if (page_shift < dev_page_min) {
+		dev_err(ctrl->dev,
+			"Minimum device page size %u too large for host (%u)\n",
+			1 << dev_page_min, 1 << page_shift);
+		return -ENODEV;
+	}
+
+	ctrl->page_size = 1 << page_shift;
+
+	ctrl->ctrl_config = NVME_CC_CSS_NVM;
+	ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
+	ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
+	ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
+	ctrl->ctrl_config |= NVME_CC_ENABLE;
+
+	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+	if (!ret)
+		ret = nvme_wait_ready(ctrl, cap, true);
+
+	return ret;
+}
+
+/*
+ * Request a normal shutdown through the controller configuration register
+ * and poll CSTS every 100ms until the shutdown status reads complete.
+ *
+ * Return: 0 on completion, a register access error, -EINTR on a fatal
+ * signal, or -ENODEV when SHUTDOWN_TIMEOUT expires first.
+ */
+int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
+{
+	unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
+	u32 csts;
+	int ret;
+
+	ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+	ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
+
+	ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+	if (ret)
+		return ret;
+
+	for (;;) {
+		ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts);
+		if (ret)
+			return ret;
+
+		if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
+			return 0;
+
+		msleep(100);
+		if (fatal_signal_pending(current))
+			return -EINTR;
+		if (time_after(jiffies, timeout)) {
+			dev_err(ctrl->dev,
+				"Device shutdown incomplete; abort shutdown\n");
+			return -ENODEV;
+		}
+	}
+}
+
+/*
+ * Initialize the cached copies of the Identify data and various controller
+ * register in our nvme_ctrl structure.  This should be called as soon as
+ * the admin queue is fully up and running.
+ *
+ * Returns 0 on success, the register read error, or -EIO when the Identify
+ * Controller command fails.
+ */
+int nvme_init_identify(struct nvme_ctrl *ctrl)
+{
+	struct nvme_id_ctrl *id;
+	u64 cap;
+	int ret, page_shift;
+
+	ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
+	if (ret) {
+		dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
+		return ret;
+	}
+
+	ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
+	if (ret) {
+		dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
+		return ret;
+	}
+	page_shift = NVME_CAP_MPSMIN(cap) + 12;
+
+	if (ctrl->vs >= NVME_VS(1, 1))
+		ctrl->subsystem = NVME_CAP_NSSRC(cap);
+
+	ret = nvme_identify_ctrl(ctrl, &id);
+	if (ret) {
+		dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
+		return -EIO;
+	}
+
+	ctrl->oncs = le16_to_cpup(&id->oncs);
+	ctrl->vwc = id->vwc;
+	atomic_set(&ctrl->abort_limit, id->acl + 1);
+
+	memcpy(ctrl->serial, id->sn, sizeof(id->sn));
+	memcpy(ctrl->model, id->mn, sizeof(id->mn));
+	memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
+
+	/* mdts is a power-of-two multiple of the minimum page size; 0 means no limit. */
+	if (!id->mdts)
+		ctrl->max_hw_sectors = UINT_MAX;
+	else
+		ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9);
+
+	if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
+		unsigned int stripe_sectors;
+
+		ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
+		stripe_sectors = ctrl->stripe_size >> (page_shift - 9);
+		if (ctrl->max_hw_sectors)
+			ctrl->max_hw_sectors = min(stripe_sectors,
+						   ctrl->max_hw_sectors);
+		else
+			ctrl->max_hw_sectors = stripe_sectors;
+	}
+
+	kfree(id);
+	return 0;
+}
+
+/*
+ * Character device open: find the controller whose instance number equals
+ * the inode's minor and take a reference on it.
+ *
+ * Return: 0 on success, -EWOULDBLOCK when the controller has no admin queue,
+ * -ENODEV when no such controller exists or it is already being torn down.
+ */
+static int nvme_dev_open(struct inode *inode, struct file *file)
+{
+	struct nvme_ctrl *ctrl;
+	int instance = iminor(inode);
+	int ret = -ENODEV;
+
+	spin_lock(&dev_list_lock);
+	list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
+		if (ctrl->instance != instance)
+			continue;
+
+		if (!ctrl->admin_q) {
+			ret = -EWOULDBLOCK;
+		} else if (kref_get_unless_zero(&ctrl->kref)) {
+			file->private_data = ctrl;
+			ret = 0;
+		}
+		break;
+	}
+	spin_unlock(&dev_list_lock);
+
+	return ret;
+}
+
+/* Character device release: drop the controller reference stored in file->private_data. */
+static int nvme_dev_release(struct inode *inode, struct file *file)
+{
+ nvme_put_ctrl(file->private_data);
+ return 0;
+}
+
+/*
+ * Deprecated NVME_IOCTL_IO_CMD on the character device: only honoured when
+ * the controller has exactly one namespace, which is then used as the target.
+ * A namespace reference is held across the command so the namespace mutex
+ * need not be.
+ *
+ * Return: -ENOTTY with no namespaces, -EINVAL with more than one, otherwise
+ * the result of nvme_user_cmd().
+ */
+static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
+{
+	struct nvme_ns *ns = NULL;
+	int ret = 0;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+	if (list_empty(&ctrl->namespaces)) {
+		ret = -ENOTTY;
+	} else {
+		ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
+		if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns,
+					  list)) {
+			dev_warn(ctrl->dev,
+				"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
+			ret = -EINVAL;
+		} else {
+			dev_warn(ctrl->dev,
+				"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
+			kref_get(&ns->kref);
+		}
+	}
+	mutex_unlock(&ctrl->namespaces_mutex);
+
+	if (ret)
+		return ret;
+
+	ret = nvme_user_cmd(ctrl, ns, argp);
+	nvme_put_ns(ns);
+	return ret;
+}
+
+/*
+ * Character device ioctl entry point: admin and I/O passthrough, controller
+ * reset and subsystem reset.  Unknown commands yield -ENOTTY.
+ */
+static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	struct nvme_ctrl *ctrl = file->private_data;
+	void __user *argp = (void __user *)arg;
+	long ret;
+
+	switch (cmd) {
+	case NVME_IOCTL_ADMIN_CMD:
+		ret = nvme_user_cmd(ctrl, NULL, argp);
+		break;
+	case NVME_IOCTL_IO_CMD:
+		ret = nvme_dev_user_cmd(ctrl, argp);
+		break;
+	case NVME_IOCTL_RESET:
+		dev_warn(ctrl->dev, "resetting controller\n");
+		ret = ctrl->ops->reset_ctrl(ctrl);
+		break;
+	case NVME_IOCTL_SUBSYS_RESET:
+		ret = nvme_reset_subsystem(ctrl);
+		break;
+	default:
+		ret = -ENOTTY;
+		break;
+	}
+
+	return ret;
+}
+
+/* Character device file operations; the same handler serves native and compat ioctls. */
+static const struct file_operations nvme_dev_fops = {
+ .owner = THIS_MODULE,
+ .open = nvme_dev_open,
+ .release = nvme_dev_release,
+ .unlocked_ioctl = nvme_dev_ioctl,
+ .compat_ioctl = nvme_dev_ioctl,
+};
+
+/*
+ * sysfs store handler for "reset_controller": any write resets the
+ * controller.  The written data is not inspected.
+ *
+ * Return: @count on success or the negative error from reset_ctrl().
+ */
+static ssize_t nvme_sysfs_reset(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	int ret = ctrl->ops->reset_ctrl(ctrl);
+
+	if (ret >= 0)
+		return count;
+
+	return ret;
+}
+static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
+/* sysfs show handler for "uuid": print ns->uuid in %pU form. */
+static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
+								char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	struct nvme_ns *ns = disk->private_data;
+
+	return sprintf(buf, "%pU\n", ns->uuid);
+}
+static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);
+
+/* Show handler for the read-only "eui" attribute: dumps the 8 bytes of ns->eui. */
+static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	struct nvme_ns *ns = disk->private_data;
+
+	return sprintf(buf, "%8phd\n", ns->eui);
+}
+static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);
+
+/*
+ * Show handler for the read-only "nsid" attribute.
+ *
+ * ns_id is declared unsigned, so it is printed with %u; %d would render
+ * identifiers above INT_MAX as negative numbers.
+ */
+static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+
+	return sprintf(buf, "%u\n", ns->ns_id);
+}
+static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);
+
+/*
+ * NULL-terminated list of per-namespace identification attributes
+ * (uuid, eui, nsid) referenced by nvme_ns_attr_group.
+ */
+static struct attribute *nvme_ns_attrs[] = {
+ &dev_attr_uuid.attr,
+ &dev_attr_eui.attr,
+ &dev_attr_nsid.attr,
+ NULL,
+};
+
+/*
+ * Visibility callback for the namespace attribute group.
+ *
+ * The "uuid" and "eui" attributes are hidden (mode 0) when the corresponding
+ * identifier in the namespace is all zeroes; every other attribute keeps its
+ * declared mode.
+ */
+static umode_t nvme_attrs_are_visible(struct kobject *kobj,
+		struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct nvme_ns *ns = dev_to_disk(dev)->private_data;
+	bool hide = false;
+
+	/* memchr_inv() returns NULL when every byte matches the given value. */
+	if (a == &dev_attr_uuid.attr)
+		hide = !memchr_inv(ns->uuid, 0, sizeof(ns->uuid));
+	else if (a == &dev_attr_eui.attr)
+		hide = !memchr_inv(ns->eui, 0, sizeof(ns->eui));
+
+	if (hide)
+		return 0;
+	return a->mode;
+}
+
+/*
+ * sysfs group holding the namespace identification attributes; visibility
+ * of individual entries is decided by nvme_attrs_are_visible().
+ */
+static const struct attribute_group nvme_ns_attr_group = {
+ .attrs = nvme_ns_attrs,
+ .is_visible = nvme_attrs_are_visible,
+};
+
+/*
+ * Generate a read-only device attribute named <field> whose show handler
+ * prints the ctrl-><field> character array.  The "%.*s" precision is
+ * limited to sizeof(ctrl->field), so no more than the array's size is
+ * read from it.
+ */
+#define nvme_show_function(field) \
+static ssize_t field##_show(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \
+ return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field); \
+} \
+static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
+
+/* Instantiate the model, serial and firmware_rev attributes. */
+nvme_show_function(model);
+nvme_show_function(serial);
+nvme_show_function(firmware_rev);
+
+/* NULL-terminated list of attributes exposed on the controller device. */
+static struct attribute *nvme_dev_attrs[] = {
+ &dev_attr_reset_controller.attr,
+ &dev_attr_model.attr,
+ &dev_attr_serial.attr,
+ &dev_attr_firmware_rev.attr,
+ NULL
+};
+
+/* Group wrapping nvme_dev_attrs; no visibility callback is installed. */
+static struct attribute_group nvme_dev_attrs_group = {
+ .attrs = nvme_dev_attrs,
+};
+
+/*
+ * NULL-terminated group list handed to device_create_with_groups() in
+ * nvme_init_ctrl().
+ */
+static const struct attribute_group *nvme_dev_attr_groups[] = {
+ &nvme_dev_attrs_group,
+ NULL,
+};
+
+/*
+ * list_sort() comparator ordering namespaces by ascending ns_id.
+ *
+ * Returns a negative value, zero or a positive value when a's ID is lower
+ * than, equal to or higher than b's.  The IDs are compared explicitly
+ * rather than subtracted: ns_id is unsigned, and truncating the unsigned
+ * difference to int yields the wrong sign once two IDs differ by more than
+ * INT_MAX.
+ */
+static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
+	struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+
+	if (nsa->ns_id < nsb->ns_id)
+		return -1;
+	return nsa->ns_id > nsb->ns_id;
+}
+
+/*
+ * Look up the namespace with the given ID on the controller's list.
+ *
+ * The walk stops at the first entry whose ID exceeds nsid, i.e. it relies
+ * on the list being kept in ascending ns_id order.  Caller must hold
+ * ctrl->namespaces_mutex.  Returns the namespace or NULL if not found.
+ */
+static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+	struct nvme_ns *cur;
+
+	lockdep_assert_held(&ctrl->namespaces_mutex);
+
+	list_for_each_entry(cur, &ctrl->namespaces, list) {
+		if (cur->ns_id > nsid)
+			break;
+		if (cur->ns_id == nsid)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Allocate and set up a new namespace with ID @nsid on @ctrl.
+ *
+ * Creates the blk-mq request queue and the gendisk, applies the
+ * controller's queue limits, revalidates the disk and, on success, appends
+ * the namespace to ctrl->namespaces and takes a controller reference.
+ * Namespaces of type NVME_NS_LIGHTNVM are put on the list but not
+ * registered with add_disk().
+ *
+ * Caller must hold ctrl->namespaces_mutex.  The function returns void:
+ * any failure unwinds what was allocated and is otherwise silent.
+ */
+static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+ struct nvme_ns *ns;
+ struct gendisk *disk;
+ int node = dev_to_node(ctrl->dev);
+
+ lockdep_assert_held(&ctrl->namespaces_mutex);
+
+ ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
+ if (!ns)
+ return;
+
+ ns->queue = blk_mq_init_queue(ctrl->tagset);
+ if (IS_ERR(ns->queue))
+ goto out_free_ns;
+ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
+ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
+ ns->queue->queuedata = ns;
+ ns->ctrl = ctrl;
+
+ disk = alloc_disk_node(0, node);
+ if (!disk)
+ goto out_free_queue;
+
+ kref_init(&ns->kref);
+ ns->ns_id = nsid;
+ ns->disk = disk;
+ ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
+
+ blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
+ /* Transfer-size limits are applied only if the controller reported one. */
+ if (ctrl->max_hw_sectors) {
+ blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
+ /* One segment per controller page of the maximum transfer, plus one. */
+ blk_queue_max_segments(ns->queue,
+ (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
+ }
+ if (ctrl->stripe_size)
+ blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
+ if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
+ blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
+ blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);
+
+ disk->major = nvme_major;
+ disk->first_minor = 0;
+ disk->fops = &nvme_fops;
+ disk->private_data = ns;
+ disk->queue = ns->queue;
+ disk->driverfs_dev = ctrl->device;
+ disk->flags = GENHD_FL_EXT_DEVT;
+ sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid);
+
+ if (nvme_revalidate_disk(ns->disk))
+ goto out_free_disk;
+
+ list_add_tail(&ns->list, &ctrl->namespaces);
+ kref_get(&ctrl->kref);
+ if (ns->type == NVME_NS_LIGHTNVM)
+ return;
+
+ add_disk(ns->disk);
+ /* A missing sysfs group is only warned about; the disk stays registered. */
+ if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
+ &nvme_ns_attr_group))
+ pr_warn("%s: failed to create sysfs group for identification\n",
+ ns->disk->disk_name);
+ return;
+ out_free_disk:
+ /*
+ * NOTE(review): disk came from alloc_disk_node(); confirm that a plain
+ * kfree() releases everything that allocation set up.
+ */
+ kfree(disk);
+ out_free_queue:
+ blk_cleanup_queue(ns->queue);
+ out_free_ns:
+ kfree(ns);
+}
+
+/*
+ * Tear down a namespace: unregister its disk if it was added, clean up its
+ * request queue, unlink it from the controller's list and drop a reference
+ * via nvme_put_ns().
+ *
+ * "kill" is computed once on entry: it is true when nvme_io_incapable()
+ * reports true for the controller and the queue is not already marked
+ * dying.  Caller must hold the controller's namespaces_mutex.
+ */
+static void nvme_ns_remove(struct nvme_ns *ns)
+{
+ bool kill = nvme_io_incapable(ns->ctrl) &&
+ !blk_queue_dying(ns->queue);
+
+ lockdep_assert_held(&ns->ctrl->namespaces_mutex);
+
+ if (kill) {
+ blk_set_queue_dying(ns->queue);
+
+ /*
+ * The controller was shutdown first if we got here through
+ * device removal. The shutdown may requeue outstanding
+ * requests. These need to be aborted immediately so
+ * del_gendisk doesn't block indefinitely for their completion.
+ */
+ blk_mq_abort_requeue_list(ns->queue);
+ }
+ /* GENHD_FL_UP is tested so that only a disk marked up is unregistered. */
+ if (ns->disk->flags & GENHD_FL_UP) {
+ if (blk_get_integrity(ns->disk))
+ blk_integrity_unregister(ns->disk);
+ sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
+ &nvme_ns_attr_group);
+ del_gendisk(ns->disk);
+ }
+ /*
+ * Clean up the queue if this call marked it dying, or if it is not
+ * dying at all; a queue that was already dying on entry is left alone.
+ */
+ if (kill || !blk_queue_dying(ns->queue)) {
+ blk_mq_abort_requeue_list(ns->queue);
+ blk_cleanup_queue(ns->queue);
+ }
+ list_del_init(&ns->list);
+ nvme_put_ns(ns);
+}
+
+/*
+ * Bring the driver's view of namespace @nsid up to date: allocate it if it
+ * is not on the controller's list yet, otherwise revalidate its disk and
+ * remove the namespace when revalidation reports failure.
+ */
+static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+	struct nvme_ns *ns = nvme_find_ns(ctrl, nsid);
+
+	if (!ns) {
+		nvme_alloc_ns(ctrl, nsid);
+		return;
+	}
+
+	if (revalidate_disk(ns->disk))
+		nvme_ns_remove(ns);
+}
+
+/*
+ * Scan namespaces using the namespace ID lists returned by
+ * nvme_identify_ns_list().
+ *
+ * @nn is the namespace count; it determines how many 1024-entry lists are
+ * requested.  Every non-zero ID in a list is validated, and namespaces
+ * already known to the driver whose IDs fall in the gaps between listed IDs
+ * are removed.  A zero entry ends the scan.
+ *
+ * Returns 0 on success, -ENOMEM if the list buffer cannot be allocated, or
+ * the non-zero result of nvme_identify_ns_list().
+ */
+static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
+{
+ struct nvme_ns *ns;
+ __le32 *ns_list;
+ unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
+ int ret = 0;
+
+ /* 0x1000 bytes hold 1024 little-endian 32-bit namespace IDs. */
+ ns_list = kzalloc(0x1000, GFP_KERNEL);
+ if (!ns_list)
+ return -ENOMEM;
+
+ for (i = 0; i < num_lists; i++) {
+ /* prev is the last ID handled so far; it seeds the next request. */
+ ret = nvme_identify_ns_list(ctrl, prev, ns_list);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < min(nn, 1024U); j++) {
+ nsid = le32_to_cpu(ns_list[j]);
+ if (!nsid)
+ goto out;
+
+ nvme_validate_ns(ctrl, nsid);
+
+ /*
+ * Remove known namespaces with IDs strictly between the
+ * previous ID and nsid; the loop leaves prev at nsid
+ * whenever nsid is larger than the previous ID.
+ */
+ while (++prev < nsid) {
+ ns = nvme_find_ns(ctrl, prev);
+ if (ns)
+ nvme_ns_remove(ns);
+ }
+ }
+ /* Account for the entries consumed from this list. */
+ nn -= j;
+ }
+ out:
+ kfree(ns_list);
+ return ret;
+}
+
+/*
+ * Sequential namespace scan: validate every ID from 1 to @nn, then remove
+ * any namespace on the list whose ID is above @nn.  Caller must hold
+ * ctrl->namespaces_mutex.
+ */
+static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
+{
+	struct nvme_ns *cur, *tmp;
+	unsigned nsid = 1;
+
+	lockdep_assert_held(&ctrl->namespaces_mutex);
+
+	while (nsid <= nn) {
+		nvme_validate_ns(ctrl, nsid);
+		nsid++;
+	}
+
+	/* The _safe iterator is needed because entries are unlinked while walking. */
+	list_for_each_entry_safe(cur, tmp, &ctrl->namespaces, list) {
+		if (cur->ns_id <= nn)
+			continue;
+		nvme_ns_remove(cur);
+	}
+}
+
+/*
+ * Rescan the controller's namespaces.
+ *
+ * The namespace count is read from the Identify Controller data.  The
+ * list-based scan is used when the controller version is at least 1.1 and
+ * the NVME_QUIRK_IDENTIFY_CNS quirk is not set; if it is not used, or it
+ * fails, the sequential scan runs instead.  The namespace list is sorted
+ * by ID before the mutex is released.  Nothing is done if Identify
+ * Controller fails.
+ */
+void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
+{
+	struct nvme_id_ctrl *id;
+	unsigned nn;
+	bool use_list;
+
+	if (nvme_identify_ctrl(ctrl, &id))
+		return;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+
+	nn = le32_to_cpu(id->nn);
+	use_list = ctrl->vs >= NVME_VS(1, 1) &&
+		   !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS);
+
+	if (!use_list || nvme_scan_ns_list(ctrl, nn))
+		__nvme_scan_namespaces(ctrl, nn);
+
+	list_sort(NULL, &ctrl->namespaces, ns_cmp);
+	mutex_unlock(&ctrl->namespaces_mutex);
+	kfree(id);
+}
+
+/* Remove every namespace on the controller's list, under namespaces_mutex. */
+void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *cur, *tmp;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+
+	/* nvme_ns_remove() unlinks the entry, hence the _safe iterator. */
+	list_for_each_entry_safe(cur, tmp, &ctrl->namespaces, list) {
+		nvme_ns_remove(cur);
+	}
+
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+/* Allocator for controller instance numbers. */
+static DEFINE_IDA(nvme_instance_ida);
+
+/*
+ * Reserve a new instance number and store it in ctrl->instance.
+ *
+ * The preload/allocate pair is retried for as long as ida_get_new()
+ * reports -EAGAIN.  Returns 0 on success and -ENODEV on any failure.
+ */
+static int nvme_set_instance(struct nvme_ctrl *ctrl)
+{
+	int id, rc;
+
+	for (;;) {
+		if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
+			return -ENODEV;
+
+		spin_lock(&dev_list_lock);
+		rc = ida_get_new(&nvme_instance_ida, &id);
+		spin_unlock(&dev_list_lock);
+
+		if (rc != -EAGAIN)
+			break;
+	}
+
+	if (rc)
+		return -ENODEV;
+
+	ctrl->instance = id;
+	return 0;
+}
+
+/* Return the controller's instance number to the IDA, under dev_list_lock. */
+static void nvme_release_instance(struct nvme_ctrl *ctrl)
+{
+	int id = ctrl->instance;
+
+	spin_lock(&dev_list_lock);
+	ida_remove(&nvme_instance_ida, id);
+	spin_unlock(&dev_list_lock);
+}
+
+/*
+ * Undo the visible parts of nvme_init_ctrl(): destroy the character device
+ * and unlink the controller from the global controller list.
+ */
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
+{
+	dev_t devt = MKDEV(nvme_char_major, ctrl->instance);
+
+	device_destroy(nvme_class, devt);
+
+	spin_lock(&dev_list_lock);
+	list_del(&ctrl->node);
+	spin_unlock(&dev_list_lock);
+}
+
+/*
+ * kref release callback for a controller, invoked from nvme_put_ctrl()
+ * when the last reference is dropped.
+ */
+static void nvme_free_ctrl(struct kref *kref)
+{
+ struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
+
+ /* Drops the device reference taken with get_device() in nvme_init_ctrl(). */
+ put_device(ctrl->device);
+ nvme_release_instance(ctrl);
+
+ /* The transport's free_ctrl() runs last, after ctrl's fields were used above. */
+ ctrl->ops->free_ctrl(ctrl);
+}
+
+/*
+ * Drop one reference on the controller; nvme_free_ctrl() runs when the
+ * reference count reaches zero.
+ */
+void nvme_put_ctrl(struct nvme_ctrl *ctrl)
+{
+ kref_put(&ctrl->kref, nvme_free_ctrl);
+}
+
+/*
+ * Initialize the NVMe controller structure. This needs to be called during
+ * the earliest initialization so that the initialized structure is available
+ * during probing.
+ *
+ * Sets up the namespace list, its mutex and the reference count, records
+ * @dev, @ops and @quirks, reserves an instance number, creates the
+ * "nvme%d" character device and adds the controller to nvme_ctrl_list.
+ *
+ * Returns 0 on success, or a negative errno from instance allocation or
+ * device creation.
+ */
+int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
+ const struct nvme_ctrl_ops *ops, unsigned long quirks)
+{
+ int ret;
+
+ INIT_LIST_HEAD(&ctrl->namespaces);
+ mutex_init(&ctrl->namespaces_mutex);
+ kref_init(&ctrl->kref);
+ ctrl->dev = dev;
+ ctrl->ops = ops;
+ ctrl->quirks = quirks;
+
+ ret = nvme_set_instance(ctrl);
+ if (ret)
+ goto out;
+
+ ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
+ MKDEV(nvme_char_major, ctrl->instance),
+ dev, nvme_dev_attr_groups,
+ "nvme%d", ctrl->instance);
+ if (IS_ERR(ctrl->device)) {
+ ret = PTR_ERR(ctrl->device);
+ goto out_release_instance;
+ }
+ /* This reference is dropped by put_device() in nvme_free_ctrl(). */
+ get_device(ctrl->device);
+ /* The sysfs handlers retrieve the controller via dev_get_drvdata(). */
+ dev_set_drvdata(ctrl->device, ctrl);
+
+ spin_lock(&dev_list_lock);
+ list_add_tail(&ctrl->node, &nvme_ctrl_list);
+ spin_unlock(&dev_list_lock);
+
+ return 0;
+out_release_instance:
+ nvme_release_instance(ctrl);
+out:
+ return ret;
+}
+
+/*
+ * Stop the request queue of every namespace on the controller: set
+ * QUEUE_FLAG_STOPPED under the queue lock, cancel pending requeue work and
+ * stop the hardware queues.
+ */
+void nvme_stop_queues(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *cur;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+
+	list_for_each_entry(cur, &ctrl->namespaces, list) {
+		struct request_queue *q = cur->queue;
+
+		spin_lock_irq(q->queue_lock);
+		queue_flag_set(QUEUE_FLAG_STOPPED, q);
+		spin_unlock_irq(q->queue_lock);
+
+		blk_mq_cancel_requeue_work(q);
+		blk_mq_stop_hw_queues(q);
+	}
+
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+/*
+ * Restart the request queue of every namespace on the controller: clear
+ * QUEUE_FLAG_STOPPED, start the stopped hardware queues and kick the
+ * requeue list.
+ */
+void nvme_start_queues(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *cur;
+
+	mutex_lock(&ctrl->namespaces_mutex);
+
+	list_for_each_entry(cur, &ctrl->namespaces, list) {
+		struct request_queue *q = cur->queue;
+
+		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, q);
+		blk_mq_start_stopped_hw_queues(q, true);
+		blk_mq_kick_requeue_list(q);
+	}
+
+	mutex_unlock(&ctrl->namespaces_mutex);
+}
+
+/*
+ * Module-wide setup for the NVMe core: register the "nvme" block major,
+ * the "nvme" character device region with nvme_dev_fops, and the "nvme"
+ * device class.
+ *
+ * Returns 0 on success or a negative errno; on failure everything that was
+ * registered earlier in this function is unregistered again.
+ */
+int __init nvme_core_init(void)
+{
+ int result;
+
+ result = register_blkdev(nvme_major, "nvme");
+ if (result < 0)
+ return result;
+ /* A positive return value is stored as the block major to use. */
+ else if (result > 0)
+ nvme_major = result;
+
+ result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
+ &nvme_dev_fops);
+ if (result < 0)
+ goto unregister_blkdev;
+ /* Likewise, a positive return value becomes the character major. */
+ else if (result > 0)
+ nvme_char_major = result;
+
+ nvme_class = class_create(THIS_MODULE, "nvme");
+ if (IS_ERR(nvme_class)) {
+ result = PTR_ERR(nvme_class);
+ goto unregister_chrdev;
+ }
+
+ return 0;
+
+ unregister_chrdev:
+ __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+ unregister_blkdev:
+ unregister_blkdev(nvme_major, "nvme");
+ return result;
+}
+
+/*
+ * Module-wide teardown for the NVMe core: releases the block major, the
+ * device class and the character device region registered by
+ * nvme_core_init().
+ *
+ * NOTE(review): the order here is not the reverse of nvme_core_init();
+ * confirm that this is intentional.
+ */
+void nvme_core_exit(void)
+{
+ unregister_blkdev(nvme_major, "nvme");
+ class_destroy(nvme_class);
+ __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+}
};
};
+/*
+ * MLC pair table as laid out in the identify data: a pair count followed by
+ * the pair entries.  The code that copies this table treats num_pairs as
+ * little-endian and each pair as 4 bits wide.
+ */
+struct nvme_nvm_lp_mlc {
+ __u16 num_pairs;
+ __u8 pairs[886];
+};
+
+/*
+ * Table embedded at the end of struct nvme_nvm_id_group: an 8-byte id
+ * followed by the MLC pair table.
+ */
+struct nvme_nvm_lp_tbl {
+ __u8 id[8];
+ struct nvme_nvm_lp_mlc mlc;
+};
+
struct nvme_nvm_id_group {
__u8 mtype;
__u8 fmtype;
__le32 mpos;
__le32 mccap;
__le16 cpar;
- __u8 reserved[906];
+ __u8 reserved[10];
+ struct nvme_nvm_lp_tbl lptbl;
} __packed;
struct nvme_nvm_addr_format {
dst->mccap = le32_to_cpu(src->mccap);
dst->cpar = le16_to_cpu(src->cpar);
+
+ if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
+ memcpy(dst->lptbl.id, src->lptbl.id, 8);
+ dst->lptbl.mlc.num_pairs =
+ le16_to_cpu(src->lptbl.mlc.num_pairs);
+ /* 4 bits per pair */
+ memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
+ dst->lptbl.mlc.num_pairs >> 1);
+ }
}
return 0;
static int nvme_nvm_identity(struct nvm_dev *nvmdev, struct nvm_id *nvm_id)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_dev *dev = ns->dev;
struct nvme_nvm_id *nvme_nvm_id;
struct nvme_nvm_command c = {};
int ret;
if (!nvme_nvm_id)
return -ENOMEM;
- ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
nvme_nvm_id, sizeof(struct nvme_nvm_id));
if (ret) {
ret = -EIO;
nvm_l2p_update_fn *update_l2p, void *priv)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_dev *dev = ns->dev;
struct nvme_nvm_command c = {};
- u32 len = queue_max_hw_sectors(dev->admin_q) << 9;
+ u32 len = queue_max_hw_sectors(ns->ctrl->admin_q) << 9;
u32 nlb_pr_rq = len / sizeof(u64);
u64 cmd_slba = slba;
void *entries;
c.l2p.slba = cpu_to_le64(cmd_slba);
c.l2p.nlb = cpu_to_le32(cmd_nlb);
- ret = nvme_submit_sync_cmd(dev->admin_q,
+ ret = nvme_submit_sync_cmd(ns->ctrl->admin_q,
(struct nvme_command *)&c, entries, len);
if (ret) {
- dev_err(dev->dev, "L2P table transfer failed (%d)\n",
+ dev_err(ns->ctrl->dev, "L2P table transfer failed (%d)\n",
ret);
ret = -EIO;
goto out;
{
struct request_queue *q = nvmdev->q;
struct nvme_ns *ns = q->queuedata;
- struct nvme_dev *dev = ns->dev;
+ struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_nvm_command c = {};
struct nvme_nvm_bb_tbl *bb_tbl;
int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blocks;
if (!bb_tbl)
return -ENOMEM;
- ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ ret = nvme_submit_sync_cmd(ctrl->admin_q, (struct nvme_command *)&c,
bb_tbl, tblsz);
if (ret) {
- dev_err(dev->dev, "get bad block table failed (%d)\n", ret);
+ dev_err(ctrl->dev, "get bad block table failed (%d)\n", ret);
ret = -EIO;
goto out;
}
if (bb_tbl->tblid[0] != 'B' || bb_tbl->tblid[1] != 'B' ||
bb_tbl->tblid[2] != 'L' || bb_tbl->tblid[3] != 'T') {
- dev_err(dev->dev, "bbt format mismatch\n");
+ dev_err(ctrl->dev, "bbt format mismatch\n");
ret = -EINVAL;
goto out;
}
if (le16_to_cpu(bb_tbl->verid) != 1) {
ret = -EINVAL;
- dev_err(dev->dev, "bbt version not supported\n");
+ dev_err(ctrl->dev, "bbt version not supported\n");
goto out;
}
if (le32_to_cpu(bb_tbl->tblks) != nr_blocks) {
ret = -EINVAL;
- dev_err(dev->dev, "bbt unsuspected blocks returned (%u!=%u)",
+ dev_err(ctrl->dev, "bbt unsuspected blocks returned (%u!=%u)",
le32_to_cpu(bb_tbl->tblks), nr_blocks);
goto out;
}
ppa = dev_to_generic_addr(nvmdev, ppa);
ret = update_bbtbl(ppa, nr_blocks, bb_tbl->blk, priv);
- if (ret) {
- ret = -EINTR;
- goto out;
- }
-
out:
kfree(bb_tbl);
return ret;
int type)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_dev *dev = ns->dev;
struct nvme_nvm_command c = {};
int ret = 0;
c.set_bb.nlb = cpu_to_le16(rqd->nr_pages - 1);
c.set_bb.value = type;
- ret = nvme_submit_sync_cmd(dev->admin_q, (struct nvme_command *)&c,
+ ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, (struct nvme_command *)&c,
NULL, 0);
if (ret)
- dev_err(dev->dev, "set bad block table failed (%d)\n", ret);
+ dev_err(ns->ctrl->dev, "set bad block table failed (%d)\n", ret);
return ret;
}
static void nvme_nvm_end_io(struct request *rq, int error)
{
struct nvm_rq *rqd = rq->end_io_data;
- struct nvm_dev *dev = rqd->dev;
- if (dev->mt && dev->mt->end_io(rqd, error))
- pr_err("nvme: err status: %x result: %lx\n",
- rq->errors, (unsigned long)rq->special);
+ nvm_end_io(rqd, error);
kfree(rq->cmd);
blk_mq_free_request(rq);
static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_dev *dev = ns->dev;
- return dma_pool_create(name, dev->dev, PAGE_SIZE, PAGE_SIZE, 0);
+ return dma_pool_create(name, ns->ctrl->dev, PAGE_SIZE, PAGE_SIZE, 0);
}
static void nvme_nvm_destroy_dma_pool(void *pool)
int nvme_nvm_ns_supported(struct nvme_ns *ns, struct nvme_id_ns *id)
{
- struct nvme_dev *dev = ns->dev;
- struct pci_dev *pdev = to_pci_dev(dev->dev);
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ /* XXX: this is poking into PCI structures from generic code! */
+ struct pci_dev *pdev = to_pci_dev(ctrl->dev);
/* QEMU NVMe simulator - PCI ID + Vendor specific bit */
if (pdev->vendor == PCI_VENDOR_ID_CNEX &&
#include <linux/kref.h>
#include <linux/blk-mq.h>
+enum {
+ /*
+ * Driver-internal status code for commands that were cancelled due
+ * to timeouts or controller shutdown. The value is negative so
+ * that it (a) cannot overlap with the unsigned hardware status codes,
+ * and (b) can easily be tested for.
+ */
+ NVME_SC_CANCELLED = -EINTR,
+};
+
extern unsigned char nvme_io_timeout;
#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ)
+extern unsigned char admin_timeout;
+#define ADMIN_TIMEOUT (admin_timeout * HZ)
+
+extern unsigned char shutdown_timeout;
+#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
+
enum {
NVME_NS_LBA = 0,
NVME_NS_LIGHTNVM = 1,
};
/*
- * Represents an NVM Express device. Each nvme_dev is a PCI function.
+ * List of workarounds for devices that required behavior not specified in
+ * the standard.
*/
-struct nvme_dev {
- struct list_head node;
- struct nvme_queue **queues;
+/* Bit flags; the combined value is stored in nvme_ctrl.quirks. */
+enum nvme_quirks {
+ /*
+ * The device prefers I/O aligned to a stripe size specified in a
+ * vendor-specific Identify field.
+ */
+ NVME_QUIRK_STRIPE_SIZE = (1 << 0),
+
+ /*
+ * The controller doesn't handle Identify values other than 0 or 1
+ * correctly.
+ */
+ NVME_QUIRK_IDENTIFY_CNS = (1 << 1),
+};
+
+struct nvme_ctrl {
+ const struct nvme_ctrl_ops *ops;
struct request_queue *admin_q;
- struct blk_mq_tag_set tagset;
- struct blk_mq_tag_set admin_tagset;
- u32 __iomem *dbs;
struct device *dev;
- struct dma_pool *prp_page_pool;
- struct dma_pool *prp_small_pool;
+ struct kref kref;
int instance;
- unsigned queue_count;
- unsigned online_queues;
- unsigned max_qid;
- int q_depth;
- u32 db_stride;
- u32 ctrl_config;
- struct msix_entry *entry;
- struct nvme_bar __iomem *bar;
+ struct blk_mq_tag_set *tagset;
struct list_head namespaces;
- struct kref kref;
- struct device *device;
- struct work_struct reset_work;
- struct work_struct probe_work;
- struct work_struct scan_work;
+ struct mutex namespaces_mutex;
+ struct device *device; /* char device */
+ struct list_head node;
+
char name[12];
char serial[20];
char model[40];
char firmware_rev[8];
- bool subsystem;
+
+ u32 ctrl_config;
+
+ u32 page_size;
u32 max_hw_sectors;
u32 stripe_size;
- u32 page_size;
- void __iomem *cmb;
- dma_addr_t cmb_dma_addr;
- u64 cmb_size;
- u32 cmbsz;
u16 oncs;
- u16 abort_limit;
+ atomic_t abort_limit;
u8 event_limit;
u8 vwc;
+ u32 vs;
+ bool subsystem;
+ unsigned long quirks;
};
/*
struct nvme_ns {
struct list_head list;
- struct nvme_dev *dev;
+ struct nvme_ctrl *ctrl;
struct request_queue *queue;
struct gendisk *disk;
struct kref kref;
+ u8 eui[8];
+ u8 uuid[16];
+
unsigned ns_id;
int lba_shift;
u16 ms;
u32 mode_select_block_len;
};
-/*
- * The nvme_iod describes the data in an I/O, including the list of PRP
- * entries. You can't see it in this data structure because C doesn't let
- * me express that. Use nvme_alloc_iod to ensure there's enough space
- * allocated to store the PRP list.
- */
-struct nvme_iod {
- unsigned long private; /* For the use of the submitter of the I/O */
- int npages; /* In the PRP list. 0 means small pool in use */
- int offset; /* Of PRP list */
- int nents; /* Used in scatterlist */
- int length; /* Of data, in bytes */
- dma_addr_t first_dma;
- struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */
- struct scatterlist sg[0];
+struct nvme_ctrl_ops {
+ int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
+ int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
+ int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
+ bool (*io_incapable)(struct nvme_ctrl *ctrl);
+ int (*reset_ctrl)(struct nvme_ctrl *ctrl);
+ void (*free_ctrl)(struct nvme_ctrl *ctrl);
};
+/*
+ * Report whether the RDY bit is set in the controller's CSTS register.
+ * A failed register read is reported as "not ready".
+ */
+static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
+{
+	u32 csts = 0;
+	int err = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts);
+
+	return !err && (csts & NVME_CSTS_RDY);
+}
+
+/*
+ * Report whether the controller is unable to process I/O.
+ *
+ * Returns true when the transport's io_incapable() hook says so, when the
+ * CSTS register cannot be read, or when the CFS bit is set in CSTS.  A
+ * failing transport check or register read therefore counts as incapable,
+ * so callers such as nvme_ns_remove() take their kill path instead of
+ * treating an unreachable controller as healthy.
+ */
+static inline bool nvme_io_incapable(struct nvme_ctrl *ctrl)
+{
+	u32 val = 0;
+
+	if (ctrl->ops->io_incapable(ctrl))
+		return true;
+	if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
+		return true;
+	return val & NVME_CSTS_CFS;
+}
+
+/*
+ * Trigger a subsystem reset by writing to the NSSR register.
+ * Returns -ENOTTY when ctrl->subsystem is not set, otherwise the result of
+ * the register write.
+ */
+static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
+{
+ if (!ctrl->subsystem)
+ return -ENOTTY;
+ /* 0x4E564D65 spells "NVMe" in ASCII. */
+ return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
+}
+
static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
{
return (sector >> (ns->lba_shift - 9));
}
+/*
+ * Build a flush command for namespace @ns in @cmnd.  The command is zeroed
+ * first; only the opcode and namespace ID are filled in.
+ */
+static inline void nvme_setup_flush(struct nvme_ns *ns,
+		struct nvme_command *cmnd)
+{
+	memset(cmnd, 0, sizeof(*cmnd));
+
+	cmnd->common.nsid = cpu_to_le32(ns->ns_id);
+	cmnd->common.opcode = nvme_cmd_flush;
+}
+
+/*
+ * Build a read or write command in @cmnd for block request @req on
+ * namespace @ns.
+ *
+ * The command is zeroed and then filled with opcode, command ID (the
+ * request tag), namespace ID, starting LBA, length, the control flags and
+ * the dataset-management hints derived from the request flags and from the
+ * namespace's metadata/protection settings.
+ */
+static inline void nvme_setup_rw(struct nvme_ns *ns, struct request *req,
+ struct nvme_command *cmnd)
+{
+ u16 control = 0;
+ u32 dsmgmt = 0;
+
+ if (req->cmd_flags & REQ_FUA)
+ control |= NVME_RW_FUA;
+ if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
+ control |= NVME_RW_LR;
+
+ if (req->cmd_flags & REQ_RAHEAD)
+ dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
+
+ memset(cmnd, 0, sizeof(*cmnd));
+ /* A non-zero data direction selects the write opcode. */
+ cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
+ cmnd->rw.command_id = req->tag;
+ cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
+ cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ /* The length field is the number of logical blocks minus one. */
+ cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+
+ /* Protection-information flags apply only when the namespace has metadata. */
+ if (ns->ms) {
+ switch (ns->pi_type) {
+ case NVME_NS_DPS_PI_TYPE3:
+ control |= NVME_RW_PRINFO_PRCHK_GUARD;
+ break;
+ case NVME_NS_DPS_PI_TYPE1:
+ case NVME_NS_DPS_PI_TYPE2:
+ control |= NVME_RW_PRINFO_PRCHK_GUARD |
+ NVME_RW_PRINFO_PRCHK_REF;
+ /* Types 1 and 2 also carry the starting block number as reference tag. */
+ cmnd->rw.reftag = cpu_to_le32(
+ nvme_block_nr(ns, blk_rq_pos(req)));
+ break;
+ }
+ /* PRACT is set when the request carries no integrity payload. */
+ if (!blk_integrity_rq(req))
+ control |= NVME_RW_PRINFO_PRACT;
+ }
+
+ cmnd->rw.control = cpu_to_le16(control);
+ cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
+}
+
+
+/*
+ * Translate an NVMe status value into an errno.  Only the low 11 bits of
+ * @status are examined: success maps to 0, "capacity exceeded" to -ENOSPC
+ * and everything else to -EIO.
+ */
+static inline int nvme_error_status(u16 status)
+{
+	u16 code = status & 0x7ff;
+
+	if (code == NVME_SC_SUCCESS)
+		return 0;
+	if (code == NVME_SC_CAP_EXCEEDED)
+		return -ENOSPC;
+	return -EIO;
+}
+
+/*
+ * Decide whether a failed request should be retried.  It is not retried
+ * when the status has the DNR bit set, when the request is marked
+ * no-retry, or when at least req->timeout jiffies have passed since the
+ * request started.
+ */
+static inline bool nvme_req_needs_retry(struct request *req, u16 status)
+{
+	if (status & NVME_SC_DNR)
+		return false;
+	if (blk_noretry_request(req))
+		return false;
+
+	return (jiffies - req->start_time) < req->timeout;
+}
+
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
+int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
+ const struct nvme_ctrl_ops *ops, unsigned long quirks);
+void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
+void nvme_put_ctrl(struct nvme_ctrl *ctrl);
+int nvme_init_identify(struct nvme_ctrl *ctrl);
+
+void nvme_scan_namespaces(struct nvme_ctrl *ctrl);
+void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
+
+void nvme_stop_queues(struct nvme_ctrl *ctrl);
+void nvme_start_queues(struct nvme_ctrl *ctrl);
+
+struct request *nvme_alloc_request(struct request_queue *q,
+ struct nvme_command *cmd, unsigned int flags);
+void nvme_requeue_req(struct request *req);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
- void *buffer, void __user *ubuffer, unsigned bufflen,
+ void *buffer, unsigned bufflen, u32 *result, unsigned timeout);
+int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void __user *ubuffer, unsigned bufflen, u32 *result,
+ unsigned timeout);
+int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void __user *ubuffer, unsigned bufflen,
+ void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
u32 *result, unsigned timeout);
-int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id);
-int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id);
+int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
struct nvme_id_ns **id);
-int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log);
-int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
+int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log);
+int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
dma_addr_t dma_addr, u32 *result);
-int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
+int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
dma_addr_t dma_addr, u32 *result);
+int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
+
+extern spinlock_t dev_list_lock;
struct sg_io_hdr;
}
#endif /* CONFIG_NVM */
+int __init nvme_core_init(void);
+void nvme_core_exit(void);
+
#endif /* _NVME_H */
* more details.
*/
+#include <linux/aer.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
-#include <linux/list_sort.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/poison.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/t10-pi.h>
#include <linux/types.h>
-#include <linux/pr.h>
-#include <scsi/sg.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <asm/unaligned.h>
-#include <uapi/linux/nvme_ioctl.h>
#include "nvme.h"
-#define NVME_MINORS (1U << MINORBITS)
#define NVME_Q_DEPTH 1024
#define NVME_AQ_DEPTH 256
#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
-#define ADMIN_TIMEOUT (admin_timeout * HZ)
-#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ)
+
+/*
+ * We handle AEN commands ourselves and don't even let the
+ * block layer know about them.
+ */
+#define NVME_NR_AEN_COMMANDS 1
+#define NVME_AQ_BLKMQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
-static unsigned char admin_timeout = 60;
+unsigned char admin_timeout = 60;
module_param(admin_timeout, byte, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
-static unsigned char shutdown_timeout = 5;
+unsigned char shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
-static int nvme_major;
-module_param(nvme_major, int, 0);
-
-static int nvme_char_major;
-module_param(nvme_char_major, int, 0);
-
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
module_param(use_cmb_sqes, bool, 0644);
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");
-static DEFINE_SPINLOCK(dev_list_lock);
static LIST_HEAD(dev_list);
static struct task_struct *nvme_thread;
static struct workqueue_struct *nvme_workq;
static wait_queue_head_t nvme_kthread_wait;
-static struct class *nvme_class;
+struct nvme_dev;
+struct nvme_queue;
-static int __nvme_reset(struct nvme_dev *dev);
static int nvme_reset(struct nvme_dev *dev);
static void nvme_process_cq(struct nvme_queue *nvmeq);
-static void nvme_dead_ctrl(struct nvme_dev *dev);
+static void nvme_remove_dead_ctrl(struct nvme_dev *dev);
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
-struct async_cmd_info {
- struct kthread_work work;
- struct kthread_worker *worker;
- struct request *req;
- u32 result;
- int status;
- void *ctx;
+/*
+ * Represents an NVM Express device. Each nvme_dev is a PCI function.
+ */
+struct nvme_dev {
+ struct list_head node;
+ struct nvme_queue **queues;
+ struct blk_mq_tag_set tagset;
+ struct blk_mq_tag_set admin_tagset;
+ u32 __iomem *dbs;
+ struct device *dev;
+ struct dma_pool *prp_page_pool;
+ struct dma_pool *prp_small_pool;
+ unsigned queue_count;
+ unsigned online_queues;
+ unsigned max_qid;
+ int q_depth;
+ u32 db_stride;
+ struct msix_entry *entry;
+ void __iomem *bar;
+ struct work_struct reset_work;
+ struct work_struct scan_work;
+ struct work_struct remove_work;
+ struct mutex shutdown_lock;
+ bool subsystem;
+ void __iomem *cmb;
+ dma_addr_t cmb_dma_addr;
+ u64 cmb_size;
+ u32 cmbsz;
+ unsigned long flags;
+
+#define NVME_CTRL_RESETTING 0
+
+ struct nvme_ctrl ctrl;
+ struct completion ioq_wait;
};
+/*
+ * Map a generic controller back to the PCI nvme_dev that embeds it as its
+ * "ctrl" member.  Only valid for controllers embedded in a struct nvme_dev.
+ */
+static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
+{
+ return container_of(ctrl, struct nvme_dev, ctrl);
+}
+
/*
* An NVM Express queue. Each device has at least two (one for admin
* commands and one for I/O commands).
u16 qid;
u8 cq_phase;
u8 cqe_seen;
- struct async_cmd_info cmdinfo;
+};
+
+/*
+ * The nvme_iod describes the data in an I/O, including the list of PRP
+ * entries. You can't see it in this data structure because C doesn't let
+ * me express that. Use nvme_init_iod to ensure there's enough space
+ * allocated to store the PRP list.
+ */
+struct nvme_iod {
+ struct nvme_queue *nvmeq;
+ int aborted;
+ int npages; /* In the PRP list. 0 means small pool in use */
+ int nents; /* Used in scatterlist */
+ int length; /* Of data, in bytes */
+ dma_addr_t first_dma;
+ struct scatterlist meta_sg; /* metadata requires single contiguous buffer */
+ struct scatterlist *sg;
+ struct scatterlist inline_sg[0];
};
/*
BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
}
-typedef void (*nvme_completion_fn)(struct nvme_queue *, void *,
- struct nvme_completion *);
-
-struct nvme_cmd_info {
- nvme_completion_fn fn;
- void *ctx;
- int aborted;
- struct nvme_queue *nvmeq;
- struct nvme_iod iod[0];
-};
-
/*
* Max size of iod being embedded in the request payload
*/
#define NVME_INT_PAGES 2
-#define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->page_size)
-#define NVME_INT_MASK 0x01
+#define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->ctrl.page_size)
/*
* Will slightly overestimate the number of pages needed. This is OK
*/
static int nvme_npages(unsigned size, struct nvme_dev *dev)
{
- unsigned nprps = DIV_ROUND_UP(size + dev->page_size, dev->page_size);
+ unsigned nprps = DIV_ROUND_UP(size + dev->ctrl.page_size,
+ dev->ctrl.page_size);
return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
}
-static unsigned int nvme_cmd_size(struct nvme_dev *dev)
+static unsigned int nvme_iod_alloc_size(struct nvme_dev *dev,
+ unsigned int size, unsigned int nseg)
{
- unsigned int ret = sizeof(struct nvme_cmd_info);
-
- ret += sizeof(struct nvme_iod);
- ret += sizeof(__le64 *) * nvme_npages(NVME_INT_BYTES(dev), dev);
- ret += sizeof(struct scatterlist) * NVME_INT_PAGES;
+ return sizeof(__le64 *) * nvme_npages(size, dev) +
+ sizeof(struct scatterlist) * nseg;
+}
- return ret;
+static unsigned int nvme_cmd_size(struct nvme_dev *dev)
+{
+ return sizeof(struct nvme_iod) +
+ nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES);
}
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int numa_node)
{
struct nvme_dev *dev = data;
- struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = dev->queues[0];
BUG_ON(!nvmeq);
- cmd->nvmeq = nvmeq;
+ iod->nvmeq = nvmeq;
return 0;
}
unsigned int numa_node)
{
struct nvme_dev *dev = data;
- struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
BUG_ON(!nvmeq);
- cmd->nvmeq = nvmeq;
+ iod->nvmeq = nvmeq;
return 0;
}
-static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx,
- nvme_completion_fn handler)
+/*
+ * Handle the completion entry of an asynchronous event request.
+ *
+ * The event slot is handed back to ctrl.event_limit when the command
+ * succeeded or was aborted.  Only successful completions are decoded: a
+ * namespace-changed notice queues scan_work on nvme_workq, every other event
+ * is just logged with its raw result.
+ */
+static void nvme_complete_async_event(struct nvme_dev *dev,
+ struct nvme_completion *cqe)
{
- cmd->fn = handler;
- cmd->ctx = ctx;
- cmd->aborted = 0;
- blk_mq_start_request(blk_mq_rq_from_pdu(cmd));
-}
-
-static void *iod_get_private(struct nvme_iod *iod)
-{
- return (void *) (iod->private & ~0x1UL);
-}
-
-/*
- * If bit 0 is set, the iod is embedded in the request payload.
- */
-static bool iod_should_kfree(struct nvme_iod *iod)
-{
- return (iod->private & NVME_INT_MASK) == 0;
-}
-
-/* Special values must be less than 0x1000 */
-#define CMD_CTX_BASE ((void *)POISON_POINTER_DELTA)
-#define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE)
-#define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE)
-#define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE)
-
-static void special_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
-{
- if (ctx == CMD_CTX_CANCELLED)
- return;
- if (ctx == CMD_CTX_COMPLETED) {
- dev_warn(nvmeq->q_dmadev,
- "completed id %d twice on queue %d\n",
- cqe->command_id, le16_to_cpup(&cqe->sq_id));
- return;
- }
- if (ctx == CMD_CTX_INVALID) {
- dev_warn(nvmeq->q_dmadev,
- "invalid id %d completed on queue %d\n",
- cqe->command_id, le16_to_cpup(&cqe->sq_id));
- return;
- }
- dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx);
-}
-
-static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn)
-{
- void *ctx;
-
- if (fn)
- *fn = cmd->fn;
- ctx = cmd->ctx;
- cmd->fn = special_completion;
- cmd->ctx = CMD_CTX_CANCELLED;
- return ctx;
-}
-
-static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
-{
- u32 result = le32_to_cpup(&cqe->result);
- u16 status = le16_to_cpup(&cqe->status) >> 1;
+ /* bit 0 of the status field is the phase bit tested in __nvme_process_cq() */
+ u16 status = le16_to_cpu(cqe->status) >> 1;
+ u32 result = le32_to_cpu(cqe->result);
if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
- ++nvmeq->dev->event_limit;
+ ++dev->ctrl.event_limit;
if (status != NVME_SC_SUCCESS)
return;
switch (result & 0xff07) {
case NVME_AER_NOTICE_NS_CHANGED:
- dev_info(nvmeq->q_dmadev, "rescanning\n");
- schedule_work(&nvmeq->dev->scan_work);
+ dev_info(dev->dev, "rescanning\n");
+ queue_work(nvme_workq, &dev->scan_work);
+ /* handled: do not fall through into the unknown-event warning */
+ break;
default:
- dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
- }
-}
-
-static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
-{
- struct request *req = ctx;
-
- u16 status = le16_to_cpup(&cqe->status) >> 1;
- u32 result = le32_to_cpup(&cqe->result);
-
- blk_mq_free_request(req);
-
- dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
- ++nvmeq->dev->abort_limit;
-}
-
-static void async_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
-{
- struct async_cmd_info *cmdinfo = ctx;
- cmdinfo->result = le32_to_cpup(&cqe->result);
- cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
- queue_kthread_work(cmdinfo->worker, &cmdinfo->work);
- blk_mq_free_request(cmdinfo->req);
-}
-
-static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq,
- unsigned int tag)
-{
- struct request *req = blk_mq_tag_to_rq(*nvmeq->tags, tag);
-
- return blk_mq_rq_to_pdu(req);
-}
-
-/*
- * Called with local interrupts disabled and the q_lock held. May not sleep.
- */
-static void *nvme_finish_cmd(struct nvme_queue *nvmeq, int tag,
- nvme_completion_fn *fn)
-{
- struct nvme_cmd_info *cmd = get_cmd_from_tag(nvmeq, tag);
- void *ctx;
- if (tag >= nvmeq->q_depth) {
- *fn = special_completion;
- return CMD_CTX_INVALID;
+ dev_warn(dev->dev, "async event result %08x\n", result);
}
- if (fn)
- *fn = cmd->fn;
- ctx = cmd->ctx;
- cmd->fn = special_completion;
- cmd->ctx = CMD_CTX_COMPLETED;
- return ctx;
}
/**
- * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
+ * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
* @nvmeq: The queue to use
* @cmd: The command to send
*
nvmeq->sq_tail = tail;
}
-static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
-{
- unsigned long flags;
- spin_lock_irqsave(&nvmeq->q_lock, flags);
- __nvme_submit_cmd(nvmeq, cmd);
- spin_unlock_irqrestore(&nvmeq->q_lock, flags);
-}
-
-static __le64 **iod_list(struct nvme_iod *iod)
-{
- return ((void *)iod) + iod->offset;
-}
-
-static inline void iod_init(struct nvme_iod *iod, unsigned nbytes,
- unsigned nseg, unsigned long private)
-{
- iod->private = private;
- iod->offset = offsetof(struct nvme_iod, sg[nseg]);
- iod->npages = -1;
- iod->length = nbytes;
- iod->nents = 0;
-}
-
-static struct nvme_iod *
-__nvme_alloc_iod(unsigned nseg, unsigned bytes, struct nvme_dev *dev,
- unsigned long priv, gfp_t gfp)
+/*
+ * Return the array of PRP-list page pointers belonging to @req.  It starts
+ * directly behind the request's nr_phys_segments scatterlist entries in
+ * iod->sg, so the result depends on req->nr_phys_segments staying unchanged
+ * between set-up and teardown.
+ */
+static __le64 **iod_list(struct request *req)
{
- struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) +
- sizeof(__le64 *) * nvme_npages(bytes, dev) +
- sizeof(struct scatterlist) * nseg, gfp);
-
- if (iod)
- iod_init(iod, bytes, nseg, priv);
-
- return iod;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ return (__le64 **)(iod->sg + req->nr_phys_segments);
}
-static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
- gfp_t gfp)
+/*
+ * Set up the struct nvme_iod stored in @rq's PDU before the request is mapped.
+ *
+ * A discard is sized as one struct nvme_dsm_range, anything else by
+ * blk_rq_bytes().  If the request has more than NVME_INT_PAGES segments or
+ * more than NVME_INT_BYTES(dev) bytes, the scatterlist/PRP-pointer area is
+ * kmalloc'ed with GFP_ATOMIC; otherwise iod->inline_sg is used.
+ *
+ * Returns 0 on success, BLK_MQ_RQ_QUEUE_BUSY if the allocation fails.
+ */
+static int nvme_init_iod(struct request *rq, struct nvme_dev *dev)
{
- unsigned size = !(rq->cmd_flags & REQ_DISCARD) ? blk_rq_bytes(rq) :
- sizeof(struct nvme_dsm_range);
- struct nvme_iod *iod;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
+ int nseg = rq->nr_phys_segments;
+ unsigned size;
- if (rq->nr_phys_segments <= NVME_INT_PAGES &&
- size <= NVME_INT_BYTES(dev)) {
- struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(rq);
+ if (rq->cmd_flags & REQ_DISCARD)
+ size = sizeof(struct nvme_dsm_range);
+ else
+ size = blk_rq_bytes(rq);
- iod = cmd->iod;
- iod_init(iod, size, rq->nr_phys_segments,
- (unsigned long) rq | NVME_INT_MASK);
- return iod;
+ /* too big for the space embedded in the request: allocate separately */
+ if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
+ iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
+ if (!iod->sg)
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ } else {
+ iod->sg = iod->inline_sg;
}
- return __nvme_alloc_iod(rq->nr_phys_segments, size, dev,
- (unsigned long) rq, gfp);
+ iod->aborted = 0;
+ /* NOTE(review): -1 presumably means "no PRP pages to free" - confirm in nvme_free_iod() */
+ iod->npages = -1;
+ iod->nents = 0;
+ iod->length = size;
+ return 0;
}
-static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
+static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
{
- const int last_prp = dev->page_size / 8 - 1;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ const int last_prp = dev->ctrl.page_size / 8 - 1;
int i;
- __le64 **list = iod_list(iod);
+ __le64 **list = iod_list(req);
dma_addr_t prp_dma = iod->first_dma;
if (iod->npages == 0)
prp_dma = next_prp_dma;
}
- if (iod_should_kfree(iod))
- kfree(iod);
-}
-
-static int nvme_error_status(u16 status)
-{
- switch (status & 0x7ff) {
- case NVME_SC_SUCCESS:
- return 0;
- case NVME_SC_CAP_EXCEEDED:
- return -ENOSPC;
- default:
- return -EIO;
- }
+ if (iod->sg != iod->inline_sg)
+ kfree(iod->sg);
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
}
kunmap_atomic(pmap);
}
-
-static void nvme_init_integrity(struct nvme_ns *ns)
-{
- struct blk_integrity integrity;
-
- switch (ns->pi_type) {
- case NVME_NS_DPS_PI_TYPE3:
- integrity.profile = &t10_pi_type3_crc;
- break;
- case NVME_NS_DPS_PI_TYPE1:
- case NVME_NS_DPS_PI_TYPE2:
- integrity.profile = &t10_pi_type1_crc;
- break;
- default:
- integrity.profile = NULL;
- break;
- }
- integrity.tuple_size = ns->ms;
- blk_integrity_register(ns->disk, &integrity);
- blk_queue_max_integrity_segments(ns->queue, 1);
-}
#else /* CONFIG_BLK_DEV_INTEGRITY */
static void nvme_dif_remap(struct request *req,
void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
{
}
-static void nvme_init_integrity(struct nvme_ns *ns)
-{
-}
#endif
-static void req_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
-{
- struct nvme_iod *iod = ctx;
- struct request *req = iod_get_private(iod);
- struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
- u16 status = le16_to_cpup(&cqe->status) >> 1;
- bool requeue = false;
- int error = 0;
-
- if (unlikely(status)) {
- if (!(status & NVME_SC_DNR || blk_noretry_request(req))
- && (jiffies - req->start_time) < req->timeout) {
- unsigned long flags;
-
- requeue = true;
- blk_mq_requeue_request(req);
- spin_lock_irqsave(req->q->queue_lock, flags);
- if (!blk_queue_stopped(req->q))
- blk_mq_kick_requeue_list(req->q);
- spin_unlock_irqrestore(req->q->queue_lock, flags);
- goto release_iod;
- }
-
- if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
- if (cmd_rq->ctx == CMD_CTX_CANCELLED)
- error = -EINTR;
- else
- error = status;
- } else {
- error = nvme_error_status(status);
- }
- }
-
- if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
- u32 result = le32_to_cpup(&cqe->result);
- req->special = (void *)(uintptr_t)result;
- }
-
- if (cmd_rq->aborted)
- dev_warn(nvmeq->dev->dev,
- "completing aborted command with status:%04x\n",
- error);
-
-release_iod:
- if (iod->nents) {
- dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
- rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (blk_integrity_rq(req)) {
- if (!rq_data_dir(req))
- nvme_dif_remap(req, nvme_dif_complete);
- dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1,
- rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
- }
- }
- nvme_free_iod(nvmeq->dev, iod);
-
- if (likely(!requeue))
- blk_mq_complete_request(req, error);
-}
-
-/* length is in bytes. gfp flags indicates whether we may sleep. */
-static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
- int total_len, gfp_t gfp)
+static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req,
+ int total_len)
{
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct dma_pool *pool;
int length = total_len;
struct scatterlist *sg = iod->sg;
int dma_len = sg_dma_len(sg);
u64 dma_addr = sg_dma_address(sg);
- u32 page_size = dev->page_size;
+ u32 page_size = dev->ctrl.page_size;
int offset = dma_addr & (page_size - 1);
__le64 *prp_list;
- __le64 **list = iod_list(iod);
+ __le64 **list = iod_list(req);
dma_addr_t prp_dma;
int nprps, i;
length -= (page_size - offset);
if (length <= 0)
- return total_len;
+ return true;
dma_len -= (page_size - offset);
if (dma_len) {
if (length <= page_size) {
iod->first_dma = dma_addr;
- return total_len;
+ return true;
}
nprps = DIV_ROUND_UP(length, page_size);
iod->npages = 1;
}
- prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
+ prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list) {
iod->first_dma = dma_addr;
iod->npages = -1;
- return (total_len - length) + page_size;
+ return false;
}
list[0] = prp_list;
iod->first_dma = prp_dma;
for (;;) {
if (i == page_size >> 3) {
__le64 *old_prp_list = prp_list;
- prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
+ prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list)
- return total_len - length;
+ return false;
list[iod->npages++] = prp_list;
prp_list[0] = old_prp_list[i - 1];
old_prp_list[i - 1] = cpu_to_le64(prp_dma);
dma_len = sg_dma_len(sg);
}
- return total_len;
+ return true;
}
-static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
- struct nvme_iod *iod)
+/*
+ * Map the data (and, for integrity requests, the single metadata segment) of
+ * @req for DMA and store the resulting addresses in @cmnd's prp1, prp2 and
+ * metadata fields.
+ *
+ * Returns BLK_MQ_RQ_QUEUE_OK on success, BLK_MQ_RQ_QUEUE_BUSY if DMA mapping
+ * of the data or PRP set-up fails, and BLK_MQ_RQ_QUEUE_ERROR if the request
+ * maps to no segments or its integrity payload cannot be mapped as exactly
+ * one segment.  On failure the data scatterlist is unmapped again; PRP pages
+ * are not released here (nvme_queue_rq()'s error path calls nvme_free_iod()).
+ */
+static int nvme_map_data(struct nvme_dev *dev, struct request *req,
+ struct nvme_command *cmnd)
{
- struct nvme_command cmnd;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct request_queue *q = req->q;
+ enum dma_data_direction dma_dir = rq_data_dir(req) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE;
+ int ret = BLK_MQ_RQ_QUEUE_ERROR;
- memcpy(&cmnd, req->cmd, sizeof(cmnd));
- cmnd.rw.command_id = req->tag;
- if (req->nr_phys_segments) {
- cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
- }
+ sg_init_table(iod->sg, req->nr_phys_segments);
+ iod->nents = blk_rq_map_sg(q, req, iod->sg);
+ if (!iod->nents)
+ goto out;
- __nvme_submit_cmd(nvmeq, &cmnd);
-}
+ /* failures from here until the PRPs are built are reported as BUSY */
+ ret = BLK_MQ_RQ_QUEUE_BUSY;
+ if (!dma_map_sg(dev->dev, iod->sg, iod->nents, dma_dir))
+ goto out;
-/*
- * We reuse the small pool to allocate the 16-byte range here as it is not
- * worth having a special pool for these or additional cases to handle freeing
- * the iod.
- */
-static void nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
- struct request *req, struct nvme_iod *iod)
-{
- struct nvme_dsm_range *range =
- (struct nvme_dsm_range *)iod_list(iod)[0];
- struct nvme_command cmnd;
+ if (!nvme_setup_prps(dev, req, blk_rq_bytes(req)))
+ goto out_unmap;
- range->cattr = cpu_to_le32(0);
- range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
- range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ /* integrity mapping failures are reported as ERROR */
+ ret = BLK_MQ_RQ_QUEUE_ERROR;
+ if (blk_integrity_rq(req)) {
+ if (blk_rq_count_integrity_sg(q, req->bio) != 1)
+ goto out_unmap;
- memset(&cmnd, 0, sizeof(cmnd));
- cmnd.dsm.opcode = nvme_cmd_dsm;
- cmnd.dsm.command_id = req->tag;
- cmnd.dsm.nsid = cpu_to_le32(ns->ns_id);
- cmnd.dsm.prp1 = cpu_to_le64(iod->first_dma);
- cmnd.dsm.nr = 0;
- cmnd.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
+ sg_init_table(&iod->meta_sg, 1);
+ if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
+ goto out_unmap;
- __nvme_submit_cmd(nvmeq, &cmnd);
-}
+ /* data going to the device: run the prep remap before mapping metadata */
+ if (rq_data_dir(req))
+ nvme_dif_remap(req, nvme_dif_prep);
-static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
- int cmdid)
-{
- struct nvme_command cmnd;
+ if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
+ goto out_unmap;
+ }
- memset(&cmnd, 0, sizeof(cmnd));
- cmnd.common.opcode = nvme_cmd_flush;
- cmnd.common.command_id = cmdid;
- cmnd.common.nsid = cpu_to_le32(ns->ns_id);
+ cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+ cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+ if (blk_integrity_rq(req))
+ cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
+ return BLK_MQ_RQ_QUEUE_OK;
- __nvme_submit_cmd(nvmeq, &cmnd);
+out_unmap:
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+out:
+ return ret;
}
-static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
- struct nvme_ns *ns)
+/*
+ * Undo nvme_map_data() for @req and release its iod.
+ *
+ * If any segments were mapped (iod->nents != 0) the data scatterlist is
+ * unmapped; for integrity requests the completion remap is run when data came
+ * from the device, and the metadata segment is unmapped as well.
+ * nvme_free_iod() is called unconditionally.
+ */
+static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
{
- struct request *req = iod_get_private(iod);
- struct nvme_command cmnd;
- u16 control = 0;
- u32 dsmgmt = 0;
-
- if (req->cmd_flags & REQ_FUA)
- control |= NVME_RW_FUA;
- if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
- control |= NVME_RW_LR;
-
- if (req->cmd_flags & REQ_RAHEAD)
- dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
-
- memset(&cmnd, 0, sizeof(cmnd));
- cmnd.rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
- cmnd.rw.command_id = req->tag;
- cmnd.rw.nsid = cpu_to_le32(ns->ns_id);
- cmnd.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- cmnd.rw.prp2 = cpu_to_le64(iod->first_dma);
- cmnd.rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
- cmnd.rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
-
- if (ns->ms) {
- switch (ns->pi_type) {
- case NVME_NS_DPS_PI_TYPE3:
- control |= NVME_RW_PRINFO_PRCHK_GUARD;
- break;
- case NVME_NS_DPS_PI_TYPE1:
- case NVME_NS_DPS_PI_TYPE2:
- control |= NVME_RW_PRINFO_PRCHK_GUARD |
- NVME_RW_PRINFO_PRCHK_REF;
- cmnd.rw.reftag = cpu_to_le32(
- nvme_block_nr(ns, blk_rq_pos(req)));
- break;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ enum dma_data_direction dma_dir = rq_data_dir(req) ?
+ DMA_TO_DEVICE : DMA_FROM_DEVICE;
+
+ if (iod->nents) {
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+ if (blk_integrity_rq(req)) {
+ /* mirror of the nvme_dif_prep remap done at map time for the other direction */
+ if (!rq_data_dir(req))
+ nvme_dif_remap(req, nvme_dif_complete);
+ dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
}
- if (blk_integrity_rq(req))
- cmnd.rw.metadata =
- cpu_to_le64(sg_dma_address(iod->meta_sg));
- else
- control |= NVME_RW_PRINFO_PRACT;
}
- cmnd.rw.control = cpu_to_le16(control);
- cmnd.rw.dsmgmt = cpu_to_le32(dsmgmt);
+ nvme_free_iod(dev, req);
+}
- __nvme_submit_cmd(nvmeq, &cmnd);
+/*
+ * We reuse the small pool to allocate the 16-byte range here as it is not
+ * worth having a special pool for these or additional cases to handle freeing
+ * the iod.
+ *
+ * Fills @cmnd with a DSM command carrying NVME_DSMGMT_AD for the LBA range
+ * covered by @req.  command_id is left for the caller to set.
+ *
+ * Returns BLK_MQ_RQ_QUEUE_OK on success, BLK_MQ_RQ_QUEUE_BUSY if the range
+ * buffer cannot be allocated.
+ */
+static int nvme_setup_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns,
+ struct request *req, struct nvme_command *cmnd)
+{
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_dsm_range *range;
- return 0;
+ range = dma_pool_alloc(nvmeq->dev->prp_small_pool, GFP_ATOMIC,
+ &iod->first_dma);
+ if (!range)
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ /*
+ * Record the range buffer in list slot 0 with npages == 0.
+ * NOTE(review): presumably this is what lets nvme_free_iod() return it
+ * to the small pool - confirm against its npages == 0 branch.
+ */
+ iod_list(req)[0] = (__le64 *)range;
+ iod->npages = 0;
+
+ range->cattr = cpu_to_le32(0);
+ range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift);
+ range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+
+ memset(cmnd, 0, sizeof(*cmnd));
+ cmnd->dsm.opcode = nvme_cmd_dsm;
+ cmnd->dsm.nsid = cpu_to_le32(ns->ns_id);
+ cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma);
+ cmnd->dsm.nr = 0;
+ cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
+ return BLK_MQ_RQ_QUEUE_OK;
}
/*
struct nvme_queue *nvmeq = hctx->driver_data;
struct nvme_dev *dev = nvmeq->dev;
struct request *req = bd->rq;
- struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
- struct nvme_iod *iod;
- enum dma_data_direction dma_dir;
+ struct nvme_command cmnd;
+ int ret = BLK_MQ_RQ_QUEUE_OK;
/*
* If formated with metadata, require the block layer provide a buffer
if (ns && ns->ms && !blk_integrity_rq(req)) {
if (!(ns->pi_type && ns->ms == 8) &&
req->cmd_type != REQ_TYPE_DRV_PRIV) {
- blk_mq_complete_request(req, -EFAULT);
+ blk_mq_end_request(req, -EFAULT);
return BLK_MQ_RQ_QUEUE_OK;
}
}
- iod = nvme_alloc_iod(req, dev, GFP_ATOMIC);
- if (!iod)
- return BLK_MQ_RQ_QUEUE_BUSY;
+ ret = nvme_init_iod(req, dev);
+ if (ret)
+ return ret;
if (req->cmd_flags & REQ_DISCARD) {
- void *range;
- /*
- * We reuse the small pool to allocate the 16-byte range here
- * as it is not worth having a special pool for these or
- * additional cases to handle freeing the iod.
- */
- range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC,
- &iod->first_dma);
- if (!range)
- goto retry_cmd;
- iod_list(iod)[0] = (__le64 *)range;
- iod->npages = 0;
- } else if (req->nr_phys_segments) {
- dma_dir = rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
+ ret = nvme_setup_discard(nvmeq, ns, req, &cmnd);
+ } else {
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ memcpy(&cmnd, req->cmd, sizeof(cmnd));
+ else if (req->cmd_flags & REQ_FLUSH)
+ nvme_setup_flush(ns, &cmnd);
+ else
+ nvme_setup_rw(ns, req, &cmnd);
+
+ if (req->nr_phys_segments)
+ ret = nvme_map_data(dev, req, &cmnd);
+ }
- sg_init_table(iod->sg, req->nr_phys_segments);
- iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
- if (!iod->nents)
- goto error_cmd;
+ if (ret)
+ goto out;
- if (!dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir))
- goto retry_cmd;
+ cmnd.common.command_id = req->tag;
+ blk_mq_start_request(req);
- if (blk_rq_bytes(req) !=
- nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
- dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
- goto retry_cmd;
- }
- if (blk_integrity_rq(req)) {
- if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) {
- dma_unmap_sg(dev->dev, iod->sg, iod->nents,
- dma_dir);
- goto error_cmd;
- }
+ spin_lock_irq(&nvmeq->q_lock);
+ __nvme_submit_cmd(nvmeq, &cmnd);
+ nvme_process_cq(nvmeq);
+ spin_unlock_irq(&nvmeq->q_lock);
+ return BLK_MQ_RQ_QUEUE_OK;
+out:
+ nvme_free_iod(dev, req);
+ return ret;
+}
- sg_init_table(iod->meta_sg, 1);
- if (blk_rq_map_integrity_sg(
- req->q, req->bio, iod->meta_sg) != 1) {
- dma_unmap_sg(dev->dev, iod->sg, iod->nents,
- dma_dir);
- goto error_cmd;
- }
+static void nvme_complete_rq(struct request *req)
+{
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_dev *dev = iod->nvmeq->dev;
+ int error = 0;
- if (rq_data_dir(req))
- nvme_dif_remap(req, nvme_dif_prep);
+ nvme_unmap_data(dev, req);
- if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) {
- dma_unmap_sg(dev->dev, iod->sg, iod->nents,
- dma_dir);
- goto error_cmd;
- }
+ if (unlikely(req->errors)) {
+ if (nvme_req_needs_retry(req, req->errors)) {
+ nvme_requeue_req(req);
+ return;
}
- }
- nvme_set_info(cmd, iod, req_completion);
- spin_lock_irq(&nvmeq->q_lock);
- if (req->cmd_type == REQ_TYPE_DRV_PRIV)
- nvme_submit_priv(nvmeq, req, iod);
- else if (req->cmd_flags & REQ_DISCARD)
- nvme_submit_discard(nvmeq, ns, req, iod);
- else if (req->cmd_flags & REQ_FLUSH)
- nvme_submit_flush(nvmeq, ns, req->tag);
- else
- nvme_submit_iod(nvmeq, iod, ns);
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ error = req->errors;
+ else
+ error = nvme_error_status(req->errors);
+ }
- nvme_process_cq(nvmeq);
- spin_unlock_irq(&nvmeq->q_lock);
- return BLK_MQ_RQ_QUEUE_OK;
+ if (unlikely(iod->aborted)) {
+ dev_warn(dev->dev,
+ "completing aborted command with status: %04x\n",
+ req->errors);
+ }
- error_cmd:
- nvme_free_iod(dev, iod);
- return BLK_MQ_RQ_QUEUE_ERROR;
- retry_cmd:
- nvme_free_iod(dev, iod);
- return BLK_MQ_RQ_QUEUE_BUSY;
+ blk_mq_end_request(req, error);
}
static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
phase = nvmeq->cq_phase;
for (;;) {
- void *ctx;
- nvme_completion_fn fn;
struct nvme_completion cqe = nvmeq->cqes[head];
- if ((le16_to_cpu(cqe.status) & 1) != phase)
+ u16 status = le16_to_cpu(cqe.status);
+ struct request *req;
+
+ if ((status & 1) != phase)
break;
nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
if (++head == nvmeq->q_depth) {
head = 0;
phase = !phase;
}
+
if (tag && *tag == cqe.command_id)
*tag = -1;
- ctx = nvme_finish_cmd(nvmeq, cqe.command_id, &fn);
- fn(nvmeq, ctx, &cqe);
+
+ if (unlikely(cqe.command_id >= nvmeq->q_depth)) {
+ dev_warn(nvmeq->q_dmadev,
+ "invalid id %d completed on queue %d\n",
+ cqe.command_id, le16_to_cpu(cqe.sq_id));
+ continue;
+ }
+
+ /*
+ * AEN requests are special as they don't time out and can
+ * survive any kind of queue freeze and often don't respond to
+ * aborts. We don't even bother to allocate a struct request
+ * for them but rather special case them here.
+ */
+ if (unlikely(nvmeq->qid == 0 &&
+ cqe.command_id >= NVME_AQ_BLKMQ_DEPTH)) {
+ nvme_complete_async_event(nvmeq->dev, &cqe);
+ continue;
+ }
+
+ req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+ u32 result = le32_to_cpu(cqe.result);
+ req->special = (void *)(uintptr_t)result;
+ }
+ blk_mq_complete_request(req, status >> 1);
+
}
/* If the controller ignores the cq head doorbell and continuously
return 0;
}
-/*
- * Returns 0 on success. If the result is negative, it's a Linux error code;
- * if the result is positive, it's an NVM Express status code
- */
-int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
- void *buffer, void __user *ubuffer, unsigned bufflen,
- u32 *result, unsigned timeout)
+static void nvme_submit_async_event(struct nvme_dev *dev)
{
- bool write = cmd->common.opcode & 1;
- struct bio *bio = NULL;
- struct request *req;
- int ret;
-
- req = blk_mq_alloc_request(q, write, 0);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ struct nvme_command c;
- req->cmd_type = REQ_TYPE_DRV_PRIV;
- req->cmd_flags |= REQ_FAILFAST_DRIVER;
- req->__data_len = 0;
- req->__sector = (sector_t) -1;
- req->bio = req->biotail = NULL;
-
- req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
-
- req->cmd = (unsigned char *)cmd;
- req->cmd_len = sizeof(struct nvme_command);
- req->special = (void *)0;
-
- if (buffer && bufflen) {
- ret = blk_rq_map_kern(q, req, buffer, bufflen,
- __GFP_DIRECT_RECLAIM);
- if (ret)
- goto out;
- } else if (ubuffer && bufflen) {
- ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
- __GFP_DIRECT_RECLAIM);
- if (ret)
- goto out;
- bio = req->bio;
- }
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = nvme_admin_async_event;
+ c.common.command_id = NVME_AQ_BLKMQ_DEPTH + --dev->ctrl.event_limit;
- blk_execute_rq(req->q, NULL, req, 0);
- if (bio)
- blk_rq_unmap_user(bio);
- if (result)
- *result = (u32)(uintptr_t)req->special;
- ret = req->errors;
- out:
- blk_mq_free_request(req);
- return ret;
+ __nvme_submit_cmd(dev->queues[0], &c);
}
-int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
- void *buffer, unsigned bufflen)
+static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
- return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+ c.delete_queue.opcode = opcode;
+ c.delete_queue.qid = cpu_to_le16(id);
+
+ return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}
-static int nvme_submit_async_admin_req(struct nvme_dev *dev)
-{
- struct nvme_queue *nvmeq = dev->queues[0];
- struct nvme_command c;
- struct nvme_cmd_info *cmd_info;
- struct request *req;
-
- req = blk_mq_alloc_request(dev->admin_q, WRITE,
- BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->cmd_flags |= REQ_NO_TIMEOUT;
- cmd_info = blk_mq_rq_to_pdu(req);
- nvme_set_info(cmd_info, NULL, async_req_completion);
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_async_event;
- c.common.command_id = req->tag;
-
- blk_mq_free_request(req);
- __nvme_submit_cmd(nvmeq, &c);
- return 0;
-}
-
-static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
- struct nvme_command *cmd,
- struct async_cmd_info *cmdinfo, unsigned timeout)
-{
- struct nvme_queue *nvmeq = dev->queues[0];
- struct request *req;
- struct nvme_cmd_info *cmd_rq;
-
- req = blk_mq_alloc_request(dev->admin_q, WRITE, 0);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->timeout = timeout;
- cmd_rq = blk_mq_rq_to_pdu(req);
- cmdinfo->req = req;
- nvme_set_info(cmd_rq, cmdinfo, async_completion);
- cmdinfo->status = -EINTR;
-
- cmd->common.command_id = req->tag;
-
- nvme_submit_cmd(nvmeq, cmd);
- return 0;
-}
-
-static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
-{
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.delete_queue.opcode = opcode;
- c.delete_queue.qid = cpu_to_le16(id);
-
- return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
-}
-
-static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
- struct nvme_queue *nvmeq)
+static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
+ struct nvme_queue *nvmeq)
{
struct nvme_command c;
int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
c.create_cq.cq_flags = cpu_to_le16(flags);
c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
- return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+ return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}
static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
c.create_sq.sq_flags = cpu_to_le16(flags);
c.create_sq.cqid = cpu_to_le16(qid);
- return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+ return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}
static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
}
-int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
+static void abort_endio(struct request *req, int error)
{
- struct nvme_command c = { };
- int error;
-
- /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
- c.identify.opcode = nvme_admin_identify;
- c.identify.cns = cpu_to_le32(1);
-
- *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
- if (!*id)
- return -ENOMEM;
-
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
- sizeof(struct nvme_id_ctrl));
- if (error)
- kfree(*id);
- return error;
-}
-
-int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
- struct nvme_id_ns **id)
-{
- struct nvme_command c = { };
- int error;
-
- /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
- c.identify.opcode = nvme_admin_identify,
- c.identify.nsid = cpu_to_le32(nsid),
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_queue *nvmeq = iod->nvmeq;
+ u32 result = (u32)(uintptr_t)req->special;
+ u16 status = req->errors;
- *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
- if (!*id)
- return -ENOMEM;
-
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
- sizeof(struct nvme_id_ns));
- if (error)
- kfree(*id);
- return error;
-}
-
-int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
- dma_addr_t dma_addr, u32 *result)
-{
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.features.opcode = nvme_admin_get_features;
- c.features.nsid = cpu_to_le32(nsid);
- c.features.prp1 = cpu_to_le64(dma_addr);
- c.features.fid = cpu_to_le32(fid);
-
- return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
- result, 0);
-}
-
-int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
- dma_addr_t dma_addr, u32 *result)
-{
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.features.opcode = nvme_admin_set_features;
- c.features.prp1 = cpu_to_le64(dma_addr);
- c.features.fid = cpu_to_le32(fid);
- c.features.dword11 = cpu_to_le32(dword11);
-
- return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
- result, 0);
-}
-
-int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
-{
- struct nvme_command c = { };
- int error;
-
- c.common.opcode = nvme_admin_get_log_page,
- c.common.nsid = cpu_to_le32(0xFFFFFFFF),
- c.common.cdw10[0] = cpu_to_le32(
- (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
- NVME_LOG_SMART),
-
- *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
- if (!*log)
- return -ENOMEM;
+ dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
+ atomic_inc(&nvmeq->dev->ctrl.abort_limit);
- error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
- sizeof(struct nvme_smart_log));
- if (error)
- kfree(*log);
- return error;
+ blk_mq_free_request(req);
}
-/**
- * nvme_abort_req - Attempt aborting a request
- *
- * Schedule controller reset if the command was already aborted once before and
- * still hasn't been returned to the driver, or if this is the admin queue.
- */
-static void nvme_abort_req(struct request *req)
+static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
{
- struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = cmd_rq->nvmeq;
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_queue *nvmeq = iod->nvmeq;
struct nvme_dev *dev = nvmeq->dev;
struct request *abort_req;
- struct nvme_cmd_info *abort_cmd;
struct nvme_command cmd;
- if (!nvmeq->qid || cmd_rq->aborted) {
- spin_lock(&dev_list_lock);
- if (!__nvme_reset(dev)) {
- dev_warn(dev->dev,
- "I/O %d QID %d timeout, reset controller\n",
- req->tag, nvmeq->qid);
- }
- spin_unlock(&dev_list_lock);
- return;
+ /*
+ * Shut down immediately if the controller times out while starting. The
+ * reset work will see the pci device disabled when it gets the forced
+ * cancellation error. All outstanding requests are completed on
+ * shutdown, so we return BLK_EH_HANDLED.
+ */
+ if (test_bit(NVME_CTRL_RESETTING, &dev->flags)) {
+ dev_warn(dev->dev,
+ "I/O %d QID %d timeout, disable controller\n",
+ req->tag, nvmeq->qid);
+ nvme_dev_disable(dev, false);
+ req->errors = NVME_SC_CANCELLED;
+ return BLK_EH_HANDLED;
}
- if (!dev->abort_limit)
- return;
+ /*
+ * Shut down the controller immediately and schedule a reset if the
+ * command was already aborted once before and still hasn't been
+ * returned to the driver, or if this is the admin queue.
+ */
+ if (!nvmeq->qid || iod->aborted) {
+ dev_warn(dev->dev,
+ "I/O %d QID %d timeout, reset controller\n",
+ req->tag, nvmeq->qid);
+ nvme_dev_disable(dev, false);
+ queue_work(nvme_workq, &dev->reset_work);
- abort_req = blk_mq_alloc_request(dev->admin_q, WRITE,
- BLK_MQ_REQ_NOWAIT);
- if (IS_ERR(abort_req))
- return;
+ /*
+ * Mark the request as handled, since the inline shutdown
+ * forces all outstanding requests to complete.
+ */
+ req->errors = NVME_SC_CANCELLED;
+ return BLK_EH_HANDLED;
+ }
- abort_cmd = blk_mq_rq_to_pdu(abort_req);
- nvme_set_info(abort_cmd, abort_req, abort_completion);
+ iod->aborted = 1;
+
+ if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
+ atomic_inc(&dev->ctrl.abort_limit);
+ return BLK_EH_RESET_TIMER;
+ }
memset(&cmd, 0, sizeof(cmd));
cmd.abort.opcode = nvme_admin_abort_cmd;
cmd.abort.cid = req->tag;
cmd.abort.sqid = cpu_to_le16(nvmeq->qid);
- cmd.abort.command_id = abort_req->tag;
- --dev->abort_limit;
- cmd_rq->aborted = 1;
+ dev_warn(nvmeq->q_dmadev, "I/O %d QID %d timeout, aborting\n",
+ req->tag, nvmeq->qid);
+
+ abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
+ BLK_MQ_REQ_NOWAIT);
+ if (IS_ERR(abort_req)) {
+ atomic_inc(&dev->ctrl.abort_limit);
+ return BLK_EH_RESET_TIMER;
+ }
+
+ abort_req->timeout = ADMIN_TIMEOUT;
+ abort_req->end_io_data = NULL;
+ blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio);
- dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag,
- nvmeq->qid);
- nvme_submit_cmd(dev->queues[0], &cmd);
+ /*
+ * The aborted req will be completed on receiving the abort req.
+ * We enable the timer again. If hit twice, it'll cause a device reset,
+ * as the device then is in a faulty state.
+ */
+ return BLK_EH_RESET_TIMER;
}
static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
{
struct nvme_queue *nvmeq = data;
- void *ctx;
- nvme_completion_fn fn;
- struct nvme_cmd_info *cmd;
- struct nvme_completion cqe;
+ int status;
if (!blk_mq_request_started(req))
return;
- cmd = blk_mq_rq_to_pdu(req);
-
- if (cmd->ctx == CMD_CTX_CANCELLED)
- return;
+ dev_warn(nvmeq->q_dmadev,
+ "Cancelling I/O %d QID %d\n", req->tag, nvmeq->qid);
+ status = NVME_SC_ABORT_REQ;
if (blk_queue_dying(req->q))
- cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
- else
- cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
-
-
- dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n",
- req->tag, nvmeq->qid);
- ctx = cancel_cmd_info(cmd, &fn);
- fn(nvmeq, ctx, &cqe);
-}
-
-static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
-{
- struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = cmd->nvmeq;
-
- dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
- nvmeq->qid);
- spin_lock_irq(&nvmeq->q_lock);
- nvme_abort_req(req);
- spin_unlock_irq(&nvmeq->q_lock);
-
- /*
- * The aborted req will be completed on receiving the abort req.
- * We enable the timer again. If hit twice, it'll cause a device reset,
- * as the device then is in a faulty state.
- */
- return BLK_EH_RESET_TIMER;
+ status |= NVME_SC_DNR;
+ blk_mq_complete_request(req, status);
}
static void nvme_free_queue(struct nvme_queue *nvmeq)
nvmeq->cq_vector = -1;
spin_unlock_irq(&nvmeq->q_lock);
- if (!nvmeq->qid && nvmeq->dev->admin_q)
- blk_mq_freeze_queue_start(nvmeq->dev->admin_q);
+ if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
+ blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
irq_set_affinity_hint(vector, NULL);
free_irq(vector, nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
}
-static void nvme_disable_queue(struct nvme_dev *dev, int qid)
+static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
- struct nvme_queue *nvmeq = dev->queues[qid];
+ struct nvme_queue *nvmeq = dev->queues[0];
if (!nvmeq)
return;
if (nvme_suspend_queue(nvmeq))
return;
- /* Don't tell the adapter to delete the admin queue.
- * Don't tell a removed adapter to delete IO queues. */
- if (qid && readl(&dev->bar->csts) != -1) {
- adapter_delete_sq(dev, qid);
- adapter_delete_cq(dev, qid);
- }
+ if (shutdown)
+ nvme_shutdown_ctrl(&dev->ctrl);
+ else
+ nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
+ dev->bar + NVME_REG_CAP));
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
int entry_size)
{
int q_depth = dev->q_depth;
- unsigned q_size_aligned = roundup(q_depth * entry_size, dev->page_size);
+ unsigned q_size_aligned = roundup(q_depth * entry_size,
+ dev->ctrl.page_size);
if (q_size_aligned * nr_io_queues > dev->cmb_size) {
u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);
- mem_per_q = round_down(mem_per_q, dev->page_size);
+ mem_per_q = round_down(mem_per_q, dev->ctrl.page_size);
q_depth = div_u64(mem_per_q, entry_size);
/*
int qid, int depth)
{
if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
- unsigned offset = (qid - 1) *
- roundup(SQ_SIZE(depth), dev->page_size);
+ unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
+ dev->ctrl.page_size);
nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
nvmeq->sq_cmds_io = dev->cmb + offset;
} else {
nvmeq->q_dmadev = dev->dev;
nvmeq->dev = dev;
snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
- dev->instance, qid);
+ dev->ctrl.instance, qid);
spin_lock_init(&nvmeq->q_lock);
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
return result;
}
-static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
-{
- unsigned long timeout;
- u32 bit = enabled ? NVME_CSTS_RDY : 0;
-
- timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
-
- while ((readl(&dev->bar->csts) & NVME_CSTS_RDY) != bit) {
- msleep(100);
- if (fatal_signal_pending(current))
- return -EINTR;
- if (time_after(jiffies, timeout)) {
- dev_err(dev->dev,
- "Device not ready; aborting %s\n", enabled ?
- "initialisation" : "reset");
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
-/*
- * If the device has been passed off to us in an enabled state, just clear
- * the enabled bit. The spec says we should set the 'shutdown notification
- * bits', but doing so may cause the device to complete commands to the
- * admin queue ... and we don't know what memory that might be pointing at!
- */
-static int nvme_disable_ctrl(struct nvme_dev *dev, u64 cap)
-{
- dev->ctrl_config &= ~NVME_CC_SHN_MASK;
- dev->ctrl_config &= ~NVME_CC_ENABLE;
- writel(dev->ctrl_config, &dev->bar->cc);
-
- return nvme_wait_ready(dev, cap, false);
-}
-
-static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap)
-{
- dev->ctrl_config &= ~NVME_CC_SHN_MASK;
- dev->ctrl_config |= NVME_CC_ENABLE;
- writel(dev->ctrl_config, &dev->bar->cc);
-
- return nvme_wait_ready(dev, cap, true);
-}
-
-static int nvme_shutdown_ctrl(struct nvme_dev *dev)
-{
- unsigned long timeout;
-
- dev->ctrl_config &= ~NVME_CC_SHN_MASK;
- dev->ctrl_config |= NVME_CC_SHN_NORMAL;
-
- writel(dev->ctrl_config, &dev->bar->cc);
-
- timeout = SHUTDOWN_TIMEOUT + jiffies;
- while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) !=
- NVME_CSTS_SHST_CMPLT) {
- msleep(100);
- if (fatal_signal_pending(current))
- return -EINTR;
- if (time_after(jiffies, timeout)) {
- dev_err(dev->dev,
- "Device shutdown incomplete; abort shutdown\n");
- return -ENODEV;
- }
- }
-
- return 0;
-}
-
static struct blk_mq_ops nvme_mq_admin_ops = {
.queue_rq = nvme_queue_rq,
+ .complete = nvme_complete_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = nvme_admin_init_hctx,
.exit_hctx = nvme_admin_exit_hctx,
static struct blk_mq_ops nvme_mq_ops = {
.queue_rq = nvme_queue_rq,
+ .complete = nvme_complete_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = nvme_init_hctx,
.init_request = nvme_init_request,
static void nvme_dev_remove_admin(struct nvme_dev *dev)
{
- if (dev->admin_q && !blk_queue_dying(dev->admin_q)) {
- blk_cleanup_queue(dev->admin_q);
+ if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
+ blk_cleanup_queue(dev->ctrl.admin_q);
blk_mq_free_tag_set(&dev->admin_tagset);
}
}
static int nvme_alloc_admin_tags(struct nvme_dev *dev)
{
- if (!dev->admin_q) {
+ if (!dev->ctrl.admin_q) {
dev->admin_tagset.ops = &nvme_mq_admin_ops;
dev->admin_tagset.nr_hw_queues = 1;
- dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
- dev->admin_tagset.reserved_tags = 1;
+
+ /*
+ * Subtract one to leave an empty queue entry for 'Full Queue'
+ * condition. See NVM-Express 1.2 specification, section 4.1.2.
+ */
+ dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1;
dev->admin_tagset.timeout = ADMIN_TIMEOUT;
dev->admin_tagset.numa_node = dev_to_node(dev->dev);
dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
if (blk_mq_alloc_tag_set(&dev->admin_tagset))
return -ENOMEM;
- dev->admin_q = blk_mq_init_queue(&dev->admin_tagset);
- if (IS_ERR(dev->admin_q)) {
+ dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
+ if (IS_ERR(dev->ctrl.admin_q)) {
blk_mq_free_tag_set(&dev->admin_tagset);
return -ENOMEM;
}
- if (!blk_get_queue(dev->admin_q)) {
+ if (!blk_get_queue(dev->ctrl.admin_q)) {
nvme_dev_remove_admin(dev);
- dev->admin_q = NULL;
+ dev->ctrl.admin_q = NULL;
return -ENODEV;
}
} else
- blk_mq_unfreeze_queue(dev->admin_q);
+ blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
return 0;
}
{
int result;
u32 aqa;
- u64 cap = lo_hi_readq(&dev->bar->cap);
+ u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq;
- /*
- * default to a 4K page size, with the intention to update this
- * path in the future to accomodate architectures with differing
- * kernel and IO page sizes.
- */
- unsigned page_shift = 12;
- unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
-
- if (page_shift < dev_page_min) {
- dev_err(dev->dev,
- "Minimum device page size (%u) too large for "
- "host (%u)\n", 1 << dev_page_min,
- 1 << page_shift);
- return -ENODEV;
- }
- dev->subsystem = readl(&dev->bar->vs) >= NVME_VS(1, 1) ?
+ dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1) ?
NVME_CAP_NSSRC(cap) : 0;
- if (dev->subsystem && (readl(&dev->bar->csts) & NVME_CSTS_NSSRO))
- writel(NVME_CSTS_NSSRO, &dev->bar->csts);
+ if (dev->subsystem &&
+ (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
+ writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
- result = nvme_disable_ctrl(dev, cap);
+ result = nvme_disable_ctrl(&dev->ctrl, cap);
if (result < 0)
return result;
aqa = nvmeq->q_depth - 1;
aqa |= aqa << 16;
- dev->page_size = 1 << page_shift;
-
- dev->ctrl_config = NVME_CC_CSS_NVM;
- dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
- dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
- dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
-
- writel(aqa, &dev->bar->aqa);
- lo_hi_writeq(nvmeq->sq_dma_addr, &dev->bar->asq);
- lo_hi_writeq(nvmeq->cq_dma_addr, &dev->bar->acq);
+ writel(aqa, dev->bar + NVME_REG_AQA);
+ lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
+ lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
- result = nvme_enable_ctrl(dev, cap);
+ result = nvme_enable_ctrl(&dev->ctrl, cap);
if (result)
goto free_nvmeq;
return result;
}
-static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
-{
- struct nvme_dev *dev = ns->dev;
- struct nvme_user_io io;
- struct nvme_command c;
- unsigned length, meta_len;
- int status, write;
- dma_addr_t meta_dma = 0;
- void *meta = NULL;
- void __user *metadata;
-
- if (copy_from_user(&io, uio, sizeof(io)))
- return -EFAULT;
-
- switch (io.opcode) {
- case nvme_cmd_write:
- case nvme_cmd_read:
- case nvme_cmd_compare:
- break;
- default:
- return -EINVAL;
- }
-
- length = (io.nblocks + 1) << ns->lba_shift;
- meta_len = (io.nblocks + 1) * ns->ms;
- metadata = (void __user *)(uintptr_t)io.metadata;
- write = io.opcode & 1;
-
- if (ns->ext) {
- length += meta_len;
- meta_len = 0;
- }
- if (meta_len) {
- if (((io.metadata & 3) || !io.metadata) && !ns->ext)
- return -EINVAL;
-
- meta = dma_alloc_coherent(dev->dev, meta_len,
- &meta_dma, GFP_KERNEL);
-
- if (!meta) {
- status = -ENOMEM;
- goto unmap;
- }
- if (write) {
- if (copy_from_user(meta, metadata, meta_len)) {
- status = -EFAULT;
- goto unmap;
- }
- }
- }
-
- memset(&c, 0, sizeof(c));
- c.rw.opcode = io.opcode;
- c.rw.flags = io.flags;
- c.rw.nsid = cpu_to_le32(ns->ns_id);
- c.rw.slba = cpu_to_le64(io.slba);
- c.rw.length = cpu_to_le16(io.nblocks);
- c.rw.control = cpu_to_le16(io.control);
- c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
- c.rw.reftag = cpu_to_le32(io.reftag);
- c.rw.apptag = cpu_to_le16(io.apptag);
- c.rw.appmask = cpu_to_le16(io.appmask);
- c.rw.metadata = cpu_to_le64(meta_dma);
-
- status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
- (void __user *)(uintptr_t)io.addr, length, NULL, 0);
- unmap:
- if (meta) {
- if (status == NVME_SC_SUCCESS && !write) {
- if (copy_to_user(metadata, meta, meta_len))
- status = -EFAULT;
- }
- dma_free_coherent(dev->dev, meta_len, meta, meta_dma);
- }
- return status;
-}
-
-static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
- struct nvme_passthru_cmd __user *ucmd)
-{
- struct nvme_passthru_cmd cmd;
- struct nvme_command c;
- unsigned timeout = 0;
- int status;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
- if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
- return -EFAULT;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = cmd.opcode;
- c.common.flags = cmd.flags;
- c.common.nsid = cpu_to_le32(cmd.nsid);
- c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
- c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
- c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
- c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
- c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
- c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
- c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
- c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
-
- if (cmd.timeout_ms)
- timeout = msecs_to_jiffies(cmd.timeout_ms);
-
- status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
- NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
- &cmd.result, timeout);
- if (status >= 0) {
- if (put_user(cmd.result, &ucmd->result))
- return -EFAULT;
- }
-
- return status;
-}
-
-static int nvme_subsys_reset(struct nvme_dev *dev)
-{
- if (!dev->subsystem)
- return -ENOTTY;
-
- writel(0x4E564D65, &dev->bar->nssr); /* "NVMe" */
- return 0;
-}
-
-static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
- unsigned long arg)
-{
- struct nvme_ns *ns = bdev->bd_disk->private_data;
-
- switch (cmd) {
- case NVME_IOCTL_ID:
- force_successful_syscall_return();
- return ns->ns_id;
- case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ns->dev, NULL, (void __user *)arg);
- case NVME_IOCTL_IO_CMD:
- return nvme_user_cmd(ns->dev, ns, (void __user *)arg);
- case NVME_IOCTL_SUBMIT_IO:
- return nvme_submit_io(ns, (void __user *)arg);
- case SG_GET_VERSION_NUM:
- return nvme_sg_get_version_num((void __user *)arg);
- case SG_IO:
- return nvme_sg_io(ns, (void __user *)arg);
- default:
- return -ENOTTY;
- }
-}
-
-#ifdef CONFIG_COMPAT
-static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- switch (cmd) {
- case SG_IO:
- return -ENOIOCTLCMD;
- }
- return nvme_ioctl(bdev, mode, cmd, arg);
-}
-#else
-#define nvme_compat_ioctl NULL
-#endif
-
-static void nvme_free_dev(struct kref *kref);
-static void nvme_free_ns(struct kref *kref)
-{
- struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);
-
- if (ns->type == NVME_NS_LIGHTNVM)
- nvme_nvm_unregister(ns->queue, ns->disk->disk_name);
-
- spin_lock(&dev_list_lock);
- ns->disk->private_data = NULL;
- spin_unlock(&dev_list_lock);
-
- kref_put(&ns->dev->kref, nvme_free_dev);
- put_disk(ns->disk);
- kfree(ns);
-}
-
-static int nvme_open(struct block_device *bdev, fmode_t mode)
-{
- int ret = 0;
- struct nvme_ns *ns;
-
- spin_lock(&dev_list_lock);
- ns = bdev->bd_disk->private_data;
- if (!ns)
- ret = -ENXIO;
- else if (!kref_get_unless_zero(&ns->kref))
- ret = -ENXIO;
- spin_unlock(&dev_list_lock);
-
- return ret;
-}
-
-static void nvme_release(struct gendisk *disk, fmode_t mode)
-{
- struct nvme_ns *ns = disk->private_data;
- kref_put(&ns->kref, nvme_free_ns);
-}
-
-static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo)
-{
- /* some standard values */
- geo->heads = 1 << 6;
- geo->sectors = 1 << 5;
- geo->cylinders = get_capacity(bd->bd_disk) >> 11;
- return 0;
-}
-
-static void nvme_config_discard(struct nvme_ns *ns)
-{
- u32 logical_block_size = queue_logical_block_size(ns->queue);
- ns->queue->limits.discard_zeroes_data = 0;
- ns->queue->limits.discard_alignment = logical_block_size;
- ns->queue->limits.discard_granularity = logical_block_size;
- blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
- queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
-}
-
-static int nvme_revalidate_disk(struct gendisk *disk)
-{
- struct nvme_ns *ns = disk->private_data;
- struct nvme_dev *dev = ns->dev;
- struct nvme_id_ns *id;
- u8 lbaf, pi_type;
- u16 old_ms;
- unsigned short bs;
-
- if (nvme_identify_ns(dev, ns->ns_id, &id)) {
- dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__,
- dev->instance, ns->ns_id);
- return -ENODEV;
- }
- if (id->ncap == 0) {
- kfree(id);
- return -ENODEV;
- }
-
- if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
- if (nvme_nvm_register(ns->queue, disk->disk_name)) {
- dev_warn(dev->dev,
- "%s: LightNVM init failure\n", __func__);
- kfree(id);
- return -ENODEV;
- }
- ns->type = NVME_NS_LIGHTNVM;
- }
-
- old_ms = ns->ms;
- lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
- ns->lba_shift = id->lbaf[lbaf].ds;
- ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
- ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
-
- /*
- * If identify namespace failed, use default 512 byte block size so
- * block layer can use before failing read/write for 0 capacity.
- */
- if (ns->lba_shift == 0)
- ns->lba_shift = 9;
- bs = 1 << ns->lba_shift;
-
- /* XXX: PI implementation requires metadata equal t10 pi tuple size */
- pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
- id->dps & NVME_NS_DPS_PI_MASK : 0;
-
- blk_mq_freeze_queue(disk->queue);
- if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
- ns->ms != old_ms ||
- bs != queue_logical_block_size(disk->queue) ||
- (ns->ms && ns->ext)))
- blk_integrity_unregister(disk);
-
- ns->pi_type = pi_type;
- blk_queue_logical_block_size(ns->queue, bs);
-
- if (ns->ms && !ns->ext)
- nvme_init_integrity(ns);
-
- if ((ns->ms && !(ns->ms == 8 && ns->pi_type) &&
- !blk_get_integrity(disk)) ||
- ns->type == NVME_NS_LIGHTNVM)
- set_capacity(disk, 0);
- else
- set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
-
- if (dev->oncs & NVME_CTRL_ONCS_DSM)
- nvme_config_discard(ns);
- blk_mq_unfreeze_queue(disk->queue);
-
- kfree(id);
- return 0;
-}
-
-static char nvme_pr_type(enum pr_type type)
-{
- switch (type) {
- case PR_WRITE_EXCLUSIVE:
- return 1;
- case PR_EXCLUSIVE_ACCESS:
- return 2;
- case PR_WRITE_EXCLUSIVE_REG_ONLY:
- return 3;
- case PR_EXCLUSIVE_ACCESS_REG_ONLY:
- return 4;
- case PR_WRITE_EXCLUSIVE_ALL_REGS:
- return 5;
- case PR_EXCLUSIVE_ACCESS_ALL_REGS:
- return 6;
- default:
- return 0;
- }
-};
-
-static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
- u64 key, u64 sa_key, u8 op)
-{
- struct nvme_ns *ns = bdev->bd_disk->private_data;
- struct nvme_command c;
- u8 data[16] = { 0, };
-
- put_unaligned_le64(key, &data[0]);
- put_unaligned_le64(sa_key, &data[8]);
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = op;
- c.common.nsid = cpu_to_le32(ns->ns_id);
- c.common.cdw10[0] = cpu_to_le32(cdw10);
-
- return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
-}
-
-static int nvme_pr_register(struct block_device *bdev, u64 old,
- u64 new, unsigned flags)
-{
- u32 cdw10;
-
- if (flags & ~PR_FL_IGNORE_KEY)
- return -EOPNOTSUPP;
-
- cdw10 = old ? 2 : 0;
- cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
- cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
- return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
-}
-
-static int nvme_pr_reserve(struct block_device *bdev, u64 key,
- enum pr_type type, unsigned flags)
-{
- u32 cdw10;
-
- if (flags & ~PR_FL_IGNORE_KEY)
- return -EOPNOTSUPP;
-
- cdw10 = nvme_pr_type(type) << 8;
- cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
- enum pr_type type, bool abort)
-{
- u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1;
- return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
-}
-
-static int nvme_pr_clear(struct block_device *bdev, u64 key)
-{
- u32 cdw10 = 1 | (key ? 1 << 3 : 0);
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
-}
-
-static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
-{
- u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0;
- return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
-}
-
-static const struct pr_ops nvme_pr_ops = {
- .pr_register = nvme_pr_register,
- .pr_reserve = nvme_pr_reserve,
- .pr_release = nvme_pr_release,
- .pr_preempt = nvme_pr_preempt,
- .pr_clear = nvme_pr_clear,
-};
-
-static const struct block_device_operations nvme_fops = {
- .owner = THIS_MODULE,
- .ioctl = nvme_ioctl,
- .compat_ioctl = nvme_compat_ioctl,
- .open = nvme_open,
- .release = nvme_release,
- .getgeo = nvme_getgeo,
- .revalidate_disk= nvme_revalidate_disk,
- .pr_ops = &nvme_pr_ops,
-};
-
static int nvme_kthread(void *data)
{
struct nvme_dev *dev, *next;
spin_lock(&dev_list_lock);
list_for_each_entry_safe(dev, next, &dev_list, node) {
int i;
- u32 csts = readl(&dev->bar->csts);
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ /*
+ * Skip controllers currently under reset.
+ */
+ if (work_pending(&dev->reset_work) || work_busy(&dev->reset_work))
+ continue;
if ((dev->subsystem && (csts & NVME_CSTS_NSSRO)) ||
csts & NVME_CSTS_CFS) {
- if (!__nvme_reset(dev)) {
+ if (queue_work(nvme_workq, &dev->reset_work)) {
dev_warn(dev->dev,
"Failed status: %x, reset controller\n",
- readl(&dev->bar->csts));
+ readl(dev->bar + NVME_REG_CSTS));
}
continue;
}
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
- while ((i == 0) && (dev->event_limit > 0)) {
- if (nvme_submit_async_admin_req(dev))
- break;
- dev->event_limit--;
- }
+ while (i == 0 && dev->ctrl.event_limit > 0)
+ nvme_submit_async_event(dev);
spin_unlock_irq(&nvmeq->q_lock);
}
}
return 0;
}
-static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
-{
- struct nvme_ns *ns;
- struct gendisk *disk;
- int node = dev_to_node(dev->dev);
-
- ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
- if (!ns)
- return;
-
- ns->queue = blk_mq_init_queue(&dev->tagset);
- if (IS_ERR(ns->queue))
- goto out_free_ns;
- queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue);
- queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
- ns->dev = dev;
- ns->queue->queuedata = ns;
-
- disk = alloc_disk_node(0, node);
- if (!disk)
- goto out_free_queue;
-
- kref_init(&ns->kref);
- ns->ns_id = nsid;
- ns->disk = disk;
- ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
- list_add_tail(&ns->list, &dev->namespaces);
-
- blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
- if (dev->max_hw_sectors) {
- blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
- blk_queue_max_segments(ns->queue,
- (dev->max_hw_sectors / (dev->page_size >> 9)) + 1);
- }
- if (dev->stripe_size)
- blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9);
- if (dev->vwc & NVME_CTRL_VWC_PRESENT)
- blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
- blk_queue_virt_boundary(ns->queue, dev->page_size - 1);
-
- disk->major = nvme_major;
- disk->first_minor = 0;
- disk->fops = &nvme_fops;
- disk->private_data = ns;
- disk->queue = ns->queue;
- disk->driverfs_dev = dev->device;
- disk->flags = GENHD_FL_EXT_DEVT;
- sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid);
-
- /*
- * Initialize capacity to 0 until we establish the namespace format and
- * setup integrity extentions if necessary. The revalidate_disk after
- * add_disk allows the driver to register with integrity if the format
- * requires it.
- */
- set_capacity(disk, 0);
- if (nvme_revalidate_disk(ns->disk))
- goto out_free_disk;
-
- kref_get(&dev->kref);
- if (ns->type != NVME_NS_LIGHTNVM) {
- add_disk(ns->disk);
- if (ns->ms) {
- struct block_device *bd = bdget_disk(ns->disk, 0);
- if (!bd)
- return;
- if (blkdev_get(bd, FMODE_READ, NULL)) {
- bdput(bd);
- return;
- }
- blkdev_reread_part(bd);
- blkdev_put(bd, FMODE_READ);
- }
- }
- return;
- out_free_disk:
- kfree(disk);
- list_del(&ns->list);
- out_free_queue:
- blk_cleanup_queue(ns->queue);
- out_free_ns:
- kfree(ns);
-}
-
-/*
- * Create I/O queues. Failing to create an I/O queue is not an issue,
- * we can continue with less than the desired amount of queues, and
- * even a controller without I/O queues an still be used to issue
- * admin commands. This might be useful to upgrade a buggy firmware
- * for example.
- */
-static void nvme_create_io_queues(struct nvme_dev *dev)
+static int nvme_create_io_queues(struct nvme_dev *dev)
{
unsigned i;
+ int ret = 0;
- for (i = dev->queue_count; i <= dev->max_qid; i++)
- if (!nvme_alloc_queue(dev, i, dev->q_depth))
+ for (i = dev->queue_count; i <= dev->max_qid; i++) {
+ if (!nvme_alloc_queue(dev, i, dev->q_depth)) {
+ ret = -ENOMEM;
break;
+ }
+ }
- for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
- if (nvme_create_queue(dev->queues[i], i)) {
+ for (i = dev->online_queues; i <= dev->queue_count - 1; i++) {
+ ret = nvme_create_queue(dev->queues[i], i);
+ if (ret) {
nvme_free_queues(dev, i);
break;
}
-}
-
-static int set_queue_count(struct nvme_dev *dev, int count)
-{
- int status;
- u32 result;
- u32 q_count = (count - 1) | ((count - 1) << 16);
-
- status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0,
- &result);
- if (status < 0)
- return status;
- if (status > 0) {
- dev_err(dev->dev, "Could not set queue count (%d)\n", status);
- return 0;
}
- return min(result & 0xffff, result >> 16) + 1;
+
+ /*
+ * Ignore failing Create SQ/CQ commands, we can continue with less
+ * than the desired amount of queues, and even a controller without
+ * I/O queues can still be used to issue admin commands. This might
+ * be useful to upgrade a buggy firmware for example.
+ */
+ return ret >= 0 ? 0 : ret;
}
static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
if (!use_cmb_sqes)
return NULL;
- dev->cmbsz = readl(&dev->bar->cmbsz);
+ dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
if (!(NVME_CMB_SZ(dev->cmbsz)))
return NULL;
- cmbloc = readl(&dev->bar->cmbloc);
+ cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
size = szu * NVME_CMB_SZ(dev->cmbsz);
int result, i, vecs, nr_io_queues, size;
nr_io_queues = num_possible_cpus();
- result = set_queue_count(dev, nr_io_queues);
- if (result <= 0)
+ result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
+ if (result < 0)
return result;
- if (result < nr_io_queues)
- nr_io_queues = result;
+
+ /*
+ * Degraded controllers might return an error when setting the queue
+ * count. We still want to be able to bring them online and offer
+ * access to the admin queue, as that might be the only way to fix them up.
+ */
+ if (result > 0) {
+ dev_err(dev->dev, "Could not set queue count (%d)\n", result);
+ nr_io_queues = 0;
+ result = 0;
+ }
if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) {
result = nvme_cmb_qdepth(dev, nr_io_queues,
return -ENOMEM;
size = db_bar_size(dev, nr_io_queues);
} while (1);
- dev->dbs = ((void __iomem *)dev->bar) + 4096;
+ dev->dbs = dev->bar + 4096;
adminq->q_db = dev->dbs;
}
/* Free previously allocated queues that are no longer usable */
nvme_free_queues(dev, nr_io_queues + 1);
- nvme_create_io_queues(dev);
-
- return 0;
+ return nvme_create_io_queues(dev);
free_queues:
nvme_free_queues(dev, 1);
return result;
}
-static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+static void nvme_set_irq_hints(struct nvme_dev *dev)
{
- struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
- struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+ struct nvme_queue *nvmeq;
+ int i;
- return nsa->ns_id - nsb->ns_id;
-}
+ for (i = 0; i < dev->online_queues; i++) {
+ nvmeq = dev->queues[i];
-static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
-{
- struct nvme_ns *ns;
+ if (!nvmeq->tags || !(*nvmeq->tags))
+ continue;
- list_for_each_entry(ns, &dev->namespaces, list) {
- if (ns->ns_id == nsid)
- return ns;
- if (ns->ns_id > nsid)
- break;
+ irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
+ blk_mq_tags_cpumask(*nvmeq->tags));
}
- return NULL;
}
-static inline bool nvme_io_incapable(struct nvme_dev *dev)
+static void nvme_dev_scan(struct work_struct *work)
{
- return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS ||
- dev->online_queues < 2);
+ struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
+
+ if (!dev->tagset.tags)
+ return;
+ nvme_scan_namespaces(&dev->ctrl);
+ nvme_set_irq_hints(dev);
}
-static void nvme_ns_remove(struct nvme_ns *ns)
+static void nvme_del_queue_end(struct request *req, int error)
{
- bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
-
- if (kill) {
- blk_set_queue_dying(ns->queue);
+ struct nvme_queue *nvmeq = req->end_io_data;
- /*
- * The controller was shutdown first if we got here through
- * device removal. The shutdown may requeue outstanding
- * requests. These need to be aborted immediately so
- * del_gendisk doesn't block indefinitely for their completion.
- */
- blk_mq_abort_requeue_list(ns->queue);
- }
- if (ns->disk->flags & GENHD_FL_UP)
- del_gendisk(ns->disk);
- if (kill || !blk_queue_dying(ns->queue)) {
- blk_mq_abort_requeue_list(ns->queue);
- blk_cleanup_queue(ns->queue);
- }
- list_del_init(&ns->list);
- kref_put(&ns->kref, nvme_free_ns);
+ blk_mq_free_request(req);
+ complete(&nvmeq->dev->ioq_wait);
}
-static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
+static void nvme_del_cq_end(struct request *req, int error)
{
- struct nvme_ns *ns, *next;
- unsigned i;
+ struct nvme_queue *nvmeq = req->end_io_data;
- for (i = 1; i <= nn; i++) {
- ns = nvme_find_ns(dev, i);
- if (ns) {
- if (revalidate_disk(ns->disk))
- nvme_ns_remove(ns);
- } else
- nvme_alloc_ns(dev, i);
- }
- list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
- if (ns->ns_id > nn)
- nvme_ns_remove(ns);
+ if (!error) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvmeq->q_lock, flags);
+ nvme_process_cq(nvmeq);
+ spin_unlock_irqrestore(&nvmeq->q_lock, flags);
}
- list_sort(NULL, &dev->namespaces, ns_cmp);
+
+ nvme_del_queue_end(req, error);
}
-static void nvme_set_irq_hints(struct nvme_dev *dev)
+static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
{
- struct nvme_queue *nvmeq;
- int i;
+ struct request_queue *q = nvmeq->dev->ctrl.admin_q;
+ struct request *req;
+ struct nvme_command cmd;
- for (i = 0; i < dev->online_queues; i++) {
- nvmeq = dev->queues[i];
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.delete_queue.opcode = opcode;
+ cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);
- if (!nvmeq->tags || !(*nvmeq->tags))
- continue;
+ req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
- irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
- blk_mq_tags_cpumask(*nvmeq->tags));
- }
+ req->timeout = ADMIN_TIMEOUT;
+ req->end_io_data = nvmeq;
+
+ blk_execute_rq_nowait(q, NULL, req, false,
+ opcode == nvme_admin_delete_cq ?
+ nvme_del_cq_end : nvme_del_queue_end);
+ return 0;
}
-static void nvme_dev_scan(struct work_struct *work)
+static void nvme_disable_io_queues(struct nvme_dev *dev)
{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
- struct nvme_id_ctrl *ctrl;
+ int pass;
+ unsigned long timeout;
+ u8 opcode = nvme_admin_delete_sq;
- if (!dev->tagset.tags)
- return;
- if (nvme_identify_ctrl(dev, &ctrl))
- return;
- nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
- kfree(ctrl);
- nvme_set_irq_hints(dev);
+ for (pass = 0; pass < 2; pass++) {
+ int sent = 0, i = dev->queue_count - 1;
+
+ reinit_completion(&dev->ioq_wait);
+ retry:
+ timeout = ADMIN_TIMEOUT;
+ for (; i > 0; i--) {
+ struct nvme_queue *nvmeq = dev->queues[i];
+
+ if (!pass)
+ nvme_suspend_queue(nvmeq);
+ if (nvme_delete_queue(nvmeq, opcode))
+ break;
+ ++sent;
+ }
+ while (sent--) {
+ timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout);
+ if (timeout == 0)
+ return;
+ if (i)
+ goto retry;
+ }
+ opcode = nvme_admin_delete_cq;
+ }
}
/*
*/
static int nvme_dev_add(struct nvme_dev *dev)
{
- struct pci_dev *pdev = to_pci_dev(dev->dev);
- int res;
- struct nvme_id_ctrl *ctrl;
- int shift = NVME_CAP_MPSMIN(lo_hi_readq(&dev->bar->cap)) + 12;
-
- res = nvme_identify_ctrl(dev, &ctrl);
- if (res) {
- dev_err(dev->dev, "Identify Controller failed (%d)\n", res);
- return -EIO;
- }
-
- dev->oncs = le16_to_cpup(&ctrl->oncs);
- dev->abort_limit = ctrl->acl + 1;
- dev->vwc = ctrl->vwc;
- memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
- memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
- memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
- if (ctrl->mdts)
- dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9);
- else
- dev->max_hw_sectors = UINT_MAX;
- if ((pdev->vendor == PCI_VENDOR_ID_INTEL) &&
- (pdev->device == 0x0953) && ctrl->vs[3]) {
- unsigned int max_hw_sectors;
-
- dev->stripe_size = 1 << (ctrl->vs[3] + shift);
- max_hw_sectors = dev->stripe_size >> (shift - 9);
- if (dev->max_hw_sectors) {
- dev->max_hw_sectors = min(max_hw_sectors,
- dev->max_hw_sectors);
- } else
- dev->max_hw_sectors = max_hw_sectors;
- }
- kfree(ctrl);
-
- if (!dev->tagset.tags) {
+ if (!dev->ctrl.tagset) {
dev->tagset.ops = &nvme_mq_ops;
dev->tagset.nr_hw_queues = dev->online_queues - 1;
dev->tagset.timeout = NVME_IO_TIMEOUT;
if (blk_mq_alloc_tag_set(&dev->tagset))
return 0;
+ dev->ctrl.tagset = &dev->tagset;
}
- schedule_work(&dev->scan_work);
+ queue_work(nvme_workq, &dev->scan_work);
return 0;
}
if (!dev->bar)
goto disable;
- if (readl(&dev->bar->csts) == -1) {
+ if (readl(dev->bar + NVME_REG_CSTS) == -1) {
result = -ENODEV;
goto unmap;
}
goto unmap;
}
- cap = lo_hi_readq(&dev->bar->cap);
+ cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+
dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
- dev->dbs = ((void __iomem *)dev->bar) + 4096;
+ dev->dbs = dev->bar + 4096;
/*
* Temporary fix for the Apple controller found in the MacBook8,1 and
dev->q_depth);
}
- if (readl(&dev->bar->vs) >= NVME_VS(1, 2))
+ if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2))
dev->cmb = nvme_map_cmb(dev);
+ pci_enable_pcie_error_reporting(pdev);
+ pci_save_state(pdev);
return 0;
unmap:
pci_release_regions(pdev);
}
- if (pci_is_enabled(pdev))
+ if (pci_is_enabled(pdev)) {
+ pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
-}
-
-struct nvme_delq_ctx {
- struct task_struct *waiter;
- struct kthread_worker *worker;
- atomic_t refcount;
-};
-
-static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
-{
- dq->waiter = current;
- mb();
-
- for (;;) {
- set_current_state(TASK_KILLABLE);
- if (!atomic_read(&dq->refcount))
- break;
- if (!schedule_timeout(ADMIN_TIMEOUT) ||
- fatal_signal_pending(current)) {
- /*
- * Disable the controller first since we can't trust it
- * at this point, but leave the admin queue enabled
- * until all queue deletion requests are flushed.
- * FIXME: This may take a while if there are more h/w
- * queues than admin tags.
- */
- set_current_state(TASK_RUNNING);
- nvme_disable_ctrl(dev, lo_hi_readq(&dev->bar->cap));
- nvme_clear_queue(dev->queues[0]);
- flush_kthread_worker(dq->worker);
- nvme_disable_queue(dev, 0);
- return;
- }
}
- set_current_state(TASK_RUNNING);
-}
-
-static void nvme_put_dq(struct nvme_delq_ctx *dq)
-{
- atomic_dec(&dq->refcount);
- if (dq->waiter)
- wake_up_process(dq->waiter);
-}
-
-static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq)
-{
- atomic_inc(&dq->refcount);
- return dq;
-}
-
-static void nvme_del_queue_end(struct nvme_queue *nvmeq)
-{
- struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx;
- nvme_put_dq(dq);
-
- spin_lock_irq(&nvmeq->q_lock);
- nvme_process_cq(nvmeq);
- spin_unlock_irq(&nvmeq->q_lock);
-}
-
-static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode,
- kthread_work_func_t fn)
-{
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.delete_queue.opcode = opcode;
- c.delete_queue.qid = cpu_to_le16(nvmeq->qid);
-
- init_kthread_work(&nvmeq->cmdinfo.work, fn);
- return nvme_submit_admin_async_cmd(nvmeq->dev, &c, &nvmeq->cmdinfo,
- ADMIN_TIMEOUT);
-}
-
-static void nvme_del_cq_work_handler(struct kthread_work *work)
-{
- struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
- cmdinfo.work);
- nvme_del_queue_end(nvmeq);
-}
-
-static int nvme_delete_cq(struct nvme_queue *nvmeq)
-{
- return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq,
- nvme_del_cq_work_handler);
}
-static void nvme_del_sq_work_handler(struct kthread_work *work)
+static int nvme_dev_list_add(struct nvme_dev *dev)
{
- struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
- cmdinfo.work);
- int status = nvmeq->cmdinfo.status;
-
- if (!status)
- status = nvme_delete_cq(nvmeq);
- if (status)
- nvme_del_queue_end(nvmeq);
-}
-
-static int nvme_delete_sq(struct nvme_queue *nvmeq)
-{
- return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq,
- nvme_del_sq_work_handler);
-}
-
-static void nvme_del_queue_start(struct kthread_work *work)
-{
- struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
- cmdinfo.work);
- if (nvme_delete_sq(nvmeq))
- nvme_del_queue_end(nvmeq);
-}
+ bool start_thread = false;
-static void nvme_disable_io_queues(struct nvme_dev *dev)
-{
- int i;
- DEFINE_KTHREAD_WORKER_ONSTACK(worker);
- struct nvme_delq_ctx dq;
- struct task_struct *kworker_task = kthread_run(kthread_worker_fn,
- &worker, "nvme%d", dev->instance);
-
- if (IS_ERR(kworker_task)) {
- dev_err(dev->dev,
- "Failed to create queue del task\n");
- for (i = dev->queue_count - 1; i > 0; i--)
- nvme_disable_queue(dev, i);
- return;
+ spin_lock(&dev_list_lock);
+ if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) {
+ start_thread = true;
+ nvme_thread = NULL;
}
+ list_add(&dev->node, &dev_list);
+ spin_unlock(&dev_list_lock);
- dq.waiter = NULL;
- atomic_set(&dq.refcount, 0);
- dq.worker = &worker;
- for (i = dev->queue_count - 1; i > 0; i--) {
- struct nvme_queue *nvmeq = dev->queues[i];
+ if (start_thread) {
+ nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
+ wake_up_all(&nvme_kthread_wait);
+ } else
+ wait_event_killable(nvme_kthread_wait, nvme_thread);
- if (nvme_suspend_queue(nvmeq))
- continue;
- nvmeq->cmdinfo.ctx = nvme_get_dq(&dq);
- nvmeq->cmdinfo.worker = dq.worker;
- init_kthread_work(&nvmeq->cmdinfo.work, nvme_del_queue_start);
- queue_kthread_work(dq.worker, &nvmeq->cmdinfo.work);
- }
- nvme_wait_dq(&dq, dev);
- kthread_stop(kworker_task);
+ if (IS_ERR_OR_NULL(nvme_thread))
+ return nvme_thread ? PTR_ERR(nvme_thread) : -EINTR;
+
+ return 0;
}
/*
kthread_stop(tmp);
}
-static void nvme_freeze_queues(struct nvme_dev *dev)
-{
- struct nvme_ns *ns;
-
- list_for_each_entry(ns, &dev->namespaces, list) {
- blk_mq_freeze_queue_start(ns->queue);
-
- spin_lock_irq(ns->queue->queue_lock);
- queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
- spin_unlock_irq(ns->queue->queue_lock);
-
- blk_mq_cancel_requeue_work(ns->queue);
- blk_mq_stop_hw_queues(ns->queue);
- }
-}
-
-static void nvme_unfreeze_queues(struct nvme_dev *dev)
-{
- struct nvme_ns *ns;
-
- list_for_each_entry(ns, &dev->namespaces, list) {
- queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
- blk_mq_unfreeze_queue(ns->queue);
- blk_mq_start_stopped_hw_queues(ns->queue, true);
- blk_mq_kick_requeue_list(ns->queue);
- }
-}
-
-static void nvme_dev_shutdown(struct nvme_dev *dev)
+static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
int i;
u32 csts = -1;
nvme_dev_list_remove(dev);
+ mutex_lock(&dev->shutdown_lock);
if (dev->bar) {
- nvme_freeze_queues(dev);
- csts = readl(&dev->bar->csts);
+ nvme_stop_queues(&dev->ctrl);
+ csts = readl(dev->bar + NVME_REG_CSTS);
}
if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
for (i = dev->queue_count - 1; i >= 0; i--) {
}
} else {
nvme_disable_io_queues(dev);
- nvme_shutdown_ctrl(dev);
- nvme_disable_queue(dev, 0);
+ nvme_disable_admin_queue(dev, shutdown);
}
nvme_dev_unmap(dev);
for (i = dev->queue_count - 1; i >= 0; i--)
nvme_clear_queue(dev->queues[i]);
-}
-
-static void nvme_dev_remove(struct nvme_dev *dev)
-{
- struct nvme_ns *ns, *next;
-
- if (nvme_io_incapable(dev)) {
- /*
- * If the device is not capable of IO (surprise hot-removal,
- * for example), we need to quiesce prior to deleting the
- * namespaces. This will end outstanding requests and prevent
- * attempts to sync dirty data.
- */
- nvme_dev_shutdown(dev);
- }
- list_for_each_entry_safe(ns, next, &dev->namespaces, list)
- nvme_ns_remove(ns);
+ mutex_unlock(&dev->shutdown_lock);
}
static int nvme_setup_prp_pools(struct nvme_dev *dev)
dma_pool_destroy(dev->prp_small_pool);
}
-static DEFINE_IDA(nvme_instance_ida);
-
-static int nvme_set_instance(struct nvme_dev *dev)
-{
- int instance, error;
-
- do {
- if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
- return -ENODEV;
-
- spin_lock(&dev_list_lock);
- error = ida_get_new(&nvme_instance_ida, &instance);
- spin_unlock(&dev_list_lock);
- } while (error == -EAGAIN);
-
- if (error)
- return -ENODEV;
-
- dev->instance = instance;
- return 0;
-}
-
-static void nvme_release_instance(struct nvme_dev *dev)
-{
- spin_lock(&dev_list_lock);
- ida_remove(&nvme_instance_ida, dev->instance);
- spin_unlock(&dev_list_lock);
-}
-
-static void nvme_free_dev(struct kref *kref)
+static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{
- struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
put_device(dev->dev);
- put_device(dev->device);
- nvme_release_instance(dev);
if (dev->tagset.tags)
blk_mq_free_tag_set(&dev->tagset);
- if (dev->admin_q)
- blk_put_queue(dev->admin_q);
+ if (dev->ctrl.admin_q)
+ blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
kfree(dev->entry);
kfree(dev);
}
-static int nvme_dev_open(struct inode *inode, struct file *f)
-{
- struct nvme_dev *dev;
- int instance = iminor(inode);
- int ret = -ENODEV;
-
- spin_lock(&dev_list_lock);
- list_for_each_entry(dev, &dev_list, node) {
- if (dev->instance == instance) {
- if (!dev->admin_q) {
- ret = -EWOULDBLOCK;
- break;
- }
- if (!kref_get_unless_zero(&dev->kref))
- break;
- f->private_data = dev;
- ret = 0;
- break;
- }
- }
- spin_unlock(&dev_list_lock);
-
- return ret;
-}
-
-static int nvme_dev_release(struct inode *inode, struct file *f)
+static void nvme_reset_work(struct work_struct *work)
{
- struct nvme_dev *dev = f->private_data;
- kref_put(&dev->kref, nvme_free_dev);
- return 0;
-}
+ struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
+ int result;
-static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
-{
- struct nvme_dev *dev = f->private_data;
- struct nvme_ns *ns;
-
- switch (cmd) {
- case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(dev, NULL, (void __user *)arg);
- case NVME_IOCTL_IO_CMD:
- if (list_empty(&dev->namespaces))
- return -ENOTTY;
- ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
- return nvme_user_cmd(dev, ns, (void __user *)arg);
- case NVME_IOCTL_RESET:
- dev_warn(dev->dev, "resetting controller\n");
- return nvme_reset(dev);
- case NVME_IOCTL_SUBSYS_RESET:
- return nvme_subsys_reset(dev);
- default:
- return -ENOTTY;
- }
-}
+ if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags)))
+ goto out;
-static const struct file_operations nvme_dev_fops = {
- .owner = THIS_MODULE,
- .open = nvme_dev_open,
- .release = nvme_dev_release,
- .unlocked_ioctl = nvme_dev_ioctl,
- .compat_ioctl = nvme_dev_ioctl,
-};
+ /*
+ * If we're called to reset a live controller first shut it down before
+ * moving on.
+ */
+ if (dev->bar)
+ nvme_dev_disable(dev, false);
-static void nvme_probe_work(struct work_struct *work)
-{
- struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
- bool start_thread = false;
- int result;
+ set_bit(NVME_CTRL_RESETTING, &dev->flags);
result = nvme_dev_map(dev);
if (result)
if (result)
goto unmap;
- spin_lock(&dev_list_lock);
- if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) {
- start_thread = true;
- nvme_thread = NULL;
- }
- list_add(&dev->node, &dev_list);
- spin_unlock(&dev_list_lock);
-
- if (start_thread) {
- nvme_thread = kthread_run(nvme_kthread, NULL, "nvme");
- wake_up_all(&nvme_kthread_wait);
- } else
- wait_event_killable(nvme_kthread_wait, nvme_thread);
-
- if (IS_ERR_OR_NULL(nvme_thread)) {
- result = nvme_thread ? PTR_ERR(nvme_thread) : -EINTR;
- goto disable;
- }
-
nvme_init_queue(dev->queues[0], 0);
result = nvme_alloc_admin_tags(dev);
if (result)
goto disable;
+ result = nvme_init_identify(&dev->ctrl);
+ if (result)
+ goto free_tags;
+
result = nvme_setup_io_queues(dev);
if (result)
goto free_tags;
- dev->event_limit = 1;
+ dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
+
+ result = nvme_dev_list_add(dev);
+ if (result)
+ goto remove;
/*
* Keep the controller around but remove all namespaces if we don't have
*/
if (dev->online_queues < 2) {
dev_warn(dev->dev, "IO queues not created\n");
- nvme_dev_remove(dev);
+ nvme_remove_namespaces(&dev->ctrl);
} else {
- nvme_unfreeze_queues(dev);
+ nvme_start_queues(&dev->ctrl);
nvme_dev_add(dev);
}
+ clear_bit(NVME_CTRL_RESETTING, &dev->flags);
return;
+ remove:
+ nvme_dev_list_remove(dev);
free_tags:
nvme_dev_remove_admin(dev);
- blk_put_queue(dev->admin_q);
- dev->admin_q = NULL;
+ blk_put_queue(dev->ctrl.admin_q);
+ dev->ctrl.admin_q = NULL;
dev->queues[0]->tags = NULL;
disable:
- nvme_disable_queue(dev, 0);
- nvme_dev_list_remove(dev);
+ nvme_disable_admin_queue(dev, false);
unmap:
nvme_dev_unmap(dev);
out:
- if (!work_busy(&dev->reset_work))
- nvme_dead_ctrl(dev);
+ nvme_remove_dead_ctrl(dev);
}
-static int nvme_remove_dead_ctrl(void *arg)
+static void nvme_remove_dead_ctrl_work(struct work_struct *work)
{
- struct nvme_dev *dev = (struct nvme_dev *)arg;
+ struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
struct pci_dev *pdev = to_pci_dev(dev->dev);
if (pci_get_drvdata(pdev))
pci_stop_and_remove_bus_device_locked(pdev);
- kref_put(&dev->kref, nvme_free_dev);
- return 0;
+ nvme_put_ctrl(&dev->ctrl);
}
-static void nvme_dead_ctrl(struct nvme_dev *dev)
+static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
{
- dev_warn(dev->dev, "Device failed to resume\n");
- kref_get(&dev->kref);
- if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
- dev->instance))) {
- dev_err(dev->dev,
- "Failed to start controller remove task\n");
- kref_put(&dev->kref, nvme_free_dev);
- }
+ dev_warn(dev->dev, "Removing after probe failure\n");
+ kref_get(&dev->ctrl.kref);
+ if (!schedule_work(&dev->remove_work))
+ nvme_put_ctrl(&dev->ctrl);
}
-static void nvme_reset_work(struct work_struct *ws)
+static int nvme_reset(struct nvme_dev *dev)
{
- struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work);
- bool in_probe = work_busy(&dev->probe_work);
-
- nvme_dev_shutdown(dev);
+ if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
+ return -ENODEV;
- /* Synchronize with device probe so that work will see failure status
- * and exit gracefully without trying to schedule another reset */
- flush_work(&dev->probe_work);
+ if (!queue_work(nvme_workq, &dev->reset_work))
+ return -EBUSY;
- /* Fail this device if reset occured during probe to avoid
- * infinite initialization loops. */
- if (in_probe) {
- nvme_dead_ctrl(dev);
- return;
- }
- /* Schedule device resume asynchronously so the reset work is available
- * to cleanup errors that may occur during reinitialization */
- schedule_work(&dev->probe_work);
+ flush_work(&dev->reset_work);
+ return 0;
}
-static int __nvme_reset(struct nvme_dev *dev)
+static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
- if (work_pending(&dev->reset_work))
- return -EBUSY;
- list_del_init(&dev->node);
- queue_work(nvme_workq, &dev->reset_work);
+ *val = readl(to_nvme_dev(ctrl)->bar + off);
return 0;
}
-static int nvme_reset(struct nvme_dev *dev)
+static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
{
- int ret;
-
- if (!dev->admin_q || blk_queue_dying(dev->admin_q))
- return -ENODEV;
-
- spin_lock(&dev_list_lock);
- ret = __nvme_reset(dev);
- spin_unlock(&dev_list_lock);
-
- if (!ret) {
- flush_work(&dev->reset_work);
- flush_work(&dev->probe_work);
- return 0;
- }
+ writel(val, to_nvme_dev(ctrl)->bar + off);
+ return 0;
+}
- return ret;
+static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
+{
+ *val = readq(to_nvme_dev(ctrl)->bar + off);
+ return 0;
}
-static ssize_t nvme_sysfs_reset(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t count)
+static bool nvme_pci_io_incapable(struct nvme_ctrl *ctrl)
{
- struct nvme_dev *ndev = dev_get_drvdata(dev);
- int ret;
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
- ret = nvme_reset(ndev);
- if (ret < 0)
- return ret;
+ return !dev->bar || dev->online_queues < 2;
+}
- return count;
+/* reset_ctrl callback: forward to nvme_reset() on the owning nvme_dev and return its result. */
+static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
+{
+ return nvme_reset(to_nvme_dev(ctrl));
}
-static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
+/*
+ * Controller callbacks for the PCI transport: register accessors that go
+ * through the mapped BAR, the I/O-capability check, reset and final free.
+ * nvme_probe() passes this table to nvme_init_ctrl().
+ */
+static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
+ .reg_read32 = nvme_pci_reg_read32,
+ .reg_write32 = nvme_pci_reg_write32,
+ .reg_read64 = nvme_pci_reg_read64,
+ .io_incapable = nvme_pci_io_incapable,
+ .reset_ctrl = nvme_pci_reset_ctrl,
+ .free_ctrl = nvme_pci_free_ctrl,
+};
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
if (!dev->queues)
goto free;
- INIT_LIST_HEAD(&dev->namespaces);
- INIT_WORK(&dev->reset_work, nvme_reset_work);
dev->dev = get_device(&pdev->dev);
pci_set_drvdata(pdev, dev);
- result = nvme_set_instance(dev);
- if (result)
- goto put_pci;
+
+ INIT_LIST_HEAD(&dev->node);
+ INIT_WORK(&dev->scan_work, nvme_dev_scan);
+ INIT_WORK(&dev->reset_work, nvme_reset_work);
+ INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
+ mutex_init(&dev->shutdown_lock);
+ init_completion(&dev->ioq_wait);
result = nvme_setup_prp_pools(dev);
if (result)
- goto release;
-
- kref_init(&dev->kref);
- dev->device = device_create(nvme_class, &pdev->dev,
- MKDEV(nvme_char_major, dev->instance),
- dev, "nvme%d", dev->instance);
- if (IS_ERR(dev->device)) {
- result = PTR_ERR(dev->device);
- goto release_pools;
- }
- get_device(dev->device);
- dev_set_drvdata(dev->device, dev);
+ goto put_pci;
- result = device_create_file(dev->device, &dev_attr_reset_controller);
+ result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
+ id->driver_data);
if (result)
- goto put_dev;
+ goto release_pools;
- INIT_LIST_HEAD(&dev->node);
- INIT_WORK(&dev->scan_work, nvme_dev_scan);
- INIT_WORK(&dev->probe_work, nvme_probe_work);
- schedule_work(&dev->probe_work);
+ queue_work(nvme_workq, &dev->reset_work);
return 0;
- put_dev:
- device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
- put_device(dev->device);
release_pools:
nvme_release_prp_pools(dev);
- release:
- nvme_release_instance(dev);
put_pci:
put_device(dev->dev);
free:
struct nvme_dev *dev = pci_get_drvdata(pdev);
if (prepare)
- nvme_dev_shutdown(dev);
+ nvme_dev_disable(dev, false);
else
- schedule_work(&dev->probe_work);
+ queue_work(nvme_workq, &dev->reset_work);
}
static void nvme_shutdown(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_shutdown(dev);
+ nvme_dev_disable(dev, true);
}
static void nvme_remove(struct pci_dev *pdev)
spin_unlock(&dev_list_lock);
pci_set_drvdata(pdev, NULL);
- flush_work(&dev->probe_work);
flush_work(&dev->reset_work);
flush_work(&dev->scan_work);
- device_remove_file(dev->device, &dev_attr_reset_controller);
- nvme_dev_remove(dev);
- nvme_dev_shutdown(dev);
+ nvme_remove_namespaces(&dev->ctrl);
+ nvme_uninit_ctrl(&dev->ctrl);
+ nvme_dev_disable(dev, true);
nvme_dev_remove_admin(dev);
- device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
nvme_free_queues(dev, 0);
nvme_release_cmb(dev);
nvme_release_prp_pools(dev);
- kref_put(&dev->kref, nvme_free_dev);
+ nvme_put_ctrl(&dev->ctrl);
}
-/* These functions are yet to be implemented */
-#define nvme_error_detected NULL
-#define nvme_dump_registers NULL
-#define nvme_link_reset NULL
-#define nvme_slot_reset NULL
-#define nvme_error_resume NULL
-
#ifdef CONFIG_PM_SLEEP
static int nvme_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_dev_shutdown(ndev);
+ nvme_dev_disable(ndev, true);
return 0;
}
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- schedule_work(&ndev->probe_work);
+ queue_work(nvme_workq, &ndev->reset_work);
return 0;
}
#endif
static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);
+/*
+ * error_detected callback: log the reported channel state and map it to a
+ * recovery verdict.
+ *
+ * pci_channel_io_normal -> PCI_ERS_RESULT_CAN_RECOVER
+ * pci_channel_io_frozen -> disable the controller, then
+ * PCI_ERS_RESULT_NEED_RESET
+ * pci_channel_io_perm_failure -> PCI_ERS_RESULT_DISCONNECT
+ *
+ * Any state not listed above falls out of the switch and yields
+ * PCI_ERS_RESULT_NEED_RESET.
+ */
+static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct nvme_dev *dev = pci_get_drvdata(pdev);
+
+ /*
+ * A frozen channel requires a reset. When one is detected, this method
+ * disables the controller (without a full shutdown) to quiesce it. The
+ * controller is restarted after the slot reset through the driver's
+ * slot_reset callback.
+ */
+ dev_warn(&pdev->dev, "error detected: state:%d\n", state);
+ switch (state) {
+ case pci_channel_io_normal:
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ nvme_dev_disable(dev, false);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/*
+ * slot_reset callback: restore the PCI state, queue the controller's
+ * reset_work on nvme_workq and report PCI_ERS_RESULT_RECOVERED.
+ *
+ * The verdict is returned as soon as the work is queued; this function does
+ * not wait for the reset work to run or check its outcome.
+ */
+static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
+{
+ struct nvme_dev *dev = pci_get_drvdata(pdev);
+
+ dev_info(&pdev->dev, "restart after slot reset\n");
+ /* presumably restores what pci_save_state() captured when the device was mapped; confirm */
+ pci_restore_state(pdev);
+ queue_work(nvme_workq, &dev->reset_work);
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+/* resume callback: delegates to pci_cleanup_aer_uncorrect_error_status() for this device. */
+static void nvme_error_resume(struct pci_dev *pdev)
+{
+ pci_cleanup_aer_uncorrect_error_status(pdev);
+}
+
static const struct pci_error_handlers nvme_err_handler = {
.error_detected = nvme_error_detected,
- .mmio_enabled = nvme_dump_registers,
- .link_reset = nvme_link_reset,
.slot_reset = nvme_slot_reset,
.resume = nvme_error_resume,
.reset_notify = nvme_reset_notify,
#define PCI_CLASS_STORAGE_EXPRESS 0x010802
static const struct pci_device_id nvme_id_table[] = {
+ { PCI_VDEVICE(INTEL, 0x0953),
+ .driver_data = NVME_QUIRK_STRIPE_SIZE, },
+ { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
+ .driver_data = NVME_QUIRK_IDENTIFY_CNS, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ 0, }
init_waitqueue_head(&nvme_kthread_wait);
- nvme_workq = create_singlethread_workqueue("nvme");
+ nvme_workq = alloc_workqueue("nvme", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
if (!nvme_workq)
return -ENOMEM;
- result = register_blkdev(nvme_major, "nvme");
+ result = nvme_core_init();
if (result < 0)
goto kill_workq;
- else if (result > 0)
- nvme_major = result;
-
- result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
- &nvme_dev_fops);
- if (result < 0)
- goto unregister_blkdev;
- else if (result > 0)
- nvme_char_major = result;
-
- nvme_class = class_create(THIS_MODULE, "nvme");
- if (IS_ERR(nvme_class)) {
- result = PTR_ERR(nvme_class);
- goto unregister_chrdev;
- }
result = pci_register_driver(&nvme_driver);
if (result)
- goto destroy_class;
+ goto core_exit;
return 0;
- destroy_class:
- class_destroy(nvme_class);
- unregister_chrdev:
- __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
- unregister_blkdev:
- unregister_blkdev(nvme_major, "nvme");
+ core_exit:
+ nvme_core_exit();
kill_workq:
destroy_workqueue(nvme_workq);
return result;
static void __exit nvme_exit(void)
{
pci_unregister_driver(&nvme_driver);
- unregister_blkdev(nvme_major, "nvme");
+ nvme_core_exit();
destroy_workqueue(nvme_workq);
- class_destroy(nvme_class);
- __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
_nvme_check_size();
}
struct sg_io_hdr *hdr, u8 *inq_response,
int alloc_len)
{
- struct nvme_dev *dev = ns->dev;
+ struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_id_ns *id_ns;
int res;
int nvme_sc;
u8 resp_data_format = 0x02;
u8 protect;
u8 cmdque = 0x01 << 1;
- u8 fw_offset = sizeof(dev->firmware_rev);
+ u8 fw_offset = sizeof(ctrl->firmware_rev);
/* nvme ns identify - use DPS value for PROTECT field */
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
inq_response[5] = protect; /* sccs=0 | acc=0 | tpgs=0 | pc3=0 */
inq_response[7] = cmdque; /* wbus16=0 | sync=0 | vs=0 */
strncpy(&inq_response[8], "NVMe ", 8);
- strncpy(&inq_response[16], dev->model, 16);
+ strncpy(&inq_response[16], ctrl->model, 16);
- while (dev->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
+ while (ctrl->firmware_rev[fw_offset - 1] == ' ' && fw_offset > 4)
fw_offset--;
fw_offset -= 4;
- strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);
+ strncpy(&inq_response[32], ctrl->firmware_rev + fw_offset, 4);
xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
struct sg_io_hdr *hdr, u8 *inq_response,
int alloc_len)
{
- struct nvme_dev *dev = ns->dev;
int xfer_len;
memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
inq_response[1] = INQ_UNIT_SERIAL_NUMBER_PAGE; /* Page Code */
inq_response[3] = INQ_SERIAL_NUMBER_LENGTH; /* Page Length */
- strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH);
+ strncpy(&inq_response[4], ns->ctrl->serial, INQ_SERIAL_NUMBER_LENGTH);
xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
}
-static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *inq_response, int alloc_len)
+static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+ u8 *inq_response, int alloc_len)
{
- struct nvme_dev *dev = ns->dev;
- int res;
- int nvme_sc;
- int xfer_len;
- __be32 tmp_id = cpu_to_be32(ns->ns_id);
+ struct nvme_id_ns *id_ns;
+ int nvme_sc, res;
+ size_t len;
+ void *eui;
- memset(inq_response, 0, alloc_len);
- inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; /* Page Code */
- if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) {
- struct nvme_id_ns *id_ns;
- void *eui;
- int len;
+ nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
+ res = nvme_trans_status_code(hdr, nvme_sc);
+ if (res)
+ return res;
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- return res;
+ eui = id_ns->eui64;
+ len = sizeof(id_ns->eui64);
- eui = id_ns->eui64;
- len = sizeof(id_ns->eui64);
- if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) {
- if (bitmap_empty(eui, len * 8)) {
- eui = id_ns->nguid;
- len = sizeof(id_ns->nguid);
- }
- }
+ if (ns->ctrl->vs >= NVME_VS(1, 2)) {
if (bitmap_empty(eui, len * 8)) {
- kfree(id_ns);
- goto scsi_string;
+ eui = id_ns->nguid;
+ len = sizeof(id_ns->nguid);
}
+ }
- inq_response[3] = 4 + len; /* Page Length */
- /* Designation Descriptor start */
- inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
- inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
- inq_response[6] = 0x00; /* Rsvd */
- inq_response[7] = len; /* Designator Length */
- memcpy(&inq_response[8], eui, len);
- kfree(id_ns);
- } else {
- scsi_string:
- if (alloc_len < 72) {
- return nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- }
- inq_response[3] = 0x48; /* Page Length */
- /* Designation Descriptor start */
- inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
- inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
- inq_response[6] = 0x00; /* Rsvd */
- inq_response[7] = 0x44; /* Designator Length */
-
- sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor);
- memcpy(&inq_response[12], dev->model, sizeof(dev->model));
- sprintf(&inq_response[52], "%04x", tmp_id);
- memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
+ if (bitmap_empty(eui, len * 8)) {
+ res = -EOPNOTSUPP;
+ goto out_free_id;
}
- xfer_len = alloc_len;
- return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
+
+ memset(inq_response, 0, alloc_len);
+ inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
+ inq_response[3] = 4 + len; /* Page Length */
+
+ /* Designation Descriptor start */
+ inq_response[4] = 0x01; /* Proto ID=0h | Code set=1h */
+ inq_response[5] = 0x02; /* PIV=0b | Asso=00b | Designator Type=2h */
+ inq_response[6] = 0x00; /* Rsvd */
+ inq_response[7] = len; /* Designator Length */
+ memcpy(&inq_response[8], eui, len);
+
+ res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
+out_free_id:
+ kfree(id_ns);
+ return res;
+}
+
+/*
+ * Fill the Device Identification INQUIRY page with a string designator
+ * (code set 3h, designator type 8h) and copy it to the caller.
+ *
+ * The designator is laid out as: 4 hex digits of the controller's vendor id
+ * at offset 8, the model string at offset 12, the hex-formatted namespace id
+ * at offset 52 and the serial number at offset 56.
+ *
+ * Completes the command with CHECK CONDITION / ILLEGAL REQUEST when
+ * @alloc_len is below 72. Returns the translated status if Identify
+ * Controller fails, otherwise the result of nvme_trans_copy_to_user().
+ */
+static int nvme_fill_device_id_scsi_string(struct nvme_ns *ns,
+ struct sg_io_hdr *hdr, u8 *inq_response, int alloc_len)
+{
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ struct nvme_id_ctrl *id_ctrl;
+ int nvme_sc, res;
+
+ /*
+ * NOTE(review): the designator starts at byte 8 and is 0x44 bytes long,
+ * i.e. it ends at byte 76, yet only alloc_len >= 72 is required here.
+ * Confirm that the caller's buffer is always large enough.
+ */
+ if (alloc_len < 72) {
+ return nvme_trans_completion(hdr,
+ SAM_STAT_CHECK_CONDITION,
+ ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
+ SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+ }
+
+ nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
+ res = nvme_trans_status_code(hdr, nvme_sc);
+ if (res)
+ return res;
+
+ memset(inq_response, 0, alloc_len);
+ inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;
+ inq_response[3] = 0x48; /* Page Length */
+
+ /* Designation Descriptor start */
+ inq_response[4] = 0x03; /* Proto ID=0h | Code set=3h */
+ inq_response[5] = 0x08; /* PIV=0b | Asso=00b | Designator Type=8h */
+ inq_response[6] = 0x00; /* Rsvd */
+ inq_response[7] = 0x44; /* Designator Length */
+
+ /* Each sprintf() also writes a NUL terminator; the memcpy() that follows overwrites it. */
+ sprintf(&inq_response[8], "%04x", le16_to_cpu(id_ctrl->vid));
+ memcpy(&inq_response[12], ctrl->model, sizeof(ctrl->model));
+ /*
+ * NOTE(review): "%04x" is a minimum width. The byte-swapped 32-bit ns_id
+ * can format to as many as 8 digits, spilling past byte 55 into the
+ * serial field before the memcpy() below rewrites it. Confirm intended.
+ */
+ sprintf(&inq_response[52], "%04x", cpu_to_be32(ns->ns_id));
+ memcpy(&inq_response[56], ctrl->serial, sizeof(ctrl->serial));
+
+ res = nvme_trans_copy_to_user(hdr, inq_response, alloc_len);
+ kfree(id_ctrl);
+ return res;
+}
+
+/*
+ * Build the Device Identification INQUIRY page for @ns.
+ *
+ * When the controller reports version 1.1 or later, the EUI-64/NGUID
+ * designator is tried first. Any result other than -EOPNOTSUPP from that
+ * helper (success or failure) is returned as is. On -EOPNOTSUPP, or for
+ * older controllers, the string designator is used instead.
+ */
+static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+ u8 *resp, int alloc_len)
+{
+ int res;
+
+ if (ns->ctrl->vs >= NVME_VS(1, 1)) {
+ res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len);
+ if (res != -EOPNOTSUPP)
+ return res;
+ }
+
+ return nvme_fill_device_id_scsi_string(ns, hdr, resp, alloc_len);
}
static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *inq_response;
int res;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
+ struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_id_ctrl *id_ctrl;
struct nvme_id_ns *id_ns;
int xfer_len;
if (inq_response == NULL)
return -ENOMEM;
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
goto out_free_inq;
app_chk = protect << 1;
ref_chk = protect;
- nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+ nvme_sc = nvme_identify_ctrl(ctrl, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
goto out_free_inq;
int res;
int xfer_len;
u8 *log_response;
- struct nvme_dev *dev = ns->dev;
struct nvme_smart_log *smart_log;
u8 temp_c;
u16 temp_k;
if (log_response == NULL)
return -ENOMEM;
- res = nvme_get_log_page(dev, &smart_log);
+ res = nvme_get_log_page(ns->ctrl, &smart_log);
if (res < 0)
goto out_free_response;
int res;
int xfer_len;
u8 *log_response;
- struct nvme_dev *dev = ns->dev;
struct nvme_smart_log *smart_log;
u32 feature_resp;
u8 temp_c_cur, temp_c_thresh;
if (log_response == NULL)
return -ENOMEM;
- res = nvme_get_log_page(dev, &smart_log);
+ res = nvme_get_log_page(ns->ctrl, &smart_log);
if (res < 0)
goto out_free_response;
kfree(smart_log);
/* Get Features for Temp Threshold */
- res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0,
+ res = nvme_get_features(ns->ctrl, NVME_FEAT_TEMP_THRESH, 0, 0,
&feature_resp);
if (res != NVME_SC_SUCCESS)
temp_c_thresh = LOG_TEMP_UNKNOWN;
{
int res;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id_ns;
u8 flbas;
u32 lba_length;
else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
return -EINVAL;
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
{
int res = 0;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
u32 feature_resp;
u8 vwc;
if (len < MODE_PAGE_CACHING_LEN)
return -EINVAL;
- nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0,
+ nvme_sc = nvme_get_features(ns->ctrl, NVME_FEAT_VOLATILE_WC, 0, 0,
&feature_resp);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
{
int res;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
struct nvme_id_ctrl *id_ctrl;
int lowest_pow_st; /* max npss = lowest power consumption */
unsigned ps_desired = 0;
- nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+ nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
break;
}
- nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0,
+ nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_POWER_MGMT, ps_desired, 0,
NULL);
return nvme_trans_status_code(hdr, nvme_sc);
}
u8 buffer_id)
{
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
struct nvme_command c;
if (hdr->iovec_count > 0) {
c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
- nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL,
+ nvme_sc = nvme_submit_user_cmd(ns->ctrl->admin_q, &c,
hdr->dxferp, tot_len, NULL, 0);
return nvme_trans_status_code(hdr, nvme_sc);
}
{
int res = 0;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
unsigned dword11;
switch (page_code) {
case MODE_PAGE_CACHING:
dword11 = ((mode_page[2] & CACHING_MODE_PAGE_WCE_MASK) ? 1 : 0);
- nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11,
- 0, NULL);
+ nvme_sc = nvme_set_features(ns->ctrl, NVME_FEAT_VOLATILE_WC,
+ dword11, 0, NULL);
res = nvme_trans_status_code(hdr, nvme_sc);
break;
case MODE_PAGE_CONTROL:
{
int res = 0;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
u8 flbas;
/*
if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
struct nvme_id_ns *id_ns;
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
{
int res;
int nvme_sc;
- struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id_ns;
u8 i;
u8 flbas, nlbaf;
struct nvme_command c;
/* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
c.format.nsid = cpu_to_le32(ns->ns_id);
c.format.cdw10 = cpu_to_le32(cdw10);
- nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
+ nvme_sc = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, NULL, 0);
res = nvme_trans_status_code(hdr, nvme_sc);
kfree(id_ns);
nvme_sc = NVME_SC_LBA_RANGE;
break;
}
- nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+ nvme_sc = nvme_submit_user_cmd(ns->queue, &c,
next_mapping_addr, unit_len, NULL, 0);
if (nvme_sc)
break;
u32 alloc_len;
u32 resp_size;
u32 xfer_len;
- struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id_ns;
u8 *response;
resp_size = READ_CAP_10_RESP_SIZE;
}
- nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
+ nvme_sc = nvme_identify_ns(ns->ctrl, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
int nvme_sc;
u32 alloc_len, xfer_len, resp_size;
u8 *response;
- struct nvme_dev *dev = ns->dev;
struct nvme_id_ctrl *id_ctrl;
u32 ll_length, lun_id;
u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
case ALL_LUNS_RETURNED:
case ALL_WELL_KNOWN_LUNS_RETURNED:
case RESTRICTED_LUNS_RETURNED:
- nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
+ nvme_sc = nvme_identify_ctrl(ns->ctrl, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
return res;
struct sg_io_hdr *hdr,
u8 *cmd)
{
- struct nvme_dev *dev = ns->dev;
-
- if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY))
+ if (nvme_ctrl_ready(ns->ctrl))
return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
NOT_READY, SCSI_ASC_LUN_NOT_READY,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
struct dentry *dentry;
struct inode *inode;
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_alloc_name(root, name);
if (!dentry) {
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return -ENOMEM;
}
inode = oprofilefs_get_inode(root->d_sb, S_IFREG | perm);
if (!inode) {
dput(dentry);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return -ENOMEM;
}
inode->i_fop = fops;
inode->i_private = priv;
d_add(dentry, inode);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return 0;
}
struct dentry *dentry;
struct inode *inode;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
dentry = d_alloc_name(parent, name);
if (!dentry) {
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return NULL;
}
inode = oprofilefs_get_inode(parent->d_sb, S_IFDIR | 0755);
if (!inode) {
dput(dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return NULL;
}
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
d_add(dentry, inode);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return dentry;
}
static const struct file_operations tw_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = tw_chrdev_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = tw_chrdev_ioctl,
+#endif
.open = tw_chrdev_open,
.release = NULL,
.llseek = noop_llseek,
tristate "IBM Power Linux RAID adapter support"
depends on PCI && SCSI && ATA
select FW_LOADER
+ select IRQ_POLL
---help---
This driver supports the IBM Power Linux family RAID adapters.
This includes IBM pSeries 5712, 5703, 5709, and 570A, as well
ST-DMA, replacing ACSI). It does NOT support other schemes, like
in the Hades (without DMA).
-config ATARI_SCSI_TOSHIBA_DELAY
- bool "Long delays for Toshiba CD-ROMs"
- depends on ATARI_SCSI
- help
- This option increases the delay after a SCSI arbitration to
- accommodate some flaky Toshiba CD-ROM drives. Say Y if you intend to
- use a Toshiba CD-ROM drive; otherwise, the option is not needed and
- would impact performance a bit, so say N.
-
-config ATARI_SCSI_RESET_BOOT
- bool "Reset SCSI-devices at boottime"
- depends on ATARI_SCSI
- help
- Reset the devices on your Atari whenever it boots. This makes the
- boot process fractionally longer but may assist recovery from errors
- that leave the devices with SCSI operations partway completed.
-
config MAC_SCSI
tristate "Macintosh NCR5380 SCSI"
depends on MAC && SCSI=y
-/*
+/*
* NCR 5380 generic driver routines. These should make it *trivial*
- * to implement 5380 SCSI drivers under Linux with a non-trantor
- * architecture.
+ * to implement 5380 SCSI drivers under Linux with a non-trantor
+ * architecture.
*
- * Note that these routines also work with NR53c400 family chips.
+ * Note that these routines also work with NCR53c400 family chips.
*
* Copyright 1993, Drew Eckhardt
- * Visionary Computing
- * (Unix and Linux consulting and custom programming)
- * drew@colorado.edu
- * +1 (303) 666-5836
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@colorado.edu
+ * +1 (303) 666-5836
*
- * For more information, please consult
+ * For more information, please consult
*
* NCR 5380 Family
* SCSI Protocol Controller
*/
/*
- * Revision 1.10 1998/9/2 Alan Cox
- * (alan@lxorguk.ukuu.org.uk)
- * Fixed up the timer lockups reported so far. Things still suck. Looking
- * forward to 2.3 and per device request queues. Then it'll be possible to
- * SMP thread this beast and improve life no end.
-
- * Revision 1.9 1997/7/27 Ronald van Cuijlenborg
- * (ronald.van.cuijlenborg@tip.nl or nutty@dds.nl)
- * (hopefully) fixed and enhanced USLEEP
- * added support for DTC3181E card (for Mustek scanner)
- *
-
- * Revision 1.8 Ingmar Baumgart
- * (ingmar@gonzo.schwaben.de)
- * added support for NCR53C400a card
- *
-
- * Revision 1.7 1996/3/2 Ray Van Tassle (rayvt@comm.mot.com)
- * added proc_info
- * added support needed for DTC 3180/3280
- * fixed a couple of bugs
- *
-
- * Revision 1.5 1994/01/19 09:14:57 drew
- * Fixed udelay() hack that was being used on DATAOUT phases
- * instead of a proper wait for the final handshake.
- *
- * Revision 1.4 1994/01/19 06:44:25 drew
- * *** empty log message ***
- *
- * Revision 1.3 1994/01/19 05:24:40 drew
- * Added support for TCR LAST_BYTE_SENT bit.
- *
- * Revision 1.2 1994/01/15 06:14:11 drew
- * REAL DMA support, bug fixes.
- *
- * Revision 1.1 1994/01/15 06:00:54 drew
- * Initial revision
- *
+ * With contributions from Ray Van Tassle, Ingmar Baumgart,
+ * Ronald van Cuijlenborg, Alan Cox and others.
*/
/*
- * Further development / testing that should be done :
+ * Further development / testing that should be done :
* 1. Cleanup the NCR5380_transfer_dma function and DMA operation complete
- * code so that everything does the same thing that's done at the
- * end of a pseudo-DMA read operation.
+ * code so that everything does the same thing that's done at the
+ * end of a pseudo-DMA read operation.
*
* 2. Fix REAL_DMA (interrupt driven, polled works fine) -
- * basically, transfer size needs to be reduced by one
- * and the last byte read as is done with PSEUDO_DMA.
- *
- * 4. Test SCSI-II tagged queueing (I have no devices which support
- * tagged queueing)
- *
- * 5. Test linked command handling code after Eric is ready with
- * the high level code.
+ * basically, transfer size needs to be reduced by one
+ * and the last byte read as is done with PSEUDO_DMA.
+ *
+ * 4. Test SCSI-II tagged queueing (I have no devices which support
+ * tagged queueing)
*/
-#include <scsi/scsi_dbg.h>
-#include <scsi/scsi_transport_spi.h>
-
-#if (NDEBUG & NDEBUG_LISTS)
-#define LIST(x,y) {printk("LINE:%d Adding %p to %p\n", __LINE__, (void*)(x), (void*)(y)); if ((x)==(y)) udelay(5); }
-#define REMOVE(w,x,y,z) {printk("LINE:%d Removing: %p->%p %p->%p \n", __LINE__, (void*)(w), (void*)(x), (void*)(y), (void*)(z)); if ((x)==(y)) udelay(5); }
-#else
-#define LIST(x,y)
-#define REMOVE(w,x,y,z)
-#endif
#ifndef notyet
-#undef LINKED
#undef REAL_DMA
#endif
-#ifdef REAL_DMA_POLL
-#undef READ_OVERRUNS
-#define READ_OVERRUNS
-#endif
-
#ifdef BOARD_REQUIRES_NO_DELAY
#define io_recovery_delay(x)
#else
/*
* Design
*
- * This is a generic 5380 driver. To use it on a different platform,
+ * This is a generic 5380 driver. To use it on a different platform,
* one simply writes appropriate system specific macros (ie, data
- * transfer - some PC's will use the I/O bus, 68K's must use
+ * transfer - some PC's will use the I/O bus, 68K's must use
* memory mapped) and drops this file in their 'C' wrapper.
*
- * (Note from hch: unfortunately it was not enough for the different
- * m68k folks and instead of improving this driver they copied it
- * and hacked it up for their needs. As a consequence they lost
- * most updates to this driver. Maybe someone will fix all these
- * drivers to use a common core one day..)
- *
- * As far as command queueing, two queues are maintained for
+ * As far as command queueing, two queues are maintained for
* each 5380 in the system - commands that haven't been issued yet,
- * and commands that are currently executing. This means that an
- * unlimited number of commands may be queued, letting
- * more commands propagate from the higher driver levels giving higher
- * throughput. Note that both I_T_L and I_T_L_Q nexuses are supported,
- * allowing multiple commands to propagate all the way to a SCSI-II device
+ * and commands that are currently executing. This means that an
+ * unlimited number of commands may be queued, letting
+ * more commands propagate from the higher driver levels giving higher
+ * throughput. Note that both I_T_L and I_T_L_Q nexuses are supported,
+ * allowing multiple commands to propagate all the way to a SCSI-II device
* while a command is already executing.
*
*
- * Issues specific to the NCR5380 :
- *
- * When used in a PIO or pseudo-dma mode, the NCR5380 is a braindead
- * piece of hardware that requires you to sit in a loop polling for
- * the REQ signal as long as you are connected. Some devices are
- * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
- * while doing long seek operations.
- *
- * The workaround for this is to keep track of devices that have
- * disconnected. If the device hasn't disconnected, for commands that
- * should disconnect, we do something like
+ * Issues specific to the NCR5380 :
*
- * while (!REQ is asserted) { sleep for N usecs; poll for M usecs }
- *
- * Some tweaking of N and M needs to be done. An algorithm based
- * on "time to data" would give the best results as long as short time
- * to datas (ie, on the same track) were considered, however these
+ * When used in a PIO or pseudo-dma mode, the NCR5380 is a braindead
+ * piece of hardware that requires you to sit in a loop polling for
+ * the REQ signal as long as you are connected. Some devices are
+ * brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
+ * while doing long seek operations. [...] These
* broken devices are the exception rather than the rule and I'd rather
* spend my time optimizing for the normal case.
*
* which is started from a workqueue for each NCR5380 host in the
* system. It attempts to establish I_T_L or I_T_L_Q nexuses by
* removing the commands from the issue queue and calling
- * NCR5380_select() if a nexus is not established.
+ * NCR5380_select() if a nexus is not established.
*
* Once a nexus is established, the NCR5380_information_transfer()
* phase goes through the various phases as instructed by the target.
* if the target goes into MSG IN and sends a DISCONNECT message,
* the command structure is placed into the per instance disconnected
- * queue, and NCR5380_main tries to find more work. If the target is
+ * queue, and NCR5380_main tries to find more work. If the target is
* idle for too long, the system will try to sleep.
*
* If a command has disconnected, eventually an interrupt will trigger,
* calling NCR5380_intr() which will in turn call NCR5380_reselect
* to reestablish a nexus. This will run main if necessary.
*
- * On command termination, the done function will be called as
+ * On command termination, the done function will be called as
* appropriate.
*
- * SCSI pointers are maintained in the SCp field of SCSI command
+ * SCSI pointers are maintained in the SCp field of SCSI command
* structures, being initialized after the command is connected
* in NCR5380_select, and set as appropriate in NCR5380_information_transfer.
* Note that in violation of the standard, an implicit SAVE POINTERS operation
/*
* Using this file :
* This file a skeleton Linux SCSI driver for the NCR 5380 series
- * of chips. To use it, you write an architecture specific functions
+ * of chips. To use it, you write architecture specific functions
* and macros and include this file in your driver.
*
- * These macros control options :
- * AUTOPROBE_IRQ - if defined, the NCR5380_probe_irq() function will be
- * defined.
- *
+ * These macros control options :
+ * AUTOPROBE_IRQ - if defined, the NCR5380_probe_irq() function will be
+ * defined.
+ *
* AUTOSENSE - if defined, REQUEST SENSE will be performed automatically
- * for commands that return with a CHECK CONDITION status.
+ * for commands that return with a CHECK CONDITION status.
*
* DIFFERENTIAL - if defined, NCR53c81 chips will use external differential
- * transceivers.
+ * transceivers.
*
* DONT_USE_INTR - if defined, never use interrupts, even if we probe or
- * override-configure an IRQ.
- *
- * LIMIT_TRANSFERSIZE - if defined, limit the pseudo-dma transfers to 512
- * bytes at a time. Since interrupts are disabled by default during
- * these transfers, we might need this to give reasonable interrupt
- * service time if the transfer size gets too large.
- *
- * LINKED - if defined, linked commands are supported.
+ * override-configure an IRQ.
*
* PSEUDO_DMA - if defined, PSEUDO DMA is used during the data transfer phases.
*
* REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
*
* REAL_DMA_POLL - if defined, REAL DMA is used but the driver doesn't
- * rely on phase mismatch and EOP interrupts to determine end
- * of phase.
- *
- * UNSAFE - leave interrupts enabled during pseudo-DMA transfers. You
- * only really want to use this if you're having a problem with
- * dropped characters during high speed communications, and even
- * then, you're going to be better off twiddling with transfersize
- * in the high level code.
- *
- * Defaults for these will be provided although the user may want to adjust
- * these to allocate CPU resources to the SCSI driver or "real" code.
- *
- * USLEEP_SLEEP - amount of time, in jiffies, to sleep
- *
- * USLEEP_POLL - amount of time, in jiffies, to poll
+ * rely on phase mismatch and EOP interrupts to determine end
+ * of phase.
*
* These macros MUST be defined :
- * NCR5380_local_declare() - declare any local variables needed for your
- * transfer routines.
*
- * NCR5380_setup(instance) - initialize any local variables needed from a given
- * instance of the host adapter for NCR5380_{read,write,pread,pwrite}
- *
* NCR5380_read(register) - read from the specified register
*
- * NCR5380_write(register, value) - write to the specific register
+ * NCR5380_write(register, value) - write to the specific register
*
- * NCR5380_implementation_fields - additional fields needed for this
- * specific implementation of the NCR5380
+ * NCR5380_implementation_fields - additional fields needed for this
+ * specific implementation of the NCR5380
*
* Either real DMA *or* pseudo DMA may be implemented
- * REAL functions :
+ * REAL functions :
* NCR5380_REAL_DMA should be defined if real DMA is to be used.
- * Note that the DMA setup functions should return the number of bytes
- * that they were able to program the controller for.
+ * Note that the DMA setup functions should return the number of bytes
+ * that they were able to program the controller for.
*
- * Also note that generic i386/PC versions of these macros are
- * available as NCR5380_i386_dma_write_setup,
- * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
+ * Also note that generic i386/PC versions of these macros are
+ * available as NCR5380_i386_dma_write_setup,
+ * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
*
* NCR5380_dma_write_setup(instance, src, count) - initialize
* NCR5380_dma_read_setup(instance, dst, count) - initialize
* NCR5380_pread(instance, dst, count);
*
* The generic driver is initialized by calling NCR5380_init(instance),
- * after setting the appropriate host specific fields and ID. If the
+ * after setting the appropriate host specific fields and ID. If the
* driver wishes to autoprobe for an IRQ line, the NCR5380_probe_irq(instance,
* possible) function may be used.
*/
-static int do_abort(struct Scsi_Host *host);
-static void do_reset(struct Scsi_Host *host);
+static int do_abort(struct Scsi_Host *);
+static void do_reset(struct Scsi_Host *);
-/*
- * initialize_SCp - init the scsi pointer field
- * @cmd: command block to set up
+/**
+ * initialize_SCp - init the scsi pointer field
+ * @cmd: command block to set up
*
- * Set up the internal fields in the SCSI command.
+ * Set up the internal fields in the SCSI command.
*/
static inline void initialize_SCp(struct scsi_cmnd *cmd)
{
- /*
- * Initialize the Scsi Pointer field so that all of the commands in the
+ /*
+ * Initialize the Scsi Pointer field so that all of the commands in the
* various queues are valid.
*/
cmd->SCp.ptr = NULL;
cmd->SCp.this_residual = 0;
}
+
+ cmd->SCp.Status = 0;
+ cmd->SCp.Message = 0;
}
/**
- * NCR5380_poll_politely - wait for NCR5380 status bits
- * @instance: controller to poll
- * @reg: 5380 register to poll
- * @bit: Bitmask to check
- * @val: Value required to exit
- *
- * Polls the NCR5380 in a reasonably efficient manner waiting for
- * an event to occur, after a short quick poll we begin giving the
- * CPU back in non IRQ contexts
- *
- * Returns the value of the register or a negative error code.
+ * NCR5380_poll_politely2 - wait for two chip register values
+ * @instance: controller to poll
+ * @reg1: 5380 register to poll
+ * @bit1: Bitmask to check
+ * @val1: Expected value
+ * @reg2: Second 5380 register to poll
+ * @bit2: Second bitmask to check
+ * @val2: Second expected value
+ * @wait: Time-out in jiffies
+ *
+ * Polls the chip in a reasonably efficient manner waiting for an
+ * event to occur. After a short quick poll we begin to yield the CPU
+ * (if possible). In irq contexts the time-out is arbitrarily limited.
+ * Callers may hold locks as long as they are held in irq mode.
+ *
+ * Returns 0 if either or both event(s) occurred otherwise -ETIMEDOUT.
*/
-
-static int NCR5380_poll_politely(struct Scsi_Host *instance, int reg, int bit, int val, int t)
+
+static int NCR5380_poll_politely2(struct Scsi_Host *instance,
+ int reg1, int bit1, int val1,
+ int reg2, int bit2, int val2, int wait)
{
- NCR5380_local_declare();
- int n = 500; /* At about 8uS a cycle for the cpu access */
- unsigned long end = jiffies + t;
- int r;
-
- NCR5380_setup(instance);
-
- while( n-- > 0)
- {
- r = NCR5380_read(reg);
- if((r & bit) == val)
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ unsigned long deadline = jiffies + wait;
+ unsigned long n;
+
+ /* Busy-wait for up to 10 ms */
+ n = min(10000U, jiffies_to_usecs(wait));
+ n *= hostdata->accesses_per_ms;
+ n /= 2000;
+ do {
+ if ((NCR5380_read(reg1) & bit1) == val1)
+ return 0;
+ if ((NCR5380_read(reg2) & bit2) == val2)
return 0;
cpu_relax();
- }
-
- /* t time yet ? */
- while(time_before(jiffies, end))
- {
- r = NCR5380_read(reg);
- if((r & bit) == val)
+ } while (n--);
+
+ if (irqs_disabled() || in_interrupt())
+ return -ETIMEDOUT;
+
+ /* Repeatedly sleep for 1 ms until deadline */
+ while (time_is_after_jiffies(deadline)) {
+ schedule_timeout_uninterruptible(1);
+ if ((NCR5380_read(reg1) & bit1) == val1)
+ return 0;
+ if ((NCR5380_read(reg2) & bit2) == val2)
return 0;
- if(!in_interrupt())
- cond_resched();
- else
- cpu_relax();
}
+
return -ETIMEDOUT;
}
-static struct {
- unsigned char value;
- const char *name;
-} phases[] __maybe_unused = {
- {PHASE_DATAOUT, "DATAOUT"},
- {PHASE_DATAIN, "DATAIN"},
- {PHASE_CMDOUT, "CMDOUT"},
- {PHASE_STATIN, "STATIN"},
- {PHASE_MSGOUT, "MSGOUT"},
- {PHASE_MSGIN, "MSGIN"},
- {PHASE_UNKNOWN, "UNKNOWN"}
-};
+static inline int NCR5380_poll_politely(struct Scsi_Host *instance,
+ int reg, int bit, int val, int wait)
+{
+ return NCR5380_poll_politely2(instance, reg, bit, val,
+ reg, bit, val, wait);
+}
#if NDEBUG
static struct {
unsigned char mask;
const char *name;
-} signals[] = {
- {SR_DBP, "PARITY"},
- {SR_RST, "RST"},
- {SR_BSY, "BSY"},
- {SR_REQ, "REQ"},
- {SR_MSG, "MSG"},
- {SR_CD, "CD"},
- {SR_IO, "IO"},
- {SR_SEL, "SEL"},
+} signals[] = {
+ {SR_DBP, "PARITY"},
+ {SR_RST, "RST"},
+ {SR_BSY, "BSY"},
+ {SR_REQ, "REQ"},
+ {SR_MSG, "MSG"},
+ {SR_CD, "CD"},
+ {SR_IO, "IO"},
+ {SR_SEL, "SEL"},
{0, NULL}
-},
+},
basrs[] = {
- {BASR_ATN, "ATN"},
- {BASR_ACK, "ACK"},
+ {BASR_ATN, "ATN"},
+ {BASR_ACK, "ACK"},
{0, NULL}
-},
-icrs[] = {
- {ICR_ASSERT_RST, "ASSERT RST"},
- {ICR_ASSERT_ACK, "ASSERT ACK"},
- {ICR_ASSERT_BSY, "ASSERT BSY"},
- {ICR_ASSERT_SEL, "ASSERT SEL"},
- {ICR_ASSERT_ATN, "ASSERT ATN"},
- {ICR_ASSERT_DATA, "ASSERT DATA"},
+},
+icrs[] = {
+ {ICR_ASSERT_RST, "ASSERT RST"},
+ {ICR_ASSERT_ACK, "ASSERT ACK"},
+ {ICR_ASSERT_BSY, "ASSERT BSY"},
+ {ICR_ASSERT_SEL, "ASSERT SEL"},
+ {ICR_ASSERT_ATN, "ASSERT ATN"},
+ {ICR_ASSERT_DATA, "ASSERT DATA"},
{0, NULL}
-},
-mrs[] = {
- {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
- {MR_TARGET, "MODE TARGET"},
- {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
- {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
- {MR_MONITOR_BSY, "MODE MONITOR BSY"},
- {MR_DMA_MODE, "MODE DMA"},
- {MR_ARBITRATE, "MODE ARBITRATION"},
+},
+mrs[] = {
+ {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
+ {MR_TARGET, "MODE TARGET"},
+ {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
+ {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
+ {MR_ENABLE_EOP_INTR, "MODE EOP INTR"},
+ {MR_MONITOR_BSY, "MODE MONITOR BSY"},
+ {MR_DMA_MODE, "MODE DMA"},
+ {MR_ARBITRATE, "MODE ARBITRATION"},
{0, NULL}
};
/**
- * NCR5380_print - print scsi bus signals
- * @instance: adapter state to dump
- *
- * Print the SCSI bus signals for debugging purposes
+ * NCR5380_print - print scsi bus signals
+ * @instance: adapter state to dump
*
- * Locks: caller holds hostdata lock (not essential)
+ * Print the SCSI bus signals for debugging purposes
*/
static void NCR5380_print(struct Scsi_Host *instance)
{
- NCR5380_local_declare();
unsigned char status, data, basr, mr, icr, i;
- NCR5380_setup(instance);
data = NCR5380_read(CURRENT_SCSI_DATA_REG);
status = NCR5380_read(STATUS_REG);
printk("\n");
}
+static struct {
+ unsigned char value;
+ const char *name;
+} phases[] = {
+ {PHASE_DATAOUT, "DATAOUT"},
+ {PHASE_DATAIN, "DATAIN"},
+ {PHASE_CMDOUT, "CMDOUT"},
+ {PHASE_STATIN, "STATIN"},
+ {PHASE_MSGOUT, "MSGOUT"},
+ {PHASE_MSGIN, "MSGIN"},
+ {PHASE_UNKNOWN, "UNKNOWN"}
+};
-/*
- * NCR5380_print_phase - show SCSI phase
- * @instance: adapter to dump
- *
- * Print the current SCSI phase for debugging purposes
+/**
+ * NCR5380_print_phase - show SCSI phase
+ * @instance: adapter to dump
*
- * Locks: none
+ * Print the current SCSI phase for debugging purposes
*/
static void NCR5380_print_phase(struct Scsi_Host *instance)
{
- NCR5380_local_declare();
unsigned char status;
int i;
- NCR5380_setup(instance);
status = NCR5380_read(STATUS_REG);
if (!(status & SR_REQ))
- printk("scsi%d : REQ not asserted, phase unknown.\n", instance->host_no);
+ shost_printk(KERN_DEBUG, instance, "REQ not asserted, phase unknown.\n");
else {
- for (i = 0; (phases[i].value != PHASE_UNKNOWN) && (phases[i].value != (status & PHASE_MASK)); ++i);
- printk("scsi%d : phase %s\n", instance->host_no, phases[i].name);
+ for (i = 0; (phases[i].value != PHASE_UNKNOWN) &&
+ (phases[i].value != (status & PHASE_MASK)); ++i)
+ ;
+ shost_printk(KERN_DEBUG, instance, "phase %s\n", phases[i].name);
}
}
#endif
-/*
- * These need tweaking, and would probably work best as per-device
- * flags initialized differently for disk, tape, cd, etc devices.
- * People with broken devices are free to experiment as to what gives
- * the best results for them.
- *
- * USLEEP_SLEEP should be a minimum seek time.
- *
- * USLEEP_POLL should be a maximum rotational latency.
- */
-#ifndef USLEEP_SLEEP
-/* 20 ms (reasonable hard disk speed) */
-#define USLEEP_SLEEP msecs_to_jiffies(20)
-#endif
-/* 300 RPM (floppy speed) */
-#ifndef USLEEP_POLL
-#define USLEEP_POLL msecs_to_jiffies(200)
-#endif
-#ifndef USLEEP_WAITLONG
-/* RvC: (reasonable time to wait on select error) */
-#define USLEEP_WAITLONG USLEEP_SLEEP
-#endif
-
-/*
- * Function : int should_disconnect (unsigned char cmd)
- *
- * Purpose : decide whether a command would normally disconnect or
- * not, since if it won't disconnect we should go to sleep.
- *
- * Input : cmd - opcode of SCSI command
- *
- * Returns : DISCONNECT_LONG if we should disconnect for a really long
- * time (ie always, sleep, look for REQ active, sleep),
- * DISCONNECT_TIME_TO_DATA if we would only disconnect for a normal
- * time-to-data delay, DISCONNECT_NONE if this command would return
- * immediately.
- *
- * Future sleep algorithms based on time to data can exploit
- * something like this so they can differentiate between "normal"
- * (ie, read, write, seek) and unusual commands (ie, * format).
- *
- * Note : We don't deal with commands that handle an immediate disconnect,
- *
- */
-static int should_disconnect(unsigned char cmd)
-{
- switch (cmd) {
- case READ_6:
- case WRITE_6:
- case SEEK_6:
- case READ_10:
- case WRITE_10:
- case SEEK_10:
- return DISCONNECT_TIME_TO_DATA;
- case FORMAT_UNIT:
- case SEARCH_HIGH:
- case SEARCH_LOW:
- case SEARCH_EQUAL:
- return DISCONNECT_LONG;
- default:
- return DISCONNECT_NONE;
- }
-}
-
-static void NCR5380_set_timer(struct NCR5380_hostdata *hostdata, unsigned long timeout)
-{
- hostdata->time_expires = jiffies + timeout;
- schedule_delayed_work(&hostdata->coroutine, timeout);
-}
-
-
-static int probe_irq __initdata = 0;
+static int probe_irq __initdata;
/**
- * probe_intr - helper for IRQ autoprobe
- * @irq: interrupt number
- * @dev_id: unused
- * @regs: unused
+ * probe_intr - helper for IRQ autoprobe
+ * @irq: interrupt number
+ * @dev_id: unused
+ * @regs: unused
*
- * Set a flag to indicate the IRQ in question was received. This is
- * used by the IRQ probe code.
+ * Set a flag to indicate the IRQ in question was received. This is
+ * used by the IRQ probe code.
*/
-
+
static irqreturn_t __init probe_intr(int irq, void *dev_id)
{
probe_irq = irq;
}
/**
- * NCR5380_probe_irq - find the IRQ of an NCR5380
- * @instance: NCR5380 controller
- * @possible: bitmask of ISA IRQ lines
- *
- * Autoprobe for the IRQ line used by the NCR5380 by triggering an IRQ
- * and then looking to see what interrupt actually turned up.
+ * NCR5380_probe_irq - find the IRQ of an NCR5380
+ * @instance: NCR5380 controller
+ * @possible: bitmask of ISA IRQ lines
*
- * Locks: none, irqs must be enabled on entry
+ * Autoprobe for the IRQ line used by the NCR5380 by triggering an IRQ
+ * and then looking to see what interrupt actually turned up.
*/
static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
int possible)
{
- NCR5380_local_declare();
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned long timeout;
int trying_irqs, i, mask;
- NCR5380_setup(instance);
for (trying_irqs = 0, i = 1, mask = 2; i < 16; ++i, mask <<= 1)
if ((mask & possible) && (request_irq(i, &probe_intr, 0, "NCR-probe", NULL) == 0))
/*
* A interrupt is triggered whenever BSY = false, SEL = true
- * and a bit set in the SELECT_ENABLE_REG is asserted on the
+ * and a bit set in the SELECT_ENABLE_REG is asserted on the
* SCSI bus.
*
* Note that the bus is only driven when the phase control signals
while (probe_irq == NO_IRQ && time_before(jiffies, timeout))
schedule_timeout_uninterruptible(1);
-
+
NCR5380_write(SELECT_ENABLE_REG, 0);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
}
/**
- * NCR58380_info - report driver and host information
- * @instance: relevant scsi host instance
- *
- * For use as the host template info() handler.
+ * NCR5380_info - report driver and host information
+ * @instance: relevant scsi host instance
*
- * Locks: none
+ * For use as the host template info() handler.
*/
static const char *NCR5380_info(struct Scsi_Host *instance)
"can_queue %d, cmd_per_lun %d, "
"sg_tablesize %d, this_id %d, "
"flags { %s%s%s}, "
-#if defined(USLEEP_POLL) && defined(USLEEP_WAITLONG)
- "USLEEP_POLL %lu, USLEEP_WAITLONG %lu, "
-#endif
"options { %s} ",
instance->hostt->name, instance->io_port, instance->n_io_port,
instance->base, instance->irq,
instance->can_queue, instance->cmd_per_lun,
instance->sg_tablesize, instance->this_id,
- hostdata->flags & FLAG_NCR53C400 ? "NCR53C400 " : "",
- hostdata->flags & FLAG_DTC3181E ? "DTC3181E " : "",
+ hostdata->flags & FLAG_NO_DMA_FIXUP ? "NO_DMA_FIXUP " : "",
hostdata->flags & FLAG_NO_PSEUDO_DMA ? "NO_PSEUDO_DMA " : "",
-#if defined(USLEEP_POLL) && defined(USLEEP_WAITLONG)
- USLEEP_POLL, USLEEP_WAITLONG,
-#endif
+ hostdata->flags & FLAG_TOSHIBA_DELAY ? "TOSHIBA_DELAY " : "",
#ifdef AUTOPROBE_IRQ
"AUTOPROBE_IRQ "
#endif
#endif
#ifdef PSEUDO_DMA
"PSEUDO_DMA "
-#endif
-#ifdef UNSAFE
- "UNSAFE "
-#endif
-#ifdef NCR53C400
- "NCR53C400 "
#endif
"");
}
-/**
- * NCR5380_print_status - dump controller info
- * @instance: controller to dump
- *
- * Print commands in the various queues, called from NCR5380_abort
- * and NCR5380_debug to aid debugging.
- *
- * Locks: called functions disable irqs
- */
-
-static void NCR5380_print_status(struct Scsi_Host *instance)
-{
- NCR5380_dprint(NDEBUG_ANY, instance);
- NCR5380_dprint_phase(NDEBUG_ANY, instance);
-}
-
#ifdef PSEUDO_DMA
-/******************************************/
-/*
- * /proc/scsi/[dtc pas16 t128 generic]/[0-ASC_NUM_BOARD_SUPPORTED]
- *
- * *buffer: I/O buffer
- * **start: if inout == FALSE pointer into buffer where user read should start
- * offset: current offset
- * length: length of buffer
- * hostno: Scsi_Host host_no
- * inout: TRUE - user is writing; FALSE - user is reading
- *
- * Return the number of bytes read from or written
- */
-
static int __maybe_unused NCR5380_write_info(struct Scsi_Host *instance,
char *buffer, int length)
{
hostdata->spin_max_w = 0;
return 0;
}
-#endif
-
-static
-void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m);
-static
-void lprint_command(unsigned char *cmd, struct seq_file *m);
-static
-void lprint_opcode(int opcode, struct seq_file *m);
static int __maybe_unused NCR5380_show_info(struct seq_file *m,
- struct Scsi_Host *instance)
+ struct Scsi_Host *instance)
{
- struct NCR5380_hostdata *hostdata;
- struct scsi_cmnd *ptr;
-
- hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
-#ifdef PSEUDO_DMA
seq_printf(m, "Highwater I/O busy spin counts: write %d, read %d\n",
hostdata->spin_max_w, hostdata->spin_max_r);
-#endif
- spin_lock_irq(instance->host_lock);
- if (!hostdata->connected)
- seq_printf(m, "scsi%d: no currently connected command\n", instance->host_no);
- else
- lprint_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected, m);
- seq_printf(m, "scsi%d: issue_queue\n", instance->host_no);
- for (ptr = (struct scsi_cmnd *) hostdata->issue_queue; ptr; ptr = (struct scsi_cmnd *) ptr->host_scribble)
- lprint_Scsi_Cmnd(ptr, m);
-
- seq_printf(m, "scsi%d: disconnected_queue\n", instance->host_no);
- for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr; ptr = (struct scsi_cmnd *) ptr->host_scribble)
- lprint_Scsi_Cmnd(ptr, m);
- spin_unlock_irq(instance->host_lock);
return 0;
}
-
-static void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m)
-{
- seq_printf(m, "scsi%d : destination target %d, lun %llu\n", cmd->device->host->host_no, cmd->device->id, cmd->device->lun);
- seq_puts(m, " command = ");
- lprint_command(cmd->cmnd, m);
-}
-
-static void lprint_command(unsigned char *command, struct seq_file *m)
-{
- int i, s;
- lprint_opcode(command[0], m);
- for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
- seq_printf(m, "%02x ", command[i]);
- seq_putc(m, '\n');
-}
-
-static void lprint_opcode(int opcode, struct seq_file *m)
-{
- seq_printf(m, "%2d (0x%02x)", opcode, opcode);
-}
-
+#endif
/**
- * NCR5380_init - initialise an NCR5380
- * @instance: adapter to configure
- * @flags: control flags
+ * NCR5380_init - initialise an NCR5380
+ * @instance: adapter to configure
+ * @flags: control flags
*
- * Initializes *instance and corresponding 5380 chip,
- * with flags OR'd into the initial flags value.
+ * Initializes *instance and corresponding 5380 chip,
+ * with flags OR'd into the initial flags value.
*
- * Notes : I assume that the host, hostno, and id bits have been
- * set correctly. I don't care about the irq and other fields.
+ * Notes : I assume that the host, hostno, and id bits have been
+ * set correctly. I don't care about the irq and other fields.
*
- * Returns 0 for success
- *
- * Locks: interrupts must be enabled when we are called
+ * Returns 0 for success
*/
static int NCR5380_init(struct Scsi_Host *instance, int flags)
{
- NCR5380_local_declare();
- int i, pass;
- unsigned long timeout;
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-
- if(in_interrupt())
- printk(KERN_ERR "NCR5380_init called with interrupts off!\n");
- /*
- * On NCR53C400 boards, NCR5380 registers are mapped 8 past
- * the base address.
- */
-
-#ifdef NCR53C400
- if (flags & FLAG_NCR53C400)
- instance->NCR5380_instance_name += NCR53C400_address_adjust;
-#endif
-
- NCR5380_setup(instance);
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int i;
+ unsigned long deadline;
- hostdata->aborted = 0;
+ hostdata->host = instance;
hostdata->id_mask = 1 << instance->this_id;
+ hostdata->id_higher_mask = 0;
for (i = hostdata->id_mask; i <= 0x80; i <<= 1)
if (i > hostdata->id_mask)
hostdata->id_higher_mask |= i;
#ifdef REAL_DMA
hostdata->dmalen = 0;
#endif
- hostdata->targets_present = 0;
+ spin_lock_init(&hostdata->lock);
hostdata->connected = NULL;
- hostdata->issue_queue = NULL;
- hostdata->disconnected_queue = NULL;
-
- INIT_DELAYED_WORK(&hostdata->coroutine, NCR5380_main);
-
- /* The CHECK code seems to break the 53C400. Will check it later maybe */
- if (flags & FLAG_NCR53C400)
- hostdata->flags = FLAG_HAS_LAST_BYTE_SENT | flags;
- else
- hostdata->flags = FLAG_CHECK_LAST_BYTE_SENT | flags;
+ hostdata->sensing = NULL;
+ INIT_LIST_HEAD(&hostdata->autosense);
+ INIT_LIST_HEAD(&hostdata->unissued);
+ INIT_LIST_HEAD(&hostdata->disconnected);
- hostdata->host = instance;
- hostdata->time_expires = 0;
+ hostdata->flags = flags;
+
+ INIT_WORK(&hostdata->main_task, NCR5380_main);
+ hostdata->work_q = alloc_workqueue("ncr5380_%d",
+ WQ_UNBOUND | WQ_MEM_RECLAIM,
+ 1, instance->host_no);
+ if (!hostdata->work_q)
+ return -ENOMEM;
prepare_info(instance);
NCR5380_write(TARGET_COMMAND_REG, 0);
NCR5380_write(SELECT_ENABLE_REG, 0);
-#ifdef NCR53C400
- if (hostdata->flags & FLAG_NCR53C400) {
- NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE);
- }
-#endif
+ /* Calibrate register polling loop */
+ i = 0;
+ deadline = jiffies + 1;
+ do {
+ cpu_relax();
+ } while (time_is_after_jiffies(deadline));
+ deadline += msecs_to_jiffies(256);
+ do {
+ NCR5380_read(STATUS_REG);
+ ++i;
+ cpu_relax();
+ } while (time_is_after_jiffies(deadline));
+ hostdata->accesses_per_ms = i / 256;
- /*
- * Detect and correct bus wedge problems.
- *
- * If the system crashed, it may have crashed in a state
- * where a SCSI command was still executing, and the
- * SCSI bus is not in a BUS FREE STATE.
- *
- * If this is the case, we'll try to abort the currently
- * established nexus which we know nothing about, and that
- * failing, do a hard reset of the SCSI bus
- */
+ return 0;
+}
+
+/**
+ * NCR5380_maybe_reset_bus - Detect and correct bus wedge problems.
+ * @instance: adapter to check
+ *
+ * If the system crashed, it may have crashed with a connected target and
+ * the SCSI bus busy. Check for BUS FREE phase. If not, try to abort the
+ * currently established nexus, which we know nothing about. Failing that
+ * do a bus reset.
+ *
+ * Note that a bus reset will cause the chip to assert IRQ.
+ *
+ * Returns 0 if successful, otherwise -ENXIO.
+ */
+
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int pass;
for (pass = 1; (NCR5380_read(STATUS_REG) & SR_BSY) && pass <= 6; ++pass) {
switch (pass) {
case 1:
case 3:
case 5:
- printk(KERN_INFO "scsi%d: SCSI bus busy, waiting up to five seconds\n", instance->host_no);
- timeout = jiffies + 5 * HZ;
- NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, 0, 5*HZ);
+ shost_printk(KERN_ERR, instance, "SCSI bus busy, waiting up to five seconds\n");
+ NCR5380_poll_politely(instance,
+ STATUS_REG, SR_BSY, 0, 5 * HZ);
break;
case 2:
- printk(KERN_WARNING "scsi%d: bus busy, attempting abort\n", instance->host_no);
+ shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
do_abort(instance);
break;
case 4:
- printk(KERN_WARNING "scsi%d: bus busy, attempting reset\n", instance->host_no);
+ shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
do_reset(instance);
+ /* Wait after a reset; the SCSI standard calls for
+ * 250ms, we wait 500ms to be on the safe side.
+ * But some Toshiba CD-ROMs need ten times that.
+ */
+ if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+ msleep(2500);
+ else
+ msleep(500);
break;
case 6:
- printk(KERN_ERR "scsi%d: bus locked solid or invalid override\n", instance->host_no);
+ shost_printk(KERN_ERR, instance, "bus locked solid\n");
return -ENXIO;
}
}
}
/**
- * NCR5380_exit - remove an NCR5380
- * @instance: adapter to remove
+ * NCR5380_exit - remove an NCR5380
+ * @instance: adapter to remove
+ *
+ * Assumes that no more work can be queued (e.g. by NCR5380_intr).
*/
static void NCR5380_exit(struct Scsi_Host *instance)
{
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
- cancel_delayed_work_sync(&hostdata->coroutine);
+ cancel_work_sync(&hostdata->main_task);
+ destroy_workqueue(hostdata->work_q);
}
/**
- * NCR5380_queue_command - queue a command
- * @cmd: SCSI command
- * @done: completion handler
- *
- * cmd is added to the per instance issue_queue, with minor
- * twiddling done to the host specific fields of cmd. If the
- * main coroutine is not running, it is restarted.
+ * complete_cmd - finish processing a command and return it to the SCSI ML
+ * @instance: the host instance
+ * @cmd: command to complete
+ */
+
+static void complete_cmd(struct Scsi_Host *instance,
+ struct scsi_cmnd *cmd)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+ dsprintk(NDEBUG_QUEUES, instance, "complete_cmd: cmd %p\n", cmd);
+
+ if (hostdata->sensing == cmd) {
+ /* Autosense processing ends here */
+ if ((cmd->result & 0xff) != SAM_STAT_GOOD) {
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ set_host_byte(cmd, DID_ERROR);
+ } else
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ hostdata->sensing = NULL;
+ }
+
+ hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
+
+ cmd->scsi_done(cmd);
+}
+
+/**
+ * NCR5380_queue_command - queue a command
+ * @instance: the relevant SCSI adapter
+ * @cmd: SCSI command
*
- * Locks: host lock taken by caller
+ * cmd is added to the per-instance issue queue, with minor
+ * twiddling done to the host specific fields of cmd. If the
+ * main coroutine is not running, it is restarted.
*/
-static int NCR5380_queue_command_lck(struct scsi_cmnd *cmd, void (*done) (struct scsi_cmnd *))
+static int NCR5380_queue_command(struct Scsi_Host *instance,
+ struct scsi_cmnd *cmd)
{
- struct Scsi_Host *instance = cmd->device->host;
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
- struct scsi_cmnd *tmp;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+ unsigned long flags;
#if (NDEBUG & NDEBUG_NO_WRITE)
switch (cmd->cmnd[0]) {
case WRITE_6:
case WRITE_10:
- printk("scsi%d : WRITE attempted with NO_WRITE debugging flag set\n", instance->host_no);
+ shost_printk(KERN_DEBUG, instance, "WRITE attempted with NDEBUG_NO_WRITE set\n");
cmd->result = (DID_ERROR << 16);
- done(cmd);
+ cmd->scsi_done(cmd);
return 0;
}
-#endif /* (NDEBUG & NDEBUG_NO_WRITE) */
+#endif /* (NDEBUG & NDEBUG_NO_WRITE) */
- /*
- * We use the host_scribble field as a pointer to the next command
- * in a queue
- */
-
- cmd->host_scribble = NULL;
- cmd->scsi_done = done;
cmd->result = 0;
- /*
- * Insert the cmd into the issue queue. Note that REQUEST SENSE
+ spin_lock_irqsave(&hostdata->lock, flags);
+
+ /*
+ * Insert the cmd into the issue queue. Note that REQUEST SENSE
* commands are added to the head of the queue since any command will
- * clear the contingent allegiance condition that exists and the
+ * clear the contingent allegiance condition that exists and the
* sense data is only guaranteed to be valid while the condition exists.
*/
- if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
- LIST(cmd, hostdata->issue_queue);
- cmd->host_scribble = (unsigned char *) hostdata->issue_queue;
- hostdata->issue_queue = cmd;
- } else {
- for (tmp = (struct scsi_cmnd *) hostdata->issue_queue; tmp->host_scribble; tmp = (struct scsi_cmnd *) tmp->host_scribble);
- LIST(cmd, tmp);
- tmp->host_scribble = (unsigned char *) cmd;
- }
- dprintk(NDEBUG_QUEUES, "scsi%d : command added to %s of queue\n", instance->host_no, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ list_add(&ncmd->list, &hostdata->unissued);
+ else
+ list_add_tail(&ncmd->list, &hostdata->unissued);
+
+ spin_unlock_irqrestore(&hostdata->lock, flags);
+
+ dsprintk(NDEBUG_QUEUES, instance, "command %p added to %s of queue\n",
+ cmd, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
- /* Run the coroutine if it isn't already running. */
/* Kick off command processing */
- schedule_delayed_work(&hostdata->coroutine, 0);
+ queue_work(hostdata->work_q, &hostdata->main_task);
return 0;
}
-static DEF_SCSI_QCMD(NCR5380_queue_command)
+/**
+ * dequeue_next_cmd - dequeue a command for processing
+ * @instance: the scsi host instance
+ *
+ * Priority is given to commands on the autosense queue. These commands
+ * need autosense because of a CHECK CONDITION result.
+ *
+ * Returns a command pointer if a command is found for a target that is
+ * not already busy. Otherwise returns NULL.
+ */
+
+static struct scsi_cmnd *dequeue_next_cmd(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ struct NCR5380_cmd *ncmd;
+ struct scsi_cmnd *cmd;
+
+ if (list_empty(&hostdata->autosense)) {
+ list_for_each_entry(ncmd, &hostdata->unissued, list) {
+ cmd = NCR5380_to_scmd(ncmd);
+ dsprintk(NDEBUG_QUEUES, instance, "dequeue: cmd=%p target=%d busy=0x%02x lun=%llu\n",
+ cmd, scmd_id(cmd), hostdata->busy[scmd_id(cmd)], cmd->device->lun);
+
+ if (!(hostdata->busy[scmd_id(cmd)] & (1 << cmd->device->lun))) {
+ list_del(&ncmd->list);
+ dsprintk(NDEBUG_QUEUES, instance,
+ "dequeue: removed %p from issue queue\n", cmd);
+ return cmd;
+ }
+ }
+ } else {
+ /* Autosense processing begins here */
+ ncmd = list_first_entry(&hostdata->autosense,
+ struct NCR5380_cmd, list);
+ list_del(&ncmd->list);
+ cmd = NCR5380_to_scmd(ncmd);
+ dsprintk(NDEBUG_QUEUES, instance,
+ "dequeue: removed %p from autosense queue\n", cmd);
+ scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
+ hostdata->sensing = cmd;
+ return cmd;
+ }
+ return NULL;
+}
+
+static void requeue_cmd(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
+ if (hostdata->sensing) {
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ list_add(&ncmd->list, &hostdata->autosense);
+ hostdata->sensing = NULL;
+ } else
+ list_add(&ncmd->list, &hostdata->unissued);
+}
/**
- * NCR5380_main - NCR state machines
- *
- * NCR5380_main is a coroutine that runs as long as more work can
- * be done on the NCR5380 host adapters in a system. Both
- * NCR5380_queue_command() and NCR5380_intr() will try to start it
- * in case it is not running.
- *
- * Locks: called as its own thread with no locks held. Takes the
- * host lock and called routines may take the isa dma lock.
+ * NCR5380_main - NCR state machines
+ *
+ * NCR5380_main is a coroutine that runs as long as more work can
+ * be done on the NCR5380 host adapters in a system. Both
+ * NCR5380_queue_command() and NCR5380_intr() will try to start it
+ * in case it is not running.
*/
static void NCR5380_main(struct work_struct *work)
{
struct NCR5380_hostdata *hostdata =
- container_of(work, struct NCR5380_hostdata, coroutine.work);
+ container_of(work, struct NCR5380_hostdata, main_task);
struct Scsi_Host *instance = hostdata->host;
- struct scsi_cmnd *tmp, *prev;
+ struct scsi_cmnd *cmd;
int done;
-
- spin_lock_irq(instance->host_lock);
+
do {
- /* Lock held here */
done = 1;
- if (!hostdata->connected && !hostdata->selecting) {
- dprintk(NDEBUG_MAIN, "scsi%d : not connected\n", instance->host_no);
- /*
- * Search through the issue_queue for a command destined
- * for a target that's not busy.
- */
- for (tmp = (struct scsi_cmnd *) hostdata->issue_queue, prev = NULL; tmp; prev = tmp, tmp = (struct scsi_cmnd *) tmp->host_scribble)
- {
- if (prev != tmp)
- dprintk(NDEBUG_LISTS, "MAIN tmp=%p target=%d busy=%d lun=%llu\n", tmp, tmp->device->id, hostdata->busy[tmp->device->id], tmp->device->lun);
- /* When we find one, remove it from the issue queue. */
- if (!(hostdata->busy[tmp->device->id] &
- (1 << (u8)(tmp->device->lun & 0xff)))) {
- if (prev) {
- REMOVE(prev, prev->host_scribble, tmp, tmp->host_scribble);
- prev->host_scribble = tmp->host_scribble;
- } else {
- REMOVE(-1, hostdata->issue_queue, tmp, tmp->host_scribble);
- hostdata->issue_queue = (struct scsi_cmnd *) tmp->host_scribble;
- }
- tmp->host_scribble = NULL;
- /*
- * Attempt to establish an I_T_L nexus here.
- * On success, instance->hostdata->connected is set.
- * On failure, we must add the command back to the
- * issue queue so we can keep trying.
- */
- dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main() : command for target %d lun %llu removed from issue_queue\n", instance->host_no, tmp->device->id, tmp->device->lun);
-
- /*
- * A successful selection is defined as one that
- * leaves us with the command connected and
- * in hostdata->connected, OR has terminated the
- * command.
- *
- * With successful commands, we fall through
- * and see if we can do an information transfer,
- * with failures we will restart.
- */
- hostdata->selecting = NULL;
- /* RvC: have to preset this to indicate a new command is being performed */
+ spin_lock_irq(&hostdata->lock);
+ while (!hostdata->connected &&
+ (cmd = dequeue_next_cmd(instance))) {
- /*
- * REQUEST SENSE commands are issued without tagged
- * queueing, even on SCSI-II devices because the
- * contingent allegiance condition exists for the
- * entire unit.
- */
+ dsprintk(NDEBUG_MAIN, instance, "main: dequeued %p\n", cmd);
- if (!NCR5380_select(instance, tmp)) {
- break;
- } else {
- LIST(tmp, hostdata->issue_queue);
- tmp->host_scribble = (unsigned char *) hostdata->issue_queue;
- hostdata->issue_queue = tmp;
- done = 0;
- dprintk(NDEBUG_MAIN|NDEBUG_QUEUES, "scsi%d : main(): select() failed, returned to issue_queue\n", instance->host_no);
- }
- /* lock held here still */
- } /* if target/lun is not busy */
- } /* for */
- /* exited locked */
- } /* if (!hostdata->connected) */
- if (hostdata->selecting) {
- tmp = (struct scsi_cmnd *) hostdata->selecting;
- /* Selection will drop and retake the lock */
- if (!NCR5380_select(instance, tmp)) {
- /* Ok ?? */
+ /*
+ * Attempt to establish an I_T_L nexus here.
+ * On success, instance->hostdata->connected is set.
+ * On failure, we must add the command back to the
+ * issue queue so we can keep trying.
+ */
+ /*
+ * REQUEST SENSE commands are issued without tagged
+ * queueing, even on SCSI-II devices because the
+ * contingent allegiance condition exists for the
+ * entire unit.
+ */
+
+ cmd = NCR5380_select(instance, cmd);
+ if (!cmd) {
+ dsprintk(NDEBUG_MAIN, instance, "main: select complete\n");
} else {
- /* RvC: device failed, so we wait a long time
- this is needed for Mustek scanners, that
- do not respond to commands immediately
- after a scan */
- printk(KERN_DEBUG "scsi%d: device %d did not respond in time\n", instance->host_no, tmp->device->id);
- LIST(tmp, hostdata->issue_queue);
- tmp->host_scribble = (unsigned char *) hostdata->issue_queue;
- hostdata->issue_queue = tmp;
- NCR5380_set_timer(hostdata, USLEEP_WAITLONG);
+ dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
+ "main: select failed, returning %p to queue\n", cmd);
+ requeue_cmd(instance, cmd);
}
- } /* if hostdata->selecting */
+ }
if (hostdata->connected
#ifdef REAL_DMA
&& !hostdata->dmalen
#endif
- && (!hostdata->time_expires || time_before_eq(hostdata->time_expires, jiffies))
) {
- dprintk(NDEBUG_MAIN, "scsi%d : main() : performing information transfer\n", instance->host_no);
+ dsprintk(NDEBUG_MAIN, instance, "main: performing information transfer\n");
NCR5380_information_transfer(instance);
- dprintk(NDEBUG_MAIN, "scsi%d : main() : done set false\n", instance->host_no);
done = 0;
- } else
- break;
+ }
+ spin_unlock_irq(&hostdata->lock);
+ if (!done)
+ cond_resched();
} while (!done);
-
- spin_unlock_irq(instance->host_lock);
}
#ifndef DONT_USE_INTR
/**
- * NCR5380_intr - generic NCR5380 irq handler
- * @irq: interrupt number
- * @dev_id: device info
- *
- * Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
- * from the disconnected queue, and restarting NCR5380_main()
- * as required.
- *
- * Locks: takes the needed instance locks
+ * NCR5380_intr - generic NCR5380 irq handler
+ * @irq: interrupt number
+ * @dev_id: device info
+ *
+ * Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
+ * from the disconnected queue, and restarting NCR5380_main()
+ * as required.
+ *
+ * The chip can assert IRQ in any of six different conditions. The IRQ flag
+ * is then cleared by reading the Reset Parity/Interrupt Register (RPIR).
+ * Three of these six conditions are latched in the Bus and Status Register:
+ * - End of DMA (cleared by ending DMA Mode)
+ * - Parity error (cleared by reading RPIR)
+ * - Loss of BSY (cleared by reading RPIR)
+ * Two conditions have flag bits that are not latched:
+ * - Bus phase mismatch (non-maskable in DMA Mode, cleared by ending DMA Mode)
+ * - Bus reset (non-maskable)
+ * The remaining condition has no flag bit at all:
+ * - Selection/reselection
+ *
+ * Hence, establishing the cause(s) of any interrupt is partly guesswork.
+ * In "The DP8490 and DP5380 Comparison Guide", National Semiconductor
+ * claimed that "the design of the [DP8490] interrupt logic ensures
+ * interrupts will not be lost (they can be on the DP5380)."
+ * The L5380/53C80 datasheet from LOGIC Devices has more details.
+ *
+ * Checking for bus reset by reading RST is futile because of interrupt
+ * latency, but a bus reset will reset chip logic. Checking for parity error
+ * is unnecessary because that interrupt is never enabled. A Loss of BSY
+ * condition will clear DMA Mode. We can tell when this occurs because the
+ * Busy Monitor interrupt is enabled together with DMA Mode.
*/
-static irqreturn_t NCR5380_intr(int dummy, void *dev_id)
+static irqreturn_t NCR5380_intr(int irq, void *dev_id)
{
- NCR5380_local_declare();
struct Scsi_Host *instance = dev_id;
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
- int done;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int handled = 0;
unsigned char basr;
unsigned long flags;
- dprintk(NDEBUG_INTR, "scsi : NCR5380 irq %d triggered\n",
- instance->irq);
+ spin_lock_irqsave(&hostdata->lock, flags);
+
+ basr = NCR5380_read(BUS_AND_STATUS_REG);
+ if (basr & BASR_IRQ) {
+ unsigned char mr = NCR5380_read(MODE_REG);
+ unsigned char sr = NCR5380_read(STATUS_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 0x%02x, MR 0x%02x\n",
+ irq, basr, sr, mr);
- do {
- done = 1;
- spin_lock_irqsave(instance->host_lock, flags);
- /* Look for pending interrupts */
- NCR5380_setup(instance);
- basr = NCR5380_read(BUS_AND_STATUS_REG);
- /* XXX dispatch to appropriate routine if found and done=0 */
- if (basr & BASR_IRQ) {
- NCR5380_dprint(NDEBUG_INTR, instance);
- if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
- done = 0;
- dprintk(NDEBUG_INTR, "scsi%d : SEL interrupt\n", instance->host_no);
- NCR5380_reselect(instance);
- (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else if (basr & BASR_PARITY_ERROR) {
- dprintk(NDEBUG_INTR, "scsi%d : PARITY interrupt\n", instance->host_no);
- (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
- dprintk(NDEBUG_INTR, "scsi%d : RESET interrupt\n", instance->host_no);
- (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else {
#if defined(REAL_DMA)
- /*
- * We should only get PHASE MISMATCH and EOP interrupts
- * if we have DMA enabled, so do a sanity check based on
- * the current setting of the MODE register.
- */
+ if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+ /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+ * We ack IRQ after clearing Mode Register. Workarounds
+ * for End of DMA errata need to happen in DMA Mode.
+ */
- if ((NCR5380_read(MODE_REG) & MR_DMA) && ((basr & BASR_END_DMA_TRANSFER) || !(basr & BASR_PHASE_MATCH))) {
- int transferred;
+ dsprintk(NDEBUG_INTR, instance, "interrupt in DMA mode\n");
- if (!hostdata->connected)
- panic("scsi%d : received end of DMA interrupt with no connected cmd\n", instance->hostno);
+ int transferred;
- transferred = (hostdata->dmalen - NCR5380_dma_residual(instance));
- hostdata->connected->SCp.this_residual -= transferred;
- hostdata->connected->SCp.ptr += transferred;
- hostdata->dmalen = 0;
+ if (!hostdata->connected)
+ panic("scsi%d : DMA interrupt with no connected cmd\n",
+ instance->hostno);
- (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-
- /* FIXME: we need to poll briefly then defer a workqueue task ! */
- NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, BASR_ACK, 0, 2*HZ);
+ transferred = hostdata->dmalen - NCR5380_dma_residual(instance);
+ hostdata->connected->SCp.this_residual -= transferred;
+ hostdata->connected->SCp.ptr += transferred;
+ hostdata->dmalen = 0;
- NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- }
-#else
- dprintk(NDEBUG_INTR, "scsi : unknown interrupt, BASR 0x%X, MR 0x%X, SR 0x%x\n", basr, NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG));
- (void) NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-#endif
+ /* FIXME: we need to poll briefly then defer a workqueue task ! */
+ NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG, BASR_ACK, 0, 2 * HZ);
+
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ NCR5380_write(MODE_REG, MR_BASE);
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ } else
+#endif /* REAL_DMA */
+ if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+ (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO)) {
+ /* Probably reselected */
+ NCR5380_write(SELECT_ENABLE_REG, 0);
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "interrupt with SEL and IO\n");
+
+ if (!hostdata->connected) {
+ NCR5380_reselect(instance);
+ queue_work(hostdata->work_q, &hostdata->main_task);
}
- } /* if BASR_IRQ */
- spin_unlock_irqrestore(instance->host_lock, flags);
- if(!done)
- schedule_delayed_work(&hostdata->coroutine, 0);
- } while (!done);
- return IRQ_HANDLED;
+ if (!hostdata->connected)
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ } else {
+ /* Probably Bus Reset */
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
+ }
+ handled = 1;
+ } else {
+ shost_printk(KERN_NOTICE, instance, "interrupt without IRQ bit\n");
+ }
+
+ spin_unlock_irqrestore(&hostdata->lock, flags);
+
+ return IRQ_RETVAL(handled);
}
-#endif
+#endif
-/*
+/*
* Function : int NCR5380_select(struct Scsi_Host *instance,
- * struct scsi_cmnd *cmd)
+ * struct scsi_cmnd *cmd)
*
* Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command,
- * including ARBITRATION, SELECTION, and initial message out for
- * IDENTIFY and queue messages.
- *
- * Inputs : instance - instantiation of the 5380 driver on which this
- * target lives, cmd - SCSI command to execute.
- *
- * Returns : -1 if selection could not execute for some reason,
- * 0 if selection succeeded or failed because the target
- * did not respond.
- *
- * Side effects :
- * If bus busy, arbitration failed, etc, NCR5380_select() will exit
- * with registers as they should have been on entry - ie
- * SELECT_ENABLE will be set appropriately, the NCR5380
- * will cease to drive any SCSI bus signals.
- *
- * If successful : I_T_L or I_T_L_Q nexus will be established,
- * instance->connected will be set to cmd.
- * SELECT interrupt will be disabled.
- *
- * If failed (no target) : cmd->scsi_done() will be called, and the
- * cmd->result host byte set to DID_BAD_TARGET.
- *
- * Locks: caller holds hostdata lock in IRQ mode
+ * including ARBITRATION, SELECTION, and initial message out for
+ * IDENTIFY and queue messages.
+ *
+ * Inputs : instance - instantiation of the 5380 driver on which this
+ * target lives, cmd - SCSI command to execute.
+ *
+ * Returns cmd if selection failed but should be retried,
+ * NULL if selection failed and should not be retried, or
+ * NULL if selection succeeded (hostdata->connected == cmd).
+ *
+ * Side effects :
+ * If bus busy, arbitration failed, etc, NCR5380_select() will exit
+ * with registers as they should have been on entry - ie
+ * SELECT_ENABLE will be set appropriately, the NCR5380
+ * will cease to drive any SCSI bus signals.
+ *
+ * If successful : I_T_L or I_T_L_Q nexus will be established,
+ * hostdata->connected will be set to cmd.
+ * SELECT interrupt will be disabled.
+ *
+ * If failed (no target) : cmd->scsi_done() will be called, and the
+ * cmd->result host byte set to DID_BAD_TARGET.
*/
-
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
+ struct scsi_cmnd *cmd)
{
- NCR5380_local_declare();
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char tmp[3], phase;
unsigned char *data;
int len;
- unsigned long timeout;
- unsigned char value;
int err;
- NCR5380_setup(instance);
-
- if (hostdata->selecting)
- goto part2;
-
- hostdata->restart_select = 0;
NCR5380_dprint(NDEBUG_ARBITRATION, instance);
- dprintk(NDEBUG_ARBITRATION, "scsi%d : starting arbitration, id = %d\n", instance->host_no, instance->this_id);
+ dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n",
+ instance->this_id);
+
+ /*
+ * Arbitration and selection phases are slow and involve dropping the
+ * lock, so we have to watch out for EH. An exception handler may
+ * change 'selecting' to NULL. This function will then return NULL
+ * so that the caller will forget about 'cmd'. (During information
+ * transfer phases, EH may change 'connected' to NULL.)
+ */
+ hostdata->selecting = cmd;
- /*
- * Set the phase bits to 0, otherwise the NCR5380 won't drive the
+ /*
+ * Set the phase bits to 0, otherwise the NCR5380 won't drive the
* data bus during SELECTION.
*/
NCR5380_write(TARGET_COMMAND_REG, 0);
- /*
+ /*
* Start arbitration.
*/
NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask);
NCR5380_write(MODE_REG, MR_ARBITRATE);
+ /* The chip now waits for BUS FREE phase. Then after the 800 ns
+ * Bus Free Delay, arbitration will begin.
+ */
- /* We can be relaxed here, interrupts are on, we are
- in workqueue context, the birds are singing in the trees */
- spin_unlock_irq(instance->host_lock);
- err = NCR5380_poll_politely(instance, INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS, ICR_ARBITRATION_PROGRESS, 5*HZ);
- spin_lock_irq(instance->host_lock);
+ spin_unlock_irq(&hostdata->lock);
+ err = NCR5380_poll_politely2(instance, MODE_REG, MR_ARBITRATE, 0,
+ INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS,
+ ICR_ARBITRATION_PROGRESS, HZ);
+ spin_lock_irq(&hostdata->lock);
+ if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE)) {
+ /* Reselection interrupt */
+ goto out;
+ }
if (err < 0) {
- printk(KERN_DEBUG "scsi: arbitration timeout at %d\n", __LINE__);
NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- goto failed;
+ shost_printk(KERN_ERR, instance,
+ "select: arbitration timeout\n");
+ goto out;
}
+ spin_unlock_irq(&hostdata->lock);
- dprintk(NDEBUG_ARBITRATION, "scsi%d : arbitration complete\n", instance->host_no);
-
- /*
- * The arbitration delay is 2.2us, but this is a minimum and there is
- * no maximum so we can safely sleep for ceil(2.2) usecs to accommodate
- * the integral nature of udelay().
- *
- */
-
+ /* The SCSI-2 arbitration delay is 2.4 us */
udelay(3);
/* Check for lost arbitration */
- if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) || (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) || (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
- NCR5380_write(MODE_REG, MR_BASE);
- dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting MR_ARBITRATE\n", instance->host_no);
- goto failed;
- }
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_SEL);
-
- if (!(hostdata->flags & FLAG_DTC3181E) &&
- /* RvC: DTC3181E has some trouble with this
- * so we simply removed it. Seems to work with
- * only Mustek scanner attached
- */
+ if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
+ (NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) ||
(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_ARBITRATION, "scsi%d : lost arbitration, deasserting ICR_ASSERT_SEL\n", instance->host_no);
- goto failed;
+ dsprintk(NDEBUG_ARBITRATION, instance, "lost arbitration, deasserting MR_ARBITRATE\n");
+ spin_lock_irq(&hostdata->lock);
+ goto out;
}
- /*
- * Again, bus clear + bus settle time is 1.2us, however, this is
+
+ /* After/during arbitration, BSY should be asserted.
+ * IBM DPES-31080 Version S31Q works now
+ * Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman)
+ */
+ NCR5380_write(INITIATOR_COMMAND_REG,
+ ICR_BASE | ICR_ASSERT_SEL | ICR_ASSERT_BSY);
+
+ /*
+ * Again, bus clear + bus settle time is 1.2us, however, this is
* a minimum so we'll udelay ceil(1.2)
*/
- udelay(2);
+ if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+ udelay(15);
+ else
+ udelay(2);
+
+ spin_lock_irq(&hostdata->lock);
+
+ /* NCR5380_reselect() clears MODE_REG after a reselection interrupt */
+ if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE))
+ goto out;
- dprintk(NDEBUG_ARBITRATION, "scsi%d : won arbitration\n", instance->host_no);
+ if (!hostdata->selecting) {
+ NCR5380_write(MODE_REG, MR_BASE);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ goto out;
+ }
- /*
- * Now that we have won arbitration, start Selection process, asserting
+ dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n");
+
+ /*
+ * Now that we have won arbitration, start Selection process, asserting
* the host and target ID's on the SCSI bus.
*/
- NCR5380_write(OUTPUT_DATA_REG, (hostdata->id_mask | (1 << scmd_id(cmd))));
+ NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask | (1 << scmd_id(cmd)));
- /*
+ /*
* Raise ATN while SEL is true before BSY goes false from arbitration,
* since this is the only way to guarantee that we'll get a MESSAGE OUT
* phase immediately after selection.
*/
- NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_BSY | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY |
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL);
NCR5380_write(MODE_REG, MR_BASE);
- /*
+ /*
* Reselect interrupts must be turned off prior to the dropping of BSY,
* otherwise we will trigger an interrupt.
*/
NCR5380_write(SELECT_ENABLE_REG, 0);
+ spin_unlock_irq(&hostdata->lock);
+
/*
- * The initiator shall then wait at least two deskew delays and release
+ * The initiator shall then wait at least two deskew delays and release
* the BSY signal.
*/
- udelay(1); /* wingel -- wait two bus deskew delay >2*45ns */
+ udelay(1); /* wingel -- wait two bus deskew delay >2*45ns */
/* Reset BSY */
- NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA |
+ ICR_ASSERT_ATN | ICR_ASSERT_SEL);
- /*
+ /*
* Something weird happens when we cease to drive BSY - looks
- * like the board/chip is letting us do another read before the
+ * like the board/chip is letting us do another read before the
* appropriate propagation delay has expired, and we're confusing
* a BSY signal from ourselves as the target's response to SELECTION.
*
* A small delay (the 'C++' frontend breaks the pipeline with an
* unnecessary jump, making it work on my 386-33/Trantor T128, the
- * tighter 'C' code breaks and requires this) solves the problem -
- * the 1 us delay is arbitrary, and only used because this delay will
- * be the same on other platforms and since it works here, it should
+ * tighter 'C' code breaks and requires this) solves the problem -
+ * the 1 us delay is arbitrary, and only used because this delay will
+ * be the same on other platforms and since it works here, it should
* work there.
*
* wingel suggests that this could be due to failing to wait
udelay(1);
- dprintk(NDEBUG_SELECTION, "scsi%d : selecting target %d\n", instance->host_no, scmd_id(cmd));
+ dsprintk(NDEBUG_SELECTION, instance, "selecting target %d\n", scmd_id(cmd));
- /*
- * The SCSI specification calls for a 250 ms timeout for the actual
+ /*
+ * The SCSI specification calls for a 250 ms timeout for the actual
* selection.
*/
- timeout = jiffies + msecs_to_jiffies(250);
+ err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY,
+ msecs_to_jiffies(250));
- /*
- * XXX very interesting - we're seeing a bounce where the BSY we
- * asserted is being reflected / still asserted (propagation delay?)
- * and it's detecting as true. Sigh.
- */
-
- hostdata->select_time = 0; /* we count the clock ticks at which we polled */
- hostdata->selecting = cmd;
-
-part2:
- /* RvC: here we enter after a sleeping period, or immediately after
- execution of part 1
- we poll only once ech clock tick */
- value = NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO);
-
- if (!value && (hostdata->select_time < HZ/4)) {
- /* RvC: we still must wait for a device response */
- hostdata->select_time++; /* after 25 ticks the device has failed */
- NCR5380_set_timer(hostdata, 1);
- return 0; /* RvC: we return here with hostdata->selecting set,
- to go to sleep */
- }
-
- hostdata->selecting = NULL;/* clear this pointer, because we passed the
- waiting period */
if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
+ spin_lock_irq(&hostdata->lock);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_reselect(instance);
- printk("scsi%d : reselection after won arbitration?\n", instance->host_no);
+ if (!hostdata->connected)
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ shost_printk(KERN_ERR, instance, "reselection after won arbitration?\n");
+ goto out;
+ }
+
+ if (err < 0) {
+ spin_lock_irq(&hostdata->lock);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return -1;
+ /* Can't touch cmd if it has been reclaimed by the scsi ML */
+ if (hostdata->selecting) {
+ cmd->result = DID_BAD_TARGET << 16;
+ complete_cmd(instance, cmd);
+ dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n");
+ cmd = NULL;
+ }
+ goto out;
}
- /*
- * No less than two deskew delays after the initiator detects the
- * BSY signal is true, it shall release the SEL signal and may
+
+ /*
+ * No less than two deskew delays after the initiator detects the
+ * BSY signal is true, it shall release the SEL signal and may
* change the DATA BUS. -wingel
*/
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
- if (!(NCR5380_read(STATUS_REG) & SR_BSY)) {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- if (hostdata->targets_present & (1 << scmd_id(cmd))) {
- printk(KERN_DEBUG "scsi%d : weirdness\n", instance->host_no);
- if (hostdata->restart_select)
- printk(KERN_DEBUG "\trestart select\n");
- NCR5380_dprint(NDEBUG_SELECTION, instance);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return -1;
- }
- cmd->result = DID_BAD_TARGET << 16;
- cmd->scsi_done(cmd);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- dprintk(NDEBUG_SELECTION, "scsi%d : target did not respond within 250ms\n", instance->host_no);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return 0;
- }
- hostdata->targets_present |= (1 << scmd_id(cmd));
-
/*
- * Since we followed the SCSI spec, and raised ATN while SEL
+ * Since we followed the SCSI spec, and raised ATN while SEL
* was true but before BSY was false during selection, the information
* transfer phase should be a MESSAGE OUT phase so that we can send the
* IDENTIFY message.
- *
+ *
* If SCSI-II tagged queuing is enabled, we also send a SIMPLE_QUEUE_TAG
* message (2 bytes) with a tag ID that we increment with every command
* until it wraps back to 0.
*
* XXX - it turns out that there are some broken SCSI-II devices,
- * which claim to support tagged queuing but fail when more than
- * some number of commands are issued at once.
+ * which claim to support tagged queuing but fail when more than
+ * some number of commands are issued at once.
*/
/* Wait for start of REQ/ACK handshake */
- spin_unlock_irq(instance->host_lock);
err = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
- spin_lock_irq(instance->host_lock);
-
- if(err) {
- printk(KERN_ERR "scsi%d: timeout at NCR5380.c:%d\n", instance->host_no, __LINE__);
+ spin_lock_irq(&hostdata->lock);
+ if (err < 0) {
+ shost_printk(KERN_ERR, instance, "select: REQ timeout\n");
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- goto failed;
+ goto out;
+ }
+ if (!hostdata->selecting) {
+ do_abort(instance);
+ goto out;
}
- dprintk(NDEBUG_SELECTION, "scsi%d : target %d selected, going into MESSAGE OUT phase.\n", instance->host_no, cmd->device->id);
+ dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n",
+ scmd_id(cmd));
tmp[0] = IDENTIFY(((instance->irq == NO_IRQ) ? 0 : 1), cmd->device->lun);
len = 1;
data = tmp;
phase = PHASE_MSGOUT;
NCR5380_transfer_pio(instance, &phase, &len, &data);
- dprintk(NDEBUG_SELECTION, "scsi%d : nexus established.\n", instance->host_no);
+ dsprintk(NDEBUG_SELECTION, instance, "nexus established.\n");
/* XXX need to handle errors here */
+
hostdata->connected = cmd;
- hostdata->busy[cmd->device->id] |= (1 << (cmd->device->lun & 0xFF));
+ hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
initialize_SCp(cmd);
- return 0;
-
- /* Selection failed */
-failed:
- return -1;
+ cmd = NULL;
+out:
+ if (!hostdata->selecting)
+ return NULL;
+ hostdata->selecting = NULL;
+ return cmd;
}
-/*
- * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
- * unsigned char *phase, int *count, unsigned char **data)
+/*
+ * Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
+ * unsigned char *phase, int *count, unsigned char **data)
*
* Purpose : transfers data in given phase using polled I/O
*
- * Inputs : instance - instance of driver, *phase - pointer to
- * what phase is expected, *count - pointer to number of
- * bytes to transfer, **data - pointer to data pointer.
- *
+ * Inputs : instance - instance of driver, *phase - pointer to
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
+ *
* Returns : -1 when different phase is entered without transferring
- * maximum number of bytes, 0 if all bytes or transferred or exit
- * is in same phase.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
*
- * Also, *phase, *count, *data are modified in place.
+ * Also, *phase, *count, *data are modified in place.
*
* XXX Note : handling for bus free may be useful.
*/
/*
- * Note : this code is not as quick as it could be, however it
+ * Note : this code is not as quick as it could be, however it
* IS 100% reliable, and for the actual data transfer where speed
* counts, we will always do a pseudo DMA or DMA transfer.
*/
-static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data) {
- NCR5380_local_declare();
+static int NCR5380_transfer_pio(struct Scsi_Host *instance,
+ unsigned char *phase, int *count,
+ unsigned char **data)
+{
unsigned char p = *phase, tmp;
int c = *count;
unsigned char *d = *data;
- /*
- * RvC: some administrative data to process polling time
- */
- int break_allowed = 0;
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
- NCR5380_setup(instance);
-
- if (!(p & SR_IO))
- dprintk(NDEBUG_PIO, "scsi%d : pio write %d bytes\n", instance->host_no, c);
- else
- dprintk(NDEBUG_PIO, "scsi%d : pio read %d bytes\n", instance->host_no, c);
- /*
- * The NCR5380 chip will only drive the SCSI bus when the
+ /*
+ * The NCR5380 chip will only drive the SCSI bus when the
* phase specified in the appropriate bits of the TARGET COMMAND
* REGISTER match the STATUS REGISTER
*/
- NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
-
- /* RvC: don't know if this is necessary, but other SCSI I/O is short
- * so breaks are not necessary there
- */
- if ((p == PHASE_DATAIN) || (p == PHASE_DATAOUT)) {
- break_allowed = 1;
- }
- do {
- /*
- * Wait for assertion of REQ, after which the phase bits will be
- * valid
- */
+ NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
- /* RvC: we simply poll once, after that we stop temporarily
- * and let the device buffer fill up
- * if breaking is not allowed, we keep polling as long as needed
+ do {
+ /*
+ * Wait for assertion of REQ, after which the phase bits will be
+ * valid
*/
- /* FIXME */
- while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ) && !break_allowed);
- if (!(tmp & SR_REQ)) {
- /* timeout condition */
- NCR5380_set_timer(hostdata, USLEEP_SLEEP);
+ if (NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
break;
- }
- dprintk(NDEBUG_HANDSHAKE, "scsi%d : REQ detected\n", instance->host_no);
+ dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
/* Check for phase mismatch */
- if ((tmp & PHASE_MASK) != p) {
- dprintk(NDEBUG_HANDSHAKE, "scsi%d : phase mismatch\n", instance->host_no);
- NCR5380_dprint_phase(NDEBUG_HANDSHAKE, instance);
+ if ((NCR5380_read(STATUS_REG) & PHASE_MASK) != p) {
+ dsprintk(NDEBUG_PIO, instance, "phase mismatch\n");
+ NCR5380_dprint_phase(NDEBUG_PIO, instance);
break;
}
+
/* Do actual transfer from SCSI bus to / from memory */
if (!(p & SR_IO))
NCR5380_write(OUTPUT_DATA_REG, *d);
++d;
- /*
+ /*
* The SCSI standard suggests that in MSGOUT phase, the initiator
* should drop ATN on the last byte of the message phase
* after REQ has been asserted for the handshake but before
if (!((p & SR_MSG) && c > 1)) {
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA);
NCR5380_dprint(NDEBUG_PIO, instance);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ACK);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+ ICR_ASSERT_DATA | ICR_ASSERT_ACK);
} else {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN);
NCR5380_dprint(NDEBUG_PIO, instance);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
}
} else {
NCR5380_dprint(NDEBUG_PIO, instance);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
}
- /* FIXME - if this fails bus reset ?? */
- NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 5*HZ);
- dprintk(NDEBUG_HANDSHAKE, "scsi%d : req false, handshake complete\n", instance->host_no);
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+ break;
+
+ dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
/*
- * We have several special cases to consider during REQ/ACK handshaking :
- * 1. We were in MSGOUT phase, and we are on the last byte of the
- * message. ATN must be dropped as ACK is dropped.
+ * We have several special cases to consider during REQ/ACK handshaking :
+ * 1. We were in MSGOUT phase, and we are on the last byte of the
+ * message. ATN must be dropped as ACK is dropped.
*
- * 2. We are in a MSGIN phase, and we are on the last byte of the
- * message. We must exit with ACK asserted, so that the calling
- * code may raise ATN before dropping ACK to reject the message.
+ * 2. We are in a MSGIN phase, and we are on the last byte of the
+ * message. We must exit with ACK asserted, so that the calling
+ * code may raise ATN before dropping ACK to reject the message.
*
* 3. ACK and ATN are clear and the target may proceed as normal.
*/
}
} while (--c);
- dprintk(NDEBUG_PIO, "scsi%d : residual %d\n", instance->host_no, c);
+ dsprintk(NDEBUG_PIO, instance, "residual %d\n", c);
*count = c;
*data = d;
tmp = NCR5380_read(STATUS_REG);
- if (tmp & SR_REQ)
+ /* The phase read from the bus is valid if either REQ is (already)
+ * asserted or if ACK hasn't been released yet. The latter applies if
+ * we're in MSG IN, DATA IN or STATUS and all bytes have been received.
+ */
+ if ((tmp & SR_REQ) || ((tmp & SR_IO) && c == 0))
*phase = tmp & PHASE_MASK;
else
*phase = PHASE_UNKNOWN;
}
/**
- * do_reset - issue a reset command
- * @host: adapter to reset
+ * do_reset - issue a reset command
+ * @instance: adapter to reset
*
- * Issue a reset sequence to the NCR5380 and try and get the bus
- * back into sane shape.
+ * Issue a reset sequence to the NCR5380 and try and get the bus
+ * back into sane shape.
*
- * Locks: caller holds queue lock
+ * This clears the reset interrupt flag because there may be no handler for
+ * it. When the driver is initialized, the NCR5380_intr() handler has not yet
+ * been installed. And when in EH we may have released the ST DMA interrupt.
*/
-
-static void do_reset(struct Scsi_Host *host) {
- NCR5380_local_declare();
- NCR5380_setup(host);
- NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
+static void do_reset(struct Scsi_Host *instance)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ NCR5380_write(TARGET_COMMAND_REG,
+ PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
- udelay(25);
+ udelay(50);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ local_irq_restore(flags);
}
-/*
- * Function : do_abort (Scsi_Host *host)
- *
- * Purpose : abort the currently established nexus. Should only be
- * called from a routine which can drop into a
- *
- * Returns : 0 on success, -1 on failure.
- *
- * Locks: queue lock held by caller
- * FIXME: sort this out and get new_eh running
+/**
+ * do_abort - abort the currently established nexus by going to
+ * MESSAGE OUT phase and sending an ABORT message.
+ * @instance: relevant scsi host instance
+ *
+ * Returns 0 on success, -1 on failure.
*/
-static int do_abort(struct Scsi_Host *host) {
- NCR5380_local_declare();
+static int do_abort(struct Scsi_Host *instance)
+{
unsigned char *msgptr, phase, tmp;
int len;
int rc;
- NCR5380_setup(host);
-
/* Request message out phase */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
- /*
- * Wait for the target to indicate a valid phase by asserting
- * REQ. Once this happens, we'll have either a MSGOUT phase
- * and can immediately send the ABORT message, or we'll have some
+ /*
+ * Wait for the target to indicate a valid phase by asserting
+ * REQ. Once this happens, we'll have either a MSGOUT phase
+ * and can immediately send the ABORT message, or we'll have some
* other phase and will have to source/sink data.
- *
+ *
* We really don't care what value was on the bus or what value
* the target sees, so we just handshake.
*/
- rc = NCR5380_poll_politely(host, STATUS_REG, SR_REQ, SR_REQ, 60 * HZ);
-
- if(rc < 0)
- return -1;
+ rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+ if (rc < 0)
+ goto timeout;
+
+ tmp = NCR5380_read(STATUS_REG) & PHASE_MASK;
- tmp = (unsigned char)rc;
-
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
- if ((tmp & PHASE_MASK) != PHASE_MSGOUT) {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
- rc = NCR5380_poll_politely(host, STATUS_REG, SR_REQ, 0, 3*HZ);
+ if (tmp != PHASE_MSGOUT) {
+ NCR5380_write(INITIATOR_COMMAND_REG,
+ ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+ rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 3 * HZ);
+ if (rc < 0)
+ goto timeout;
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
- if(rc == -1)
- return -1;
}
+
tmp = ABORT;
msgptr = &tmp;
len = 1;
phase = PHASE_MSGOUT;
- NCR5380_transfer_pio(host, &phase, &len, &msgptr);
+ NCR5380_transfer_pio(instance, &phase, &len, &msgptr);
/*
* If we got here, and the command completed successfully,
*/
return len ? -1 : 0;
+
+timeout:
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ return -1;
}
#if defined(REAL_DMA) || defined(PSEUDO_DMA) || defined (REAL_DMA_POLL)
-/*
- * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
- * unsigned char *phase, int *count, unsigned char **data)
+/*
+ * Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
+ * unsigned char *phase, int *count, unsigned char **data)
*
* Purpose : transfers data in given phase using either real
- * or pseudo DMA.
+ * or pseudo DMA.
*
- * Inputs : instance - instance of driver, *phase - pointer to
- * what phase is expected, *count - pointer to number of
- * bytes to transfer, **data - pointer to data pointer.
- *
- * Returns : -1 when different phase is entered without transferring
- * maximum number of bytes, 0 if all bytes or transferred or exit
- * is in same phase.
+ * Inputs : instance - instance of driver, *phase - pointer to
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
*
- * Also, *phase, *count, *data are modified in place.
+ * Returns : -1 when different phase is entered without transferring
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
*
- * Locks: io_request lock held by caller
+ * Also, *phase, *count, *data are modified in place.
*/
-static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data) {
- NCR5380_local_declare();
+static int NCR5380_transfer_dma(struct Scsi_Host *instance,
+ unsigned char *phase, int *count,
+ unsigned char **data)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
register int c = *count;
register unsigned char p = *phase;
register unsigned char *d = *data;
unsigned char saved_data = 0, overrun = 0, residue;
#endif
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
-
- NCR5380_setup(instance);
-
if ((tmp = (NCR5380_read(STATUS_REG) & PHASE_MASK)) != p) {
*phase = tmp;
return -1;
}
#if defined(REAL_DMA) || defined(REAL_DMA_POLL)
-#ifdef READ_OVERRUNS
if (p & SR_IO) {
- c -= 2;
+ if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS))
+ c -= 2;
}
-#endif
- dprintk(NDEBUG_DMA, "scsi%d : initializing DMA channel %d for %s, %d bytes %s %0x\n", instance->host_no, instance->dma_channel, (p & SR_IO) ? "reading" : "writing", c, (p & SR_IO) ? "to" : "from", (unsigned) d);
hostdata->dma_len = (p & SR_IO) ? NCR5380_dma_read_setup(instance, d, c) : NCR5380_dma_write_setup(instance, d, c);
+
+ dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+ (p & SR_IO) ? "receive" : "send", c, *data);
#endif
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
#ifdef REAL_DMA
- NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+ MR_ENABLE_EOP_INTR);
#elif defined(REAL_DMA_POLL)
- NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE);
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
#else
/*
* Note : on my sample board, watch-dog timeouts occurred when interrupts
- * were not disabled for the duration of a single DMA transfer, from
+ * were not disabled for the duration of a single DMA transfer, from
* before the setting of DMA mode to after transfer of the last byte.
*/
-#if defined(PSEUDO_DMA) && defined(UNSAFE)
- spin_unlock_irq(instance->host_lock);
-#endif
- /* KLL May need eop and parity in 53c400 */
- if (hostdata->flags & FLAG_NCR53C400)
- NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE |
- MR_ENABLE_PAR_CHECK | MR_ENABLE_PAR_INTR |
- MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
+ if (hostdata->flags & FLAG_NO_DMA_FIXUP)
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+ MR_ENABLE_EOP_INTR);
else
- NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE);
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY);
#endif /* def REAL_DMA */
dprintk(NDEBUG_DMA, "scsi%d : mode reg = 0x%X\n", instance->host_no, NCR5380_read(MODE_REG));
- /*
- * On the PAS16 at least I/O recovery delays are not needed here.
- * Everyone else seems to want them.
+ /*
+ * On the PAS16 at least I/O recovery delays are not needed here.
+ * Everyone else seems to want them.
*/
if (p & SR_IO) {
} while ((tmp & BASR_PHASE_MATCH) && !(tmp & (BASR_BUSY_ERROR | BASR_END_DMA_TRANSFER)));
/*
- At this point, either we've completed DMA, or we have a phase mismatch,
- or we've unexpectedly lost BUSY (which is a real error).
-
- For write DMAs, we want to wait until the last byte has been
- transferred out over the bus before we turn off DMA mode. Alas, there
- seems to be no terribly good way of doing this on a 5380 under all
- conditions. For non-scatter-gather operations, we can wait until REQ
- and ACK both go false, or until a phase mismatch occurs. Gather-writes
- are nastier, since the device will be expecting more data than we
- are prepared to send it, and REQ will remain asserted. On a 53C8[01] we
- could test LAST BIT SENT to assure transfer (I imagine this is precisely
- why this signal was added to the newer chips) but on the older 538[01]
- this signal does not exist. The workaround for this lack is a watchdog;
- we bail out of the wait-loop after a modest amount of wait-time if
- the usual exit conditions are not met. Not a terribly clean or
- correct solution :-%
-
- Reads are equally tricky due to a nasty characteristic of the NCR5380.
- If the chip is in DMA mode for an READ, it will respond to a target's
- REQ by latching the SCSI data into the INPUT DATA register and asserting
- ACK, even if it has _already_ been notified by the DMA controller that
- the current DMA transfer has completed! If the NCR5380 is then taken
- out of DMA mode, this already-acknowledged byte is lost.
-
- This is not a problem for "one DMA transfer per command" reads, because
- the situation will never arise... either all of the data is DMA'ed
- properly, or the target switches to MESSAGE IN phase to signal a
- disconnection (either operation bringing the DMA to a clean halt).
- However, in order to handle scatter-reads, we must work around the
- problem. The chosen fix is to DMA N-2 bytes, then check for the
- condition before taking the NCR5380 out of DMA mode. One or two extra
- bytes are transferred via PIO as necessary to fill out the original
- request.
+ * At this point, either we've completed DMA, or we have a phase mismatch,
+ * or we've unexpectedly lost BUSY (which is a real error).
+ *
+ * For DMA sends, we want to wait until the last byte has been
+ * transferred out over the bus before we turn off DMA mode. Alas, there
+ * seems to be no terribly good way of doing this on a 5380 under all
+ * conditions. For non-scatter-gather operations, we can wait until REQ
+ * and ACK both go false, or until a phase mismatch occurs. Gather-sends
+ * are nastier, since the device will be expecting more data than we
+ * are prepared to send it, and REQ will remain asserted. On a 53C8[01] we
+ * could test Last Byte Sent to assure transfer (I imagine this is precisely
+ * why this signal was added to the newer chips) but on the older 538[01]
+ * this signal does not exist. The workaround for this lack is a watchdog;
+ * we bail out of the wait-loop after a modest amount of wait-time if
+ * the usual exit conditions are not met. Not a terribly clean or
+ * correct solution :-%
+ *
+ * DMA receive is equally tricky due to a nasty characteristic of the NCR5380.
+ * If the chip is in DMA receive mode, it will respond to a target's
+ * REQ by latching the SCSI data into the INPUT DATA register and asserting
+ * ACK, even if it has _already_ been notified by the DMA controller that
+ * the current DMA transfer has completed! If the NCR5380 is then taken
+ * out of DMA mode, this already-acknowledged byte is lost. This is
+ * not a problem for "one DMA transfer per READ command", because
+ * the situation will never arise... either all of the data is DMA'ed
+ * properly, or the target switches to MESSAGE IN phase to signal a
+ * disconnection (either operation bringing the DMA to a clean halt).
+ * However, in order to handle scatter-receive, we must work around the
+ * problem. The chosen fix is to DMA N-2 bytes, then check for the
+ * condition before taking the NCR5380 out of DMA mode. One or two extra
+ * bytes are transferred via PIO as necessary to fill out the original
+ * request.
*/
if (p & SR_IO) {
-#ifdef READ_OVERRUNS
- udelay(10);
- if (((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) == (BASR_PHASE_MATCH | BASR_ACK))) {
- saved_data = NCR5380_read(INPUT_DATA_REGISTER);
- overrun = 1;
+ if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS)) {
+ udelay(10);
+ if ((NCR5380_read(BUS_AND_STATUS_REG) & (BASR_PHASE_MATCH | BASR_ACK)) ==
+ (BASR_PHASE_MATCH | BASR_ACK)) {
+ saved_data = NCR5380_read(INPUT_DATA_REGISTER);
+ overrun = 1;
+ }
}
-#endif
} else {
int limit = 100;
while (((tmp = NCR5380_read(BUS_AND_STATUS_REG)) & BASR_ACK) || (NCR5380_read(STATUS_REG) & SR_REQ)) {
}
}
- dprintk(NDEBUG_DMA, "scsi%d : polled DMA transfer complete, basr 0x%X, sr 0x%X\n", instance->host_no, tmp, NCR5380_read(STATUS_REG));
+ dsprintk(NDEBUG_DMA, "polled DMA transfer complete, basr 0x%02x, sr 0x%02x\n",
+ tmp, NCR5380_read(STATUS_REG));
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
*data += c;
*phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
-#ifdef READ_OVERRUNS
- if (*phase == p && (p & SR_IO) && residue == 0) {
+ if (!(hostdata->flags & FLAG_NO_DMA_FIXUPS) &&
+ *phase == p && (p & SR_IO) && residue == 0) {
if (overrun) {
dprintk(NDEBUG_DMA, "Got an input overrun, using saved byte\n");
**data = saved_data;
NCR5380_transfer_pio(instance, phase, &cnt, data);
*count -= toPIO - cnt;
}
-#endif
dprintk(NDEBUG_DMA, "Return with data ptr = 0x%X, count %d, last 0x%X, next 0x%X\n", *data, *count, *(*data + *count - 1), *(*data + *count));
return 0;
return 0;
#else /* defined(REAL_DMA_POLL) */
if (p & SR_IO) {
-#ifdef DMA_WORKS_RIGHT
- foo = NCR5380_pread(instance, d, c);
-#else
- int diff = 1;
- if (hostdata->flags & FLAG_NCR53C400) {
- diff = 0;
- }
- if (!(foo = NCR5380_pread(instance, d, c - diff))) {
+ foo = NCR5380_pread(instance, d,
+ hostdata->flags & FLAG_NO_DMA_FIXUP ? c : c - 1);
+ if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
/*
- * We can't disable DMA mode after successfully transferring
+ * We can't disable DMA mode after successfully transferring
* what we plan to be the last byte, since that would open up
- * a race condition where if the target asserted REQ before
+ * a race condition where if the target asserted REQ before
* we got the DMA mode reset, the NCR5380 would have latched
* an additional byte into the INPUT DATA register and we'd
* have dropped it.
- *
- * The workaround was to transfer one fewer bytes than we
- * intended to with the pseudo-DMA read function, wait for
+ *
+ * The workaround was to transfer one fewer bytes than we
+ * intended to with the pseudo-DMA read function, wait for
* the chip to latch the last byte, read it, and then disable
* pseudo-DMA mode.
- *
+ *
* After REQ is asserted, the NCR5380 asserts DRQ and ACK.
* REQ is deasserted when ACK is asserted, and not reasserted
* until ACK goes false. Since the NCR5380 won't lower ACK
* until DACK is asserted, which won't happen unless we twiddle
- * the DMA port or we take the NCR5380 out of DMA mode, we
- * can guarantee that we won't handshake another extra
+ * the DMA port or we take the NCR5380 out of DMA mode, we
+ * can guarantee that we won't handshake another extra
* byte.
*/
- if (!(hostdata->flags & FLAG_NCR53C400)) {
- while (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_DRQ));
- /* Wait for clean handshake */
- while (NCR5380_read(STATUS_REG) & SR_REQ);
- d[c - 1] = NCR5380_read(INPUT_DATA_REG);
+ if (NCR5380_poll_politely(instance, BUS_AND_STATUS_REG,
+ BASR_DRQ, BASR_DRQ, HZ) < 0) {
+ foo = -1;
+ shost_printk(KERN_ERR, instance, "PDMA read: DRQ timeout\n");
}
+ if (NCR5380_poll_politely(instance, STATUS_REG,
+ SR_REQ, 0, HZ) < 0) {
+ foo = -1;
+ shost_printk(KERN_ERR, instance, "PDMA read: !REQ timeout\n");
+ }
+ d[c - 1] = NCR5380_read(INPUT_DATA_REG);
}
-#endif
} else {
-#ifdef DMA_WORKS_RIGHT
foo = NCR5380_pwrite(instance, d, c);
-#else
- int timeout;
- dprintk(NDEBUG_C400_PWRITE, "About to pwrite %d bytes\n", c);
- if (!(foo = NCR5380_pwrite(instance, d, c))) {
+ if (!foo && !(hostdata->flags & FLAG_NO_DMA_FIXUP)) {
/*
- * Wait for the last byte to be sent. If REQ is being asserted for
- * the byte we're interested, we'll ACK it and it will go false.
+ * Wait for the last byte to be sent. If REQ is being asserted for
+ * the byte we're interested, we'll ACK it and it will go false.
*/
- if (!(hostdata->flags & FLAG_HAS_LAST_BYTE_SENT)) {
- timeout = 20000;
- while (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_DRQ) && (NCR5380_read(BUS_AND_STATUS_REG) & BASR_PHASE_MATCH));
-
- if (!timeout)
- dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : timed out on last byte\n", instance->host_no);
-
- if (hostdata->flags & FLAG_CHECK_LAST_BYTE_SENT) {
- hostdata->flags &= ~FLAG_CHECK_LAST_BYTE_SENT;
- if (NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT) {
- hostdata->flags |= FLAG_HAS_LAST_BYTE_SENT;
- dprintk(NDEBUG_LAST_BYTE_SENT, "scsi%d : last byte sent works\n", instance->host_no);
- }
- }
- } else {
- dprintk(NDEBUG_C400_PWRITE, "Waiting for LASTBYTE\n");
- while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT));
- dprintk(NDEBUG_C400_PWRITE, "Got LASTBYTE\n");
+ if (NCR5380_poll_politely2(instance,
+ BUS_AND_STATUS_REG, BASR_DRQ, BASR_DRQ,
+ BUS_AND_STATUS_REG, BASR_PHASE_MATCH, 0, HZ) < 0) {
+ foo = -1;
+ shost_printk(KERN_ERR, instance, "PDMA write: DRQ and phase timeout\n");
}
}
-#endif
}
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-
- if ((!(p & SR_IO)) && (hostdata->flags & FLAG_NCR53C400)) {
- dprintk(NDEBUG_C400_PWRITE, "53C400w: Checking for IRQ\n");
- if (NCR5380_read(BUS_AND_STATUS_REG) & BASR_IRQ) {
- dprintk(NDEBUG_C400_PWRITE, "53C400w: got it, reading reset interrupt reg\n");
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else {
- printk("53C400w: IRQ NOT THERE!\n");
- }
- }
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
*data = d + c;
*count = 0;
*phase = NCR5380_read(STATUS_REG) & PHASE_MASK;
-#if defined(PSEUDO_DMA) && defined(UNSAFE)
- spin_lock_irq(instance->host_lock);
-#endif /* defined(REAL_DMA_POLL) */
return foo;
#endif /* def REAL_DMA */
}
/*
* Function : NCR5380_information_transfer (struct Scsi_Host *instance)
*
- * Purpose : run through the various SCSI phases and do as the target
- * directs us to. Operates on the currently connected command,
- * instance->connected.
+ * Purpose : run through the various SCSI phases and do as the target
+ * directs us to. Operates on the currently connected command,
+ * instance->connected.
*
* Inputs : instance, instance for which we are doing commands
*
- * Side effects : SCSI things happen, the disconnected queue will be
- * modified if a command disconnects, *instance->connected will
- * change.
+ * Side effects : SCSI things happen, the disconnected queue will be
+ * modified if a command disconnects, *instance->connected will
+ * change.
*
- * XXX Note : we need to watch for bus free or a reset condition here
- * to recover from an unexpected bus free condition.
- *
- * Locks: io_request_lock held by caller in IRQ mode
+ * XXX Note : we need to watch for bus free or a reset condition here
+ * to recover from an unexpected bus free condition.
*/
-static void NCR5380_information_transfer(struct Scsi_Host *instance) {
- NCR5380_local_declare();
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *)instance->hostdata;
+static void NCR5380_information_transfer(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char msgout = NOP;
int sink = 0;
int len;
#endif
unsigned char *data;
unsigned char phase, tmp, extended_msg[10], old_phase = 0xff;
- struct scsi_cmnd *cmd = (struct scsi_cmnd *) hostdata->connected;
- /* RvC: we need to set the end of the polling time */
- unsigned long poll_time = jiffies + USLEEP_POLL;
+ struct scsi_cmnd *cmd;
- NCR5380_setup(instance);
+ while ((cmd = hostdata->connected)) {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
- while (1) {
tmp = NCR5380_read(STATUS_REG);
/* We only have a valid SCSI phase when REQ is asserted */
if (tmp & SR_REQ) {
if (sink && (phase != PHASE_MSGOUT)) {
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
- while (NCR5380_read(STATUS_REG) & SR_REQ);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
+ ICR_ASSERT_ACK);
+ while (NCR5380_read(STATUS_REG) & SR_REQ)
+ ;
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
+ ICR_ASSERT_ATN);
sink = 0;
continue;
}
+
switch (phase) {
- case PHASE_DATAIN:
case PHASE_DATAOUT:
#if (NDEBUG & NDEBUG_NO_DATAOUT)
- printk("scsi%d : NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n", instance->host_no);
+ shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
sink = 1;
do_abort(instance);
cmd->result = DID_ERROR << 16;
- cmd->scsi_done(cmd);
+ complete_cmd(instance, cmd);
return;
#endif
- /*
+ case PHASE_DATAIN:
+ /*
* If there is no room left in the current buffer in the
* scatter-gather list, move onto the next one.
*/
--cmd->SCp.buffers_residual;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
- dprintk(NDEBUG_INFORMATION, "scsi%d : %d bytes and %d buffers left\n", instance->host_no, cmd->SCp.this_residual, cmd->SCp.buffers_residual);
+ dsprintk(NDEBUG_INFORMATION, instance, "%d bytes and %d buffers left\n",
+ cmd->SCp.this_residual,
+ cmd->SCp.buffers_residual);
}
+
/*
- * The preferred transfer method is going to be
+ * The preferred transfer method is going to be
* PSEUDO-DMA for systems that are strictly PIO,
* since we can let the hardware do the handshaking.
*
*/
#if defined(PSEUDO_DMA) || defined(REAL_DMA_POLL)
- /* KLL
- * PSEUDO_DMA is defined here. If this is the g_NCR5380
- * driver then it will always be defined, so the
- * FLAG_NO_PSEUDO_DMA is used to inhibit PDMA in the base
- * NCR5380 case. I think this is a fairly clean solution.
- * We supplement these 2 if's with the flag.
- */
-#ifdef NCR5380_dma_xfer_len
- if (!cmd->device->borken && !(hostdata->flags & FLAG_NO_PSEUDO_DMA) && (transfersize = NCR5380_dma_xfer_len(instance, cmd)) != 0) {
-#else
- transfersize = cmd->transfersize;
-
-#ifdef LIMIT_TRANSFERSIZE /* If we have problems with interrupt service */
- if (transfersize > 512)
- transfersize = 512;
-#endif /* LIMIT_TRANSFERSIZE */
-
- if (!cmd->device->borken && transfersize && !(hostdata->flags & FLAG_NO_PSEUDO_DMA) && cmd->SCp.this_residual && !(cmd->SCp.this_residual % transfersize)) {
- /* Limit transfers to 32K, for xx400 & xx406
- * pseudoDMA that transfers in 128 bytes blocks. */
- if (transfersize > 32 * 1024)
- transfersize = 32 * 1024;
-#endif
+ transfersize = 0;
+ if (!cmd->device->borken &&
+ !(hostdata->flags & FLAG_NO_PSEUDO_DMA))
+ transfersize = NCR5380_dma_xfer_len(instance, cmd, phase);
+
+ if (transfersize) {
len = transfersize;
- if (NCR5380_transfer_dma(instance, &phase, &len, (unsigned char **) &cmd->SCp.ptr)) {
+ if (NCR5380_transfer_dma(instance, &phase,
+ &len, (unsigned char **)&cmd->SCp.ptr)) {
/*
- * If the watchdog timer fires, all future accesses to this
- * device will use the polled-IO.
+ * If the watchdog timer fires, all future
+ * accesses to this device will use the
+ * polled-IO.
*/
scmd_printk(KERN_INFO, cmd,
- "switching to slow handshake\n");
+ "switching to slow handshake\n");
cmd->device->borken = 1;
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
sink = 1;
do_abort(instance);
cmd->result = DID_ERROR << 16;
- cmd->scsi_done(cmd);
+ complete_cmd(instance, cmd);
/* XXX - need to source or sink data here, as appropriate */
} else
cmd->SCp.this_residual -= transfersize - len;
} else
#endif /* defined(PSEUDO_DMA) || defined(REAL_DMA_POLL) */
- NCR5380_transfer_pio(instance, &phase, (int *) &cmd->SCp.this_residual, (unsigned char **)
- &cmd->SCp.ptr);
+ {
+ spin_unlock_irq(&hostdata->lock);
+ NCR5380_transfer_pio(instance, &phase,
+ (int *)&cmd->SCp.this_residual,
+ (unsigned char **)&cmd->SCp.ptr);
+ spin_lock_irq(&hostdata->lock);
+ }
break;
case PHASE_MSGIN:
len = 1;
cmd->SCp.Message = tmp;
switch (tmp) {
- /*
- * Linking lets us reduce the time required to get the
- * next command out to the device, hopefully this will
- * mean we don't waste another revolution due to the delays
- * required by ARBITRATION and another SELECTION.
- *
- * In the current implementation proposal, low level drivers
- * merely have to start the next command, pointed to by
- * next_link, done() is called as with unlinked commands.
- */
-#ifdef LINKED
- case LINKED_CMD_COMPLETE:
- case LINKED_FLG_CMD_COMPLETE:
- /* Accept message by clearing ACK */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %llu linked command complete.\n", instance->host_no, cmd->device->id, cmd->device->lun);
- /*
- * Sanity check : A linked command should only terminate with
- * one of these messages if there are more linked commands
- * available.
- */
- if (!cmd->next_link) {
- printk("scsi%d : target %d lun %llu linked command complete, no next_link\n" instance->host_no, cmd->device->id, cmd->device->lun);
- sink = 1;
- do_abort(instance);
- return;
- }
- initialize_SCp(cmd->next_link);
- /* The next command is still part of this process */
- cmd->next_link->tag = cmd->tag;
- cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
- dprintk(NDEBUG_LINKED, "scsi%d : target %d lun %llu linked request done, calling scsi_done().\n", instance->host_no, cmd->device->id, cmd->device->lun);
- cmd->scsi_done(cmd);
- cmd = hostdata->connected;
- break;
-#endif /* def LINKED */
case ABORT:
case COMMAND_COMPLETE:
/* Accept message by clearing ACK */
sink = 1;
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- hostdata->connected = NULL;
- dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d, lun %llu completed\n", instance->host_no, cmd->device->id, cmd->device->lun);
- hostdata->busy[cmd->device->id] &= ~(1 << (cmd->device->lun & 0xFF));
-
- /*
- * I'm not sure what the correct thing to do here is :
- *
- * If the command that just executed is NOT a request
- * sense, the obvious thing to do is to set the result
- * code to the values of the stored parameters.
- *
- * If it was a REQUEST SENSE command, we need some way
- * to differentiate between the failure code of the original
- * and the failure code of the REQUEST sense - the obvious
- * case is success, where we fall through and leave the result
- * code unchanged.
- *
- * The non-obvious place is where the REQUEST SENSE failed
- */
-
- if (cmd->cmnd[0] != REQUEST_SENSE)
- cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
- else if (status_byte(cmd->SCp.Status) != GOOD)
- cmd->result = (cmd->result & 0x00ffff) | (DID_ERROR << 16);
-
- if ((cmd->cmnd[0] == REQUEST_SENSE) &&
- hostdata->ses.cmd_len) {
- scsi_eh_restore_cmnd(cmd, &hostdata->ses);
- hostdata->ses.cmd_len = 0 ;
- }
+ dsprintk(NDEBUG_QUEUES, instance,
+ "COMMAND COMPLETE %p target %d lun %llu\n",
+ cmd, scmd_id(cmd), cmd->device->lun);
- if ((cmd->cmnd[0] != REQUEST_SENSE) && (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
- scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
-
- dprintk(NDEBUG_AUTOSENSE, "scsi%d : performing request sense\n", instance->host_no);
+ hostdata->connected = NULL;
- LIST(cmd, hostdata->issue_queue);
- cmd->host_scribble = (unsigned char *)
- hostdata->issue_queue;
- hostdata->issue_queue = (struct scsi_cmnd *) cmd;
- dprintk(NDEBUG_QUEUES, "scsi%d : REQUEST SENSE added to head of issue queue\n", instance->host_no);
- } else {
- cmd->scsi_done(cmd);
+ cmd->result &= ~0xffff;
+ cmd->result |= cmd->SCp.Status;
+ cmd->result |= cmd->SCp.Message << 8;
+
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ complete_cmd(instance, cmd);
+ else {
+ if (cmd->SCp.Status == SAM_STAT_CHECK_CONDITION ||
+ cmd->SCp.Status == SAM_STAT_COMMAND_TERMINATED) {
+ dsprintk(NDEBUG_QUEUES, instance, "autosense: adding cmd %p to tail of autosense queue\n",
+ cmd);
+ list_add_tail(&ncmd->list,
+ &hostdata->autosense);
+ } else
+ complete_cmd(instance, cmd);
}
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- /*
- * Restore phase bits to 0 so an interrupted selection,
+ /*
+ * Restore phase bits to 0 so an interrupted selection,
* arbitration can resume.
*/
NCR5380_write(TARGET_COMMAND_REG, 0);
- while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
- barrier();
+ /* Enable reselect interrupts */
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
return;
case MESSAGE_REJECT:
/* Accept message by clearing ACK */
default:
break;
}
- case DISCONNECT:{
- /* Accept message by clearing ACK */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- cmd->device->disconnect = 1;
- LIST(cmd, hostdata->disconnected_queue);
- cmd->host_scribble = (unsigned char *)
- hostdata->disconnected_queue;
- hostdata->connected = NULL;
- hostdata->disconnected_queue = cmd;
- dprintk(NDEBUG_QUEUES, "scsi%d : command for target %d lun %llu was moved from connected to" " the disconnected_queue\n", instance->host_no, cmd->device->id, cmd->device->lun);
- /*
- * Restore phase bits to 0 so an interrupted selection,
- * arbitration can resume.
- */
- NCR5380_write(TARGET_COMMAND_REG, 0);
-
- /* Enable reselect interrupts */
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- /* Wait for bus free to avoid nasty timeouts - FIXME timeout !*/
- /* NCR538_poll_politely(instance, STATUS_REG, SR_BSY, 0, 30 * HZ); */
- while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
- barrier();
- return;
- }
- /*
+ break;
+ case DISCONNECT:
+ /* Accept message by clearing ACK */
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ hostdata->connected = NULL;
+ list_add(&ncmd->list, &hostdata->disconnected);
+ dsprintk(NDEBUG_INFORMATION | NDEBUG_QUEUES,
+ instance, "connected command %p for target %d lun %llu moved to disconnected queue\n",
+ cmd, scmd_id(cmd), cmd->device->lun);
+
+ /*
+ * Restore phase bits to 0 so an interrupted selection,
+ * arbitration can resume.
+ */
+ NCR5380_write(TARGET_COMMAND_REG, 0);
+
+ /* Enable reselect interrupts */
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ return;
+ /*
* The SCSI data pointer is *IMPLICITLY* saved on a disconnect
- * operation, in violation of the SCSI spec so we can safely
+ * operation, in violation of the SCSI spec so we can safely
* ignore SAVE/RESTORE pointers calls.
*
- * Unfortunately, some disks violate the SCSI spec and
+ * Unfortunately, some disks violate the SCSI spec and
* don't issue the required SAVE_POINTERS message before
- * disconnecting, and we have to break spec to remain
+ * disconnecting, and we have to break spec to remain
* compatible.
*/
case SAVE_POINTERS:
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
break;
case EXTENDED_MESSAGE:
-/*
- * Extended messages are sent in the following format :
- * Byte
- * 0 EXTENDED_MESSAGE == 1
- * 1 length (includes one byte for code, doesn't
- * include first two bytes)
- * 2 code
- * 3..length+1 arguments
- *
- * Start the extended message buffer with the EXTENDED_MESSAGE
- * byte, since spi_print_msg() wants the whole thing.
- */
+ /*
+ * Start the message buffer with the EXTENDED_MESSAGE
+ * byte, since spi_print_msg() wants the whole thing.
+ */
extended_msg[0] = EXTENDED_MESSAGE;
/* Accept first byte by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_EXTENDED, "scsi%d : receiving extended message\n", instance->host_no);
+
+ spin_unlock_irq(&hostdata->lock);
+
+ dsprintk(NDEBUG_EXTENDED, instance, "receiving extended message\n");
len = 2;
data = extended_msg + 1;
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
+ dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
+ (int)extended_msg[1],
+ (int)extended_msg[2]);
- dprintk(NDEBUG_EXTENDED, "scsi%d : length=%d, code=0x%02x\n", instance->host_no, (int) extended_msg[1], (int) extended_msg[2]);
-
- if (!len && extended_msg[1] <= (sizeof(extended_msg) - 1)) {
+ if (!len && extended_msg[1] > 0 &&
+ extended_msg[1] <= sizeof(extended_msg) - 2) {
/* Accept third byte by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
len = extended_msg[1] - 1;
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
- dprintk(NDEBUG_EXTENDED, "scsi%d : message received, residual %d\n", instance->host_no, len);
+ dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
+ len);
switch (extended_msg[2]) {
case EXTENDED_SDTR:
tmp = 0;
}
} else if (len) {
- printk("scsi%d: error receiving extended message\n", instance->host_no);
+ shost_printk(KERN_ERR, instance, "error receiving extended message\n");
tmp = 0;
} else {
- printk("scsi%d: extended message code %02x length %d is too long\n", instance->host_no, extended_msg[2], extended_msg[1]);
+ shost_printk(KERN_NOTICE, instance, "extended message code %02x length %d is too long\n",
+ extended_msg[2], extended_msg[1]);
tmp = 0;
}
+
+ spin_lock_irq(&hostdata->lock);
+ if (!hostdata->connected)
+ return;
+
/* Fall through to reject message */
- /*
- * If we get something weird that we aren't expecting,
+ /*
+ * If we get something weird that we aren't expecting,
* reject it.
*/
default:
if (!tmp) {
- printk("scsi%d: rejecting message ", instance->host_no);
+ shost_printk(KERN_ERR, instance, "rejecting message ");
spi_print_msg(extended_msg);
printk("\n");
} else if (tmp != EXTENDED_MESSAGE)
scmd_printk(KERN_INFO, cmd,
- "rejecting unknown message %02x\n",tmp);
+ "rejecting unknown message %02x\n",
+ tmp);
else
scmd_printk(KERN_INFO, cmd,
- "rejecting unknown extended message code %02x, length %d\n", extended_msg[1], extended_msg[0]);
+ "rejecting unknown extended message code %02x, length %d\n",
+ extended_msg[1], extended_msg[0]);
msgout = MESSAGE_REJECT;
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
break;
- } /* switch (tmp) */
+ } /* switch (tmp) */
break;
case PHASE_MSGOUT:
len = 1;
hostdata->last_message = msgout;
NCR5380_transfer_pio(instance, &phase, &len, &data);
if (msgout == ABORT) {
- hostdata->busy[cmd->device->id] &= ~(1 << (cmd->device->lun & 0xFF));
hostdata->connected = NULL;
cmd->result = DID_ERROR << 16;
- cmd->scsi_done(cmd);
+ complete_cmd(instance, cmd);
NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
return;
}
case PHASE_CMDOUT:
len = cmd->cmd_len;
data = cmd->cmnd;
- /*
- * XXX for performance reasons, on machines with a
- * PSEUDO-DMA architecture we should probably
- * use the dma transfer function.
+ /*
+ * XXX for performance reasons, on machines with a
+ * PSEUDO-DMA architecture we should probably
+ * use the dma transfer function.
*/
NCR5380_transfer_pio(instance, &phase, &len, &data);
- if (!cmd->device->disconnect && should_disconnect(cmd->cmnd[0])) {
- NCR5380_set_timer(hostdata, USLEEP_SLEEP);
- dprintk(NDEBUG_USLEEP, "scsi%d : issued command, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
- return;
- }
break;
case PHASE_STATIN:
len = 1;
cmd->SCp.Status = tmp;
break;
default:
- printk("scsi%d : unknown phase\n", instance->host_no);
+ shost_printk(KERN_ERR, instance, "unknown phase\n");
NCR5380_dprint(NDEBUG_ANY, instance);
- } /* switch(phase) */
- } /* if (tmp * SR_REQ) */
- else {
- /* RvC: go to sleep if polling time expired
- */
- if (!cmd->device->disconnect && time_after_eq(jiffies, poll_time)) {
- NCR5380_set_timer(hostdata, USLEEP_SLEEP);
- dprintk(NDEBUG_USLEEP, "scsi%d : poll timed out, sleeping until %lu\n", instance->host_no, hostdata->time_expires);
- return;
- }
+ } /* switch(phase) */
+ } else {
+ spin_unlock_irq(&hostdata->lock);
+ NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+ spin_lock_irq(&hostdata->lock);
}
- } /* while (1) */
+ }
}
/*
* Function : void NCR5380_reselect (struct Scsi_Host *instance)
*
- * Purpose : does reselection, initializing the instance->connected
- * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
- * nexus has been reestablished,
- *
- * Inputs : instance - this instance of the NCR5380.
+ * Purpose : does reselection, initializing the instance->connected
+ * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
+ * nexus has been reestablished,
*
- * Locks: io_request_lock held by caller if IRQ driven
+ * Inputs : instance - this instance of the NCR5380.
*/
-static void NCR5380_reselect(struct Scsi_Host *instance) {
- NCR5380_local_declare();
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *)
- instance->hostdata;
+static void NCR5380_reselect(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char target_mask;
unsigned char lun, phase;
int len;
unsigned char msg[3];
unsigned char *data;
- struct scsi_cmnd *tmp = NULL, *prev;
- int abort = 0;
- NCR5380_setup(instance);
+ struct NCR5380_cmd *ncmd;
+ struct scsi_cmnd *tmp;
/*
* Disable arbitration, etc. since the host adapter obviously
*/
NCR5380_write(MODE_REG, MR_BASE);
- hostdata->restart_select = 1;
target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
- dprintk(NDEBUG_SELECTION, "scsi%d : reselect\n", instance->host_no);
- /*
+ dsprintk(NDEBUG_RESELECTION, instance, "reselect\n");
+
+ /*
* At this point, we have detected that our SCSI ID is on the bus,
* SEL is true and BSY was false for at least one bus settle delay
* (400 ns).
*/
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
-
- /* FIXME: timeout too long, must fail to workqueue */
- if(NCR5380_poll_politely(instance, STATUS_REG, SR_SEL, 0, 2*HZ)<0)
- abort = 1;
-
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ return;
+ }
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
/*
* Wait for target to go into MSGIN.
- * FIXME: timeout needed and fail to work queeu
*/
- if(NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 2*HZ))
- abort = 1;
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+ do_abort(instance);
+ return;
+ }
len = 1;
data = msg;
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
+ if (len) {
+ do_abort(instance);
+ return;
+ }
+
if (!(msg[0] & 0x80)) {
- printk(KERN_ERR "scsi%d : expecting IDENTIFY message, got ", instance->host_no);
+ shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
spi_print_msg(msg);
- abort = 1;
- } else {
- /* Accept message by clearing ACK */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- lun = (msg[0] & 0x07);
+ printk("\n");
+ do_abort(instance);
+ return;
+ }
+ lun = msg[0] & 0x07;
- /*
- * We need to add code for SCSI-II to track which devices have
- * I_T_L_Q nexuses established, and which have simple I_T_L
- * nexuses so we can chose to do additional data transfer.
- */
+ /*
+ * We need to add code for SCSI-II to track which devices have
+ * I_T_L_Q nexuses established, and which have simple I_T_L
+ * nexuses so we can choose to do additional data transfer.
+ */
- /*
- * Find the command corresponding to the I_T_L or I_T_L_Q nexus we
- * just reestablished, and remove it from the disconnected queue.
- */
+ /*
+ * Find the command corresponding to the I_T_L or I_T_L_Q nexus we
+ * just reestablished, and remove it from the disconnected queue.
+ */
+ tmp = NULL;
+ list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
- for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue, prev = NULL; tmp; prev = tmp, tmp = (struct scsi_cmnd *) tmp->host_scribble)
- if ((target_mask == (1 << tmp->device->id)) && (lun == (u8)tmp->device->lun)
- ) {
- if (prev) {
- REMOVE(prev, prev->host_scribble, tmp, tmp->host_scribble);
- prev->host_scribble = tmp->host_scribble;
- } else {
- REMOVE(-1, hostdata->disconnected_queue, tmp, tmp->host_scribble);
- hostdata->disconnected_queue = (struct scsi_cmnd *) tmp->host_scribble;
- }
- tmp->host_scribble = NULL;
- break;
- }
- if (!tmp) {
- printk(KERN_ERR "scsi%d : warning : target bitmask %02x lun %d not in disconnect_queue.\n", instance->host_no, target_mask, lun);
- /*
- * Since we have an established nexus that we can't do anything with,
- * we must abort it.
- */
- abort = 1;
+ if (target_mask == (1 << scmd_id(cmd)) &&
+ lun == (u8)cmd->device->lun) {
+ list_del(&ncmd->list);
+ tmp = cmd;
+ break;
}
}
- if (abort) {
- do_abort(instance);
+ if (tmp) {
+ dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance,
+ "reselect: removed %p from disconnected queue\n", tmp);
} else {
- hostdata->connected = tmp;
- dprintk(NDEBUG_RESELECTION, "scsi%d : nexus established, target = %d, lun = %llu, tag = %d\n", instance->host_no, tmp->device->id, tmp->device->lun, tmp->tag);
+ shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n",
+ target_mask, lun);
+ /*
+ * Since we have an established nexus that we can't do anything
+ * with, we must abort it.
+ */
+ do_abort(instance);
+ return;
}
+
+ /* Accept message by clearing ACK */
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+
+ hostdata->connected = tmp;
+ dsprintk(NDEBUG_RESELECTION, instance, "nexus established, target %d, lun %llu, tag %d\n",
+ scmd_id(tmp), tmp->device->lun, tmp->tag);
}
/*
* Function : void NCR5380_dma_complete (struct Scsi_Host *instance)
*
* Purpose : called by interrupt handler when DMA finishes or a phase
- * mismatch occurs (which would finish the DMA transfer).
+ * mismatch occurs (which would finish the DMA transfer).
*
* Inputs : instance - this instance of the NCR5380.
*
* Returns : pointer to the scsi_cmnd structure for which the I_T_L
- * nexus has been reestablished, on failure NULL is returned.
+ * nexus has been reestablished, on failure NULL is returned.
*/
#ifdef REAL_DMA
static void NCR5380_dma_complete(NCR5380_instance * instance) {
- NCR5380_local_declare();
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int transferred;
- NCR5380_setup(instance);
/*
* XXX this might not be right.
*
* Wait for final byte to transfer, ie wait for ACK to go false.
*
- * We should use the Last Byte Sent bit, unfortunately this is
+ * We should use the Last Byte Sent bit, unfortunately this is
* not available on the 5380/5381 (only the various CMOS chips)
*
* FIXME: timeout, and need to handle long timeout/irq case
NCR5380_poll_politely(instance, BUS_AND_STATUS_REG, BASR_ACK, 0, 5*HZ);
- NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
/*
}
#endif /* def REAL_DMA */
-/*
- * Function : int NCR5380_abort (struct scsi_cmnd *cmd)
- *
- * Purpose : abort a command
- *
- * Inputs : cmd - the scsi_cmnd to abort, code - code to set the
- * host byte of the result field to, if zero DID_ABORTED is
- * used.
- *
- * Returns : SUCCESS - success, FAILED on failure.
- *
- * XXX - there is no way to abort the command that is currently
- * connected, you have to wait for it to complete. If this is
- * a problem, we could implement longjmp() / setjmp(), setjmp()
- * called where the loop started in NCR5380_main().
- *
- * Locks: host lock taken by caller
+/**
+ * list_find_cmd - test for presence of a command in a linked list
+ * @haystack: list of commands
+ * @needle: command to search for
*/
-static int NCR5380_abort(struct scsi_cmnd *cmd)
+static bool list_find_cmd(struct list_head *haystack,
+ struct scsi_cmnd *needle)
{
- NCR5380_local_declare();
- struct Scsi_Host *instance = cmd->device->host;
- struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
- struct scsi_cmnd *tmp, **prev;
+ struct NCR5380_cmd *ncmd;
- scmd_printk(KERN_WARNING, cmd, "aborting command\n");
+ list_for_each_entry(ncmd, haystack, list)
+ if (NCR5380_to_scmd(ncmd) == needle)
+ return true;
+ return false;
+}
- NCR5380_print_status(instance);
+/**
+ * list_del_cmd - remove a command from linked list
+ * @haystack: list of commands
+ * @needle: command to remove
+ */
- NCR5380_setup(instance);
+static bool list_del_cmd(struct list_head *haystack,
+ struct scsi_cmnd *needle)
+{
+ if (list_find_cmd(haystack, needle)) {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(needle);
- dprintk(NDEBUG_ABORT, "scsi%d : abort called\n", instance->host_no);
- dprintk(NDEBUG_ABORT, " basr 0x%X, sr 0x%X\n", NCR5380_read(BUS_AND_STATUS_REG), NCR5380_read(STATUS_REG));
+ list_del(&ncmd->list);
+ return true;
+ }
+ return false;
+}
-#if 0
-/*
- * Case 1 : If the command is the currently executing command,
- * we'll set the aborted flag and return control so that
- * information transfer routine can exit cleanly.
+/**
+ * NCR5380_abort - scsi host eh_abort_handler() method
+ * @cmd: the command to be aborted
+ *
+ * Try to abort a given command by removing it from queues and/or sending
+ * the target an abort message. This may not succeed in causing a target
+ * to abort the command. Nonetheless, the low-level driver must forget about
+ * the command because the mid-layer reclaims it and it may be re-issued.
+ *
+ * The normal path taken by a command is as follows. For EH we trace this
+ * same path to locate and abort the command.
+ *
+ * unissued -> selecting -> [unissued -> selecting ->]... connected ->
+ * [disconnected -> connected ->]...
+ * [autosense -> connected ->] done
+ *
+ * If cmd is unissued then just remove it.
+ * If cmd is disconnected, try to select the target.
+ * If cmd is connected, try to send an abort message.
+ * If cmd is waiting for autosense, give it a chance to complete but check
+ * that it isn't left connected.
+ * If cmd was not found at all then presumably it has already been completed,
+ * in which case return SUCCESS to try to avoid further EH measures.
+ * If the command has not completed yet, we must not fail to find it.
*/
- if (hostdata->connected == cmd) {
- dprintk(NDEBUG_ABORT, "scsi%d : aborting connected command\n", instance->host_no);
- hostdata->aborted = 1;
-/*
- * We should perform BSY checking, and make sure we haven't slipped
- * into BUS FREE.
- */
+static int NCR5380_abort(struct scsi_cmnd *cmd)
+{
+ struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ unsigned long flags;
+ int result = SUCCESS;
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_ATN);
-/*
- * Since we can't change phases until we've completed the current
- * handshake, we have to source or sink a byte of data if the current
- * phase is not MSGOUT.
- */
+ spin_lock_irqsave(&hostdata->lock, flags);
-/*
- * Return control to the executing NCR drive so we can clear the
- * aborted flag and get back into our main loop.
- */
+#if (NDEBUG & NDEBUG_ANY)
+ scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+ NCR5380_dprint(NDEBUG_ANY, instance);
+ NCR5380_dprint_phase(NDEBUG_ANY, instance);
- return SUCCESS;
+ if (list_del_cmd(&hostdata->unissued, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from issue queue\n", cmd);
+ cmd->result = DID_ABORT << 16;
+ cmd->scsi_done(cmd); /* No tag or busy flag to worry about */
}
-#endif
-/*
- * Case 2 : If the command hasn't been issued yet, we simply remove it
- * from the issue queue.
- */
-
- dprintk(NDEBUG_ABORT, "scsi%d : abort going into loop.\n", instance->host_no);
- for (prev = (struct scsi_cmnd **) &(hostdata->issue_queue), tmp = (struct scsi_cmnd *) hostdata->issue_queue; tmp; prev = (struct scsi_cmnd **) &(tmp->host_scribble), tmp = (struct scsi_cmnd *) tmp->host_scribble)
- if (cmd == tmp) {
- REMOVE(5, *prev, tmp, tmp->host_scribble);
- (*prev) = (struct scsi_cmnd *) tmp->host_scribble;
- tmp->host_scribble = NULL;
- tmp->result = DID_ABORT << 16;
- dprintk(NDEBUG_ABORT, "scsi%d : abort removed command from issue queue.\n", instance->host_no);
- tmp->scsi_done(tmp);
- return SUCCESS;
+ if (hostdata->selecting == cmd) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: cmd %p == selecting\n", cmd);
+ hostdata->selecting = NULL;
+ cmd->result = DID_ABORT << 16;
+ complete_cmd(instance, cmd);
+ goto out;
+ }
+
+ if (list_del_cmd(&hostdata->disconnected, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from disconnected list\n", cmd);
+ cmd->result = DID_ERROR << 16;
+ if (!hostdata->connected)
+ NCR5380_select(instance, cmd);
+ if (hostdata->connected != cmd) {
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
+ }
+ }
+
+ if (hostdata->connected == cmd) {
+ dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+ hostdata->connected = NULL;
+ if (do_abort(instance)) {
+ set_host_byte(cmd, DID_ERROR);
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
}
-#if (NDEBUG & NDEBUG_ABORT)
- /* KLL */
- else if (prev == tmp)
- printk(KERN_ERR "scsi%d : LOOP\n", instance->host_no);
+ set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+ hostdata->dma_len = 0;
#endif
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ complete_cmd(instance, cmd);
+ else {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
-/*
- * Case 3 : If any commands are connected, we're going to fail the abort
- * and let the high level SCSI driver retry at a later time or
- * issue a reset.
- *
- * Timeouts, and therefore aborted commands, will be highly unlikely
- * and handling them cleanly in this situation would make the common
- * case of noresets less efficient, and would pollute our code. So,
- * we fail.
- */
+ /* Perform autosense for this command */
+ list_add(&ncmd->list, &hostdata->autosense);
+ }
+ }
- if (hostdata->connected) {
- dprintk(NDEBUG_ABORT, "scsi%d : abort failed, command connected.\n", instance->host_no);
- return FAILED;
+ if (list_find_cmd(&hostdata->autosense, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: found %p on sense queue\n", cmd);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ msleep(1000);
+ spin_lock_irqsave(&hostdata->lock, flags);
+ if (list_del_cmd(&hostdata->autosense, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from sense queue\n", cmd);
+ set_host_byte(cmd, DID_ABORT);
+ complete_cmd(instance, cmd);
+ goto out;
+ }
}
-/*
- * Case 4: If the command is currently disconnected from the bus, and
- * there are no connected commands, we reconnect the I_T_L or
- * I_T_L_Q nexus associated with it, go into message out, and send
- * an abort message.
- *
- * This case is especially ugly. In order to reestablish the nexus, we
- * need to call NCR5380_select(). The easiest way to implement this
- * function was to abort if the bus was busy, and let the interrupt
- * handler triggered on the SEL for reselect take care of lost arbitrations
- * where necessary, meaning interrupts need to be enabled.
- *
- * When interrupts are enabled, the queues may change - so we
- * can't remove it from the disconnected queue before selecting it
- * because that could cause a failure in hashing the nexus if that
- * device reselected.
- *
- * Since the queues may change, we can't use the pointers from when we
- * first locate it.
- *
- * So, we must first locate the command, and if NCR5380_select()
- * succeeds, then issue the abort, relocate the command and remove
- * it from the disconnected queue.
- */
- for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp; tmp = (struct scsi_cmnd *) tmp->host_scribble)
- if (cmd == tmp) {
- dprintk(NDEBUG_ABORT, "scsi%d : aborting disconnected command.\n", instance->host_no);
+ if (hostdata->connected == cmd) {
+ dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+ hostdata->connected = NULL;
+ if (do_abort(instance)) {
+ set_host_byte(cmd, DID_ERROR);
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
+ }
+ set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+ hostdata->dma_len = 0;
+#endif
+ complete_cmd(instance, cmd);
+ }
- if (NCR5380_select(instance, cmd))
- return FAILED;
- dprintk(NDEBUG_ABORT, "scsi%d : nexus reestablished.\n", instance->host_no);
+out:
+ if (result == FAILED)
+ dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd);
+ else
+ dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd);
- do_abort(instance);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
- for (prev = (struct scsi_cmnd **) &(hostdata->disconnected_queue), tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp; prev = (struct scsi_cmnd **) &(tmp->host_scribble), tmp = (struct scsi_cmnd *) tmp->host_scribble)
- if (cmd == tmp) {
- REMOVE(5, *prev, tmp, tmp->host_scribble);
- *prev = (struct scsi_cmnd *) tmp->host_scribble;
- tmp->host_scribble = NULL;
- tmp->result = DID_ABORT << 16;
- tmp->scsi_done(tmp);
- return SUCCESS;
- }
- }
-/*
- * Case 5 : If we reached this point, the command was not found in any of
- * the queues.
- *
- * We probably reached this point because of an unlikely race condition
- * between the command completing successfully and the abortion code,
- * so we won't panic, but we will notify the user in case something really
- * broke.
- */
- printk(KERN_WARNING "scsi%d : warning : SCSI command probably completed successfully\n"
- " before abortion\n", instance->host_no);
- return FAILED;
+ return result;
}
-/*
- * Function : int NCR5380_bus_reset (struct scsi_cmnd *cmd)
- *
- * Purpose : reset the SCSI bus.
- *
- * Returns : SUCCESS
+/**
+ * NCR5380_bus_reset - reset the SCSI bus
+ * @cmd: SCSI command undergoing EH
*
- * Locks: host lock taken by caller
+ * Returns SUCCESS
*/
static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
{
struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int i;
+ unsigned long flags;
+ struct NCR5380_cmd *ncmd;
- NCR5380_local_declare();
- NCR5380_setup(instance);
- NCR5380_print_status(instance);
+ spin_lock_irqsave(&hostdata->lock, flags);
+
+#if (NDEBUG & NDEBUG_ANY)
+ scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+ NCR5380_dprint(NDEBUG_ANY, instance);
+ NCR5380_dprint_phase(NDEBUG_ANY, instance);
- spin_lock_irq(instance->host_lock);
do_reset(instance);
- spin_unlock_irq(instance->host_lock);
+
+ /* reset NCR registers */
+ NCR5380_write(MODE_REG, MR_BASE);
+ NCR5380_write(TARGET_COMMAND_REG, 0);
+ NCR5380_write(SELECT_ENABLE_REG, 0);
+
+ /* After the reset, there are no more connected or disconnected commands
+ * and no busy units; so clear the low-level status here to avoid
+ * conflicts when the mid-level code tries to wake up the affected
+ * commands!
+ */
+
+ hostdata->selecting = NULL;
+
+ list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+ set_host_byte(cmd, DID_RESET);
+ cmd->scsi_done(cmd);
+ }
+ /* Those commands now belong to the mid-layer; forget the stale nodes. */
+ INIT_LIST_HEAD(&hostdata->disconnected);
+
+ list_for_each_entry(ncmd, &hostdata->autosense, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+ set_host_byte(cmd, DID_RESET);
+ cmd->scsi_done(cmd);
+ }
+ /* Same for the autosense queue: every entry has been completed above. */
+ INIT_LIST_HEAD(&hostdata->autosense);
+
+ if (hostdata->connected) {
+ set_host_byte(hostdata->connected, DID_RESET);
+ complete_cmd(instance, hostdata->connected);
+ hostdata->connected = NULL;
+ }
+
+ if (hostdata->sensing) {
+ /* Flag the sensing command itself; ->connected is NULL by now. */
+ set_host_byte(hostdata->sensing, DID_RESET);
+ complete_cmd(instance, hostdata->sensing);
+ hostdata->sensing = NULL;
+ }
+
+ for (i = 0; i < 8; ++i)
+ hostdata->busy[i] = 0;
+#ifdef REAL_DMA
+ hostdata->dma_len = 0;
+#endif
+
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
return SUCCESS;
}
#ifndef NCR5380_H
#define NCR5380_H
+#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <scsi/scsi_dbg.h>
#include <scsi/scsi_eh.h>
+#include <scsi/scsi_transport_spi.h>
#define NDEBUG_ARBITRATION 0x1
#define NDEBUG_AUTOSENSE 0x2
/* Write any value to this register to start an ini mode DMA receive */
#define START_DMA_INITIATOR_RECEIVE_REG 7 /* wo */
-#define C400_CONTROL_STATUS_REG NCR53C400_register_offset-8 /* rw */
-
+/* NCR 53C400(A) Control Status Register bits: */
#define CSR_RESET 0x80 /* wo Resets 53c400 */
#define CSR_53C80_REG 0x80 /* ro 5380 registers busy */
#define CSR_TRANS_DIR 0x40 /* rw Data transfer direction */
#define CSR_BASE CSR_53C80_INTR
#endif
-/* Number of 128-byte blocks to be transferred */
-#define C400_BLOCK_COUNTER_REG NCR53C400_register_offset-7 /* rw */
-
-/* Resume transfer after disconnect */
-#define C400_RESUME_TRANSFER_REG NCR53C400_register_offset-6 /* wo */
-
-/* Access to host buffer stack */
-#define C400_HOST_BUFFER NCR53C400_register_offset-4 /* rw */
-
-
/* Note : PHASE_* macros are based on the values of the STATUS register */
#define PHASE_MASK (SR_MSG | SR_CD | SR_IO)
#define PHASE_SR_TO_TCR(phase) ((phase) >> 2)
-/*
- * The internal should_disconnect() function returns these based on the
- * expected length of a disconnect if a device supports disconnect/
- * reconnect.
- */
-
-#define DISCONNECT_NONE 0
-#define DISCONNECT_TIME_TO_DATA 1
-#define DISCONNECT_LONG 2
-
/*
* "Special" value for the (unsigned char) command tag, to indicate
* I_T_L nexus instead of I_T_L_Q.
#define NO_IRQ 0
#endif
-#define FLAG_HAS_LAST_BYTE_SENT 1 /* NCR53c81 or better */
-#define FLAG_CHECK_LAST_BYTE_SENT 2 /* Only test once */
-#define FLAG_NCR53C400 4 /* NCR53c400 */
+#define FLAG_NO_DMA_FIXUP 1 /* No DMA errata workarounds */
#define FLAG_NO_PSEUDO_DMA 8 /* Inhibit DMA */
-#define FLAG_DTC3181E 16 /* DTC3181E */
#define FLAG_LATE_DMA_SETUP 32 /* Setup NCR before DMA H/W */
#define FLAG_TAGGED_QUEUING 64 /* as X3T9.2 spelled it */
-
-#ifndef ASM
+#define FLAG_TOSHIBA_DELAY 128 /* Allow for borken CD-ROMs */
#ifdef SUPPORT_TAGS
struct tag_alloc {
NCR5380_implementation_fields; /* implementation specific */
struct Scsi_Host *host; /* Host backpointer */
unsigned char id_mask, id_higher_mask; /* 1 << id, all bits greater */
- unsigned char targets_present; /* targets we have connected
- to, so we can call a select
- failure a retryable condition */
- volatile unsigned char busy[8]; /* index = target, bit = lun */
+ unsigned char busy[8]; /* index = target, bit = lun */
#if defined(REAL_DMA) || defined(REAL_DMA_POLL)
- volatile int dma_len; /* requested length of DMA */
+ int dma_len; /* requested length of DMA */
#endif
- volatile unsigned char last_message; /* last message OUT */
- volatile struct scsi_cmnd *connected; /* currently connected command */
- volatile struct scsi_cmnd *issue_queue; /* waiting to be issued */
- volatile struct scsi_cmnd *disconnected_queue; /* waiting for reconnect */
- volatile int restart_select; /* we have disconnected,
- used to restart
- NCR5380_select() */
- volatile unsigned aborted:1; /* flag, says aborted */
+ unsigned char last_message; /* last message OUT */
+ struct scsi_cmnd *connected; /* currently connected cmnd */
+ struct scsi_cmnd *selecting; /* cmnd to be connected */
+ struct list_head unissued; /* waiting to be issued */
+ struct list_head autosense; /* priority issue queue */
+ struct list_head disconnected; /* waiting for reconnect */
+ spinlock_t lock; /* protects this struct */
int flags;
- unsigned long time_expires; /* in jiffies, set prior to sleeping */
- int select_time; /* timer in select for target response */
- volatile struct scsi_cmnd *selecting;
- struct delayed_work coroutine; /* our co-routine */
struct scsi_eh_save ses;
+ struct scsi_cmnd *sensing;
char info[256];
int read_overruns; /* number of bytes to cut from a
* transfer to handle chip overruns */
- int retain_dma_intr;
struct work_struct main_task;
- volatile int main_running;
#ifdef SUPPORT_TAGS
struct tag_alloc TagAlloc[8][8]; /* 8 targets and 8 LUNs */
#endif
unsigned spin_max_r;
unsigned spin_max_w;
#endif
+ struct workqueue_struct *work_q;
+ unsigned long accesses_per_ms; /* chip register accesses per ms */
};
#ifdef __KERNEL__
+struct NCR5380_cmd {
+ struct list_head list;
+};
+
+#define NCR5380_CMD_SIZE (sizeof(struct NCR5380_cmd))
+
+static inline struct scsi_cmnd *NCR5380_to_scmd(struct NCR5380_cmd *ncmd_ptr)
+{
+ return ((struct scsi_cmnd *)ncmd_ptr) - 1;
+}
+
#ifndef NDEBUG
#define NDEBUG (0)
#endif
do { if ((NDEBUG) & (flg)) \
printk(KERN_DEBUG fmt, ## __VA_ARGS__); } while (0)
+#define dsprintk(flg, host, fmt, ...) \
+ do { if ((NDEBUG) & (flg)) \
+ shost_printk(KERN_DEBUG, host, fmt, ## __VA_ARGS__); \
+ } while (0)
+
#if NDEBUG
#define NCR5380_dprint(flg, arg) \
do { if ((NDEBUG) & (flg)) NCR5380_print(arg); } while (0)
static int NCR5380_probe_irq(struct Scsi_Host *instance, int possible);
#endif
static int NCR5380_init(struct Scsi_Host *instance, int flags);
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *);
static void NCR5380_exit(struct Scsi_Host *instance);
static void NCR5380_information_transfer(struct Scsi_Host *instance);
#ifndef DONT_USE_INTR
static void NCR5380_main(struct work_struct *work);
static const char *NCR5380_info(struct Scsi_Host *instance);
static void NCR5380_reselect(struct Scsi_Host *instance);
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd);
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *, struct scsi_cmnd *);
#if defined(PSEUDO_DMA) || defined(REAL_DMA) || defined(REAL_DMA_POLL)
static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data);
#endif
#endif /* defined(i386) || defined(__alpha__) */
#endif /* defined(REAL_DMA) */
#endif /* __KERNEL__ */
-#endif /* ndef ASM */
#endif /* NCR5380_H */
* Copyright 1995-2002, Russell King
*/
#include <linux/module.h>
-#include <linux/signal.h>
#include <linux/ioport.h>
-#include <linux/delay.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <scsi/scsi_host.h>
-#include <scsi/scsicam.h>
-
#define PSEUDO_DMA
#define priv(host) ((struct NCR5380_hostdata *)(host)->hostdata)
-#define NCR5380_local_declare() struct Scsi_Host *_instance
-#define NCR5380_setup(instance) _instance = instance
-#define NCR5380_read(reg) cumanascsi_read(_instance, reg)
-#define NCR5380_write(reg, value) cumanascsi_write(_instance, reg, value)
+#define NCR5380_read(reg) cumanascsi_read(instance, reg)
+#define NCR5380_write(reg, value) cumanascsi_write(instance, reg, value)
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
+
#define NCR5380_intr cumanascsi_intr
#define NCR5380_queue_command cumanascsi_queue_command
#define NCR5380_info cumanascsi_info
.cmd_per_lun = 2,
.use_clustering = DISABLE_CLUSTERING,
.proc_name = "CumanaSCSI-1",
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
static int cumanascsi1_probe(struct expansion_card *ec,
host->irq = ec->irq;
- NCR5380_init(host, 0);
+ ret = NCR5380_init(host, 0);
+ if (ret)
+ goto out_unmap;
+
+ NCR5380_maybe_reset_bus(host);
priv(host)->ctrl = 0;
writeb(0, priv(host)->base + CTRL);
- host->n_io_port = 255;
- if (!(request_region(host->io_port, host->n_io_port, "CumanaSCSI-1"))) {
- ret = -EBUSY;
- goto out_unmap;
- }
-
ret = request_irq(host->irq, cumanascsi_intr, 0,
"CumanaSCSI-1", host);
if (ret) {
printk("scsi%d: IRQ%d not free: %d\n",
host->host_no, host->irq, ret);
- goto out_unmap;
+ goto out_exit;
}
ret = scsi_add_host(host, &ec->dev);
out_free_irq:
free_irq(host->irq, host);
+ out_exit:
+ NCR5380_exit(host);
out_unmap:
iounmap(priv(host)->base);
iounmap(priv(host)->dma);
*/
#include <linux/module.h>
-#include <linux/signal.h>
#include <linux/ioport.h>
-#include <linux/delay.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#define DONT_USE_INTR
#define priv(host) ((struct NCR5380_hostdata *)(host)->hostdata)
-#define NCR5380_local_declare() void __iomem *_base
-#define NCR5380_setup(host) _base = priv(host)->base
-#define NCR5380_read(reg) readb(_base + ((reg) << 2))
-#define NCR5380_write(reg, value) writeb(value, _base + ((reg) << 2))
+#define NCR5380_read(reg) \
+ readb(priv(instance)->base + ((reg) << 2))
+#define NCR5380_write(reg, value) \
+ writeb(value, priv(instance)->base + ((reg) << 2))
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
+
#define NCR5380_queue_command oakscsi_queue_command
#define NCR5380_info oakscsi_info
-#define NCR5380_show_info oakscsi_show_info
#define NCR5380_implementation_fields \
void __iomem *base
static struct scsi_host_template oakscsi_template = {
.module = THIS_MODULE,
- .show_info = oakscsi_show_info,
.name = "Oak 16-bit SCSI",
.info = oakscsi_info,
.queuecommand = oakscsi_queue_command,
.cmd_per_lun = 2,
.use_clustering = DISABLE_CLUSTERING,
.proc_name = "oakscsi",
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
static int oakscsi_probe(struct expansion_card *ec, const struct ecard_id *id)
host->irq = NO_IRQ;
host->n_io_port = 255;
- NCR5380_init(host, 0);
+ ret = NCR5380_init(host, 0);
+ if (ret)
+ goto out_unmap;
+
+ NCR5380_maybe_reset_bus(host);
ret = scsi_add_host(host, &ec->dev);
if (ret)
- goto out_unmap;
+ goto out_exit;
scsi_scan_host(host);
goto out;
+ out_exit:
+ NCR5380_exit(host);
out_unmap:
iounmap(priv(host)->base);
unreg:
/*
* NCR 5380 generic driver routines. These should make it *trivial*
- * to implement 5380 SCSI drivers under Linux with a non-trantor
- * architecture.
+ * to implement 5380 SCSI drivers under Linux with a non-trantor
+ * architecture.
*
- * Note that these routines also work with NR53c400 family chips.
+ * Note that these routines also work with NCR53c400 family chips.
*
* Copyright 1993, Drew Eckhardt
- * Visionary Computing
- * (Unix and Linux consulting and custom programming)
- * drew@colorado.edu
- * +1 (303) 666-5836
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * drew@colorado.edu
+ * +1 (303) 666-5836
*
* For more information, please consult
*
* 1+ (800) 334-5454
*/
-/*
- * ++roman: To port the 5380 driver to the Atari, I had to do some changes in
- * this file, too:
- *
- * - Some of the debug statements were incorrect (undefined variables and the
- * like). I fixed that.
- *
- * - In information_transfer(), I think a #ifdef was wrong. Looking at the
- * possible DMA transfer size should also happen for REAL_DMA. I added this
- * in the #if statement.
- *
- * - When using real DMA, information_transfer() should return in a DATAOUT
- * phase after starting the DMA. It has nothing more to do.
- *
- * - The interrupt service routine should run main after end of DMA, too (not
- * only after RESELECTION interrupts). Additionally, it should _not_ test
- * for more interrupts after running main, since a DMA process may have
- * been started and interrupts are turned on now. The new int could happen
- * inside the execution of NCR5380_intr(), leading to recursive
- * calls.
- *
- * - I've added a function merge_contiguous_buffers() that tries to
- * merge scatter-gather buffers that are located at contiguous
- * physical addresses and can be processed with the same DMA setup.
- * Since most scatter-gather operations work on a page (4K) of
- * 4 buffers (1K), in more than 90% of all cases three interrupts and
- * DMA setup actions are saved.
- *
- * - I've deleted all the stuff for AUTOPROBE_IRQ, REAL_DMA_POLL, PSEUDO_DMA
- * and USLEEP, because these were messing up readability and will never be
- * needed for Atari SCSI.
- *
- * - I've revised the NCR5380_main() calling scheme (relax the 'main_running'
- * stuff), and 'main' is executed in a bottom half if awoken by an
- * interrupt.
- *
- * - The code was quite cluttered up by "#if (NDEBUG & NDEBUG_*) printk..."
- * constructs. In my eyes, this made the source rather unreadable, so I
- * finally replaced that by the *_PRINTK() macros.
- *
- */
-
-/*
- * Further development / testing that should be done :
- * 1. Test linked command handling code after Eric is ready with
- * the high level code.
- */
+/* Ported to Atari by Roman Hodek and others. */
/* Adapted for the sun3 by Sam Creasey. */
-#include <scsi/scsi_dbg.h>
-#include <scsi/scsi_transport_spi.h>
-
-#if (NDEBUG & NDEBUG_LISTS)
-#define LIST(x, y) \
- do { \
- printk("LINE:%d Adding %p to %p\n", \
- __LINE__, (void*)(x), (void*)(y)); \
- if ((x) == (y)) \
- udelay(5); \
- } while (0)
-#define REMOVE(w, x, y, z) \
- do { \
- printk("LINE:%d Removing: %p->%p %p->%p \n", \
- __LINE__, (void*)(w), (void*)(x), \
- (void*)(y), (void*)(z)); \
- if ((x) == (y)) \
- udelay(5); \
- } while (0)
-#else
-#define LIST(x,y)
-#define REMOVE(w,x,y,z)
-#endif
-
-#ifndef notyet
-#undef LINKED
-#endif
-
/*
* Design
*
* piece of hardware that requires you to sit in a loop polling for
* the REQ signal as long as you are connected. Some devices are
* brain dead (ie, many TEXEL CD ROM drives) and won't disconnect
- * while doing long seek operations.
- *
- * The workaround for this is to keep track of devices that have
- * disconnected. If the device hasn't disconnected, for commands that
- * should disconnect, we do something like
- *
- * while (!REQ is asserted) { sleep for N usecs; poll for M usecs }
- *
- * Some tweaking of N and M needs to be done. An algorithm based
- * on "time to data" would give the best results as long as short time
- * to datas (ie, on the same track) were considered, however these
+ * while doing long seek operations. [...] These
* broken devices are the exception rather than the rule and I'd rather
* spend my time optimizing for the normal case.
*
*
* These macros control options :
* AUTOSENSE - if defined, REQUEST SENSE will be performed automatically
- * for commands that return with a CHECK CONDITION status.
+ * for commands that return with a CHECK CONDITION status.
*
* DIFFERENTIAL - if defined, NCR53c81 chips will use external differential
- * transceivers.
- *
- * LINKED - if defined, linked commands are supported.
+ * transceivers.
*
* REAL_DMA - if defined, REAL DMA is used during the data transfer phases.
*
* NCR5380_write(register, value) - write to the specific register
*
* NCR5380_implementation_fields - additional fields needed for this
- * specific implementation of the NCR5380
+ * specific implementation of the NCR5380
*
* Either real DMA *or* pseudo DMA may be implemented
* REAL functions :
* NCR5380_REAL_DMA should be defined if real DMA is to be used.
* Note that the DMA setup functions should return the number of bytes
- * that they were able to program the controller for.
+ * that they were able to program the controller for.
*
* Also note that generic i386/PC versions of these macros are
- * available as NCR5380_i386_dma_write_setup,
- * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
+ * available as NCR5380_i386_dma_write_setup,
+ * NCR5380_i386_dma_read_setup, and NCR5380_i386_dma_residual.
*
* NCR5380_dma_write_setup(instance, src, count) - initialize
* NCR5380_dma_read_setup(instance, dst, count) - initialize
* possible) function may be used.
*/
-/* Macros ease life... :-) */
-#define SETUP_HOSTDATA(in) \
- struct NCR5380_hostdata *hostdata = \
- (struct NCR5380_hostdata *)(in)->hostdata
-#define HOSTDATA(in) ((struct NCR5380_hostdata *)(in)->hostdata)
-
-#define NEXT(cmd) ((struct scsi_cmnd *)(cmd)->host_scribble)
-#define SET_NEXT(cmd,next) ((cmd)->host_scribble = (void *)(next))
-#define NEXTADDR(cmd) ((struct scsi_cmnd **)&(cmd)->host_scribble)
-
-#define HOSTNO instance->host_no
-#define H_NO(cmd) (cmd)->device->host->host_no
+static int do_abort(struct Scsi_Host *);
+static void do_reset(struct Scsi_Host *);
#ifdef SUPPORT_TAGS
* cannot know it in advance :-( We just see a QUEUE_FULL status being
* returned. So, in this case, the driver internal queue size assumption is
* reduced to the number of active tags if QUEUE_FULL is returned by the
- * target. The command is returned to the mid-level, but with status changed
- * to BUSY, since --as I've seen-- the mid-level can't handle QUEUE_FULL
- * correctly.
+ * target.
*
* We're also not allowed running tagged commands as long as an untagged
* command is active. And REQUEST SENSE commands after a contingent allegiance
static int is_lun_busy(struct scsi_cmnd *cmd, int should_be_tagged)
{
u8 lun = cmd->device->lun;
- SETUP_HOSTDATA(cmd->device->host);
+ struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
if (hostdata->busy[cmd->device->id] & (1 << lun))
return 1;
return 0;
if (hostdata->TagAlloc[scmd_id(cmd)][lun].nr_allocated >=
hostdata->TagAlloc[scmd_id(cmd)][lun].queue_size) {
- dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d: no free tags\n",
- H_NO(cmd), cmd->device->id, lun);
+ dsprintk(NDEBUG_TAGS, instance, "target %d lun %d: no free tags\n",
+ scmd_id(cmd), lun);
return 1;
}
return 0;
static void cmd_get_tag(struct scsi_cmnd *cmd, int should_be_tagged)
{
u8 lun = cmd->device->lun;
- SETUP_HOSTDATA(cmd->device->host);
+ struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
/* If we or the target don't support tagged queuing, allocate the LUN for
* an untagged command.
!cmd->device->tagged_supported) {
cmd->tag = TAG_NONE;
hostdata->busy[cmd->device->id] |= (1 << lun);
- dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d now allocated by untagged "
- "command\n", H_NO(cmd), cmd->device->id, lun);
+ dsprintk(NDEBUG_TAGS, instance, "target %d lun %d now allocated by untagged command\n",
+ scmd_id(cmd), lun);
} else {
struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
cmd->tag = find_first_zero_bit(ta->allocated, MAX_TAGS);
set_bit(cmd->tag, ta->allocated);
ta->nr_allocated++;
- dprintk(NDEBUG_TAGS, "scsi%d: using tag %d for target %d lun %d "
- "(now %d tags in use)\n",
- H_NO(cmd), cmd->tag, cmd->device->id,
- lun, ta->nr_allocated);
+ dsprintk(NDEBUG_TAGS, instance, "using tag %d for target %d lun %d (%d tags allocated)\n",
+ cmd->tag, scmd_id(cmd), lun, ta->nr_allocated);
}
}
static void cmd_free_tag(struct scsi_cmnd *cmd)
{
u8 lun = cmd->device->lun;
- SETUP_HOSTDATA(cmd->device->host);
+ struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
if (cmd->tag == TAG_NONE) {
hostdata->busy[cmd->device->id] &= ~(1 << lun);
- dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %d untagged cmd finished\n",
- H_NO(cmd), cmd->device->id, lun);
+ dsprintk(NDEBUG_TAGS, instance, "target %d lun %d untagged cmd freed\n",
+ scmd_id(cmd), lun);
} else if (cmd->tag >= MAX_TAGS) {
- printk(KERN_NOTICE "scsi%d: trying to free bad tag %d!\n",
- H_NO(cmd), cmd->tag);
+ shost_printk(KERN_NOTICE, instance,
+ "trying to free bad tag %d!\n", cmd->tag);
} else {
struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
clear_bit(cmd->tag, ta->allocated);
ta->nr_allocated--;
- dprintk(NDEBUG_TAGS, "scsi%d: freed tag %d for target %d lun %d\n",
- H_NO(cmd), cmd->tag, cmd->device->id, lun);
+ dsprintk(NDEBUG_TAGS, instance, "freed tag %d for target %d lun %d\n",
+ cmd->tag, scmd_id(cmd), lun);
}
}
#endif /* SUPPORT_TAGS */
-
-/*
- * Function: void merge_contiguous_buffers( struct scsi_cmnd *cmd )
- *
- * Purpose: Try to merge several scatter-gather requests into one DMA
- * transfer. This is possible if the scatter buffers lie on
- * physical contiguous addresses.
- *
- * Parameters: struct scsi_cmnd *cmd
- * The command to work on. The first scatter buffer's data are
- * assumed to be already transferred into ptr/this_residual.
+/**
+ * merge_contiguous_buffers - coalesce scatter-gather list entries
+ * @cmd: command requesting IO
+ *
+ * Try to merge several scatter-gather buffers into one DMA transfer.
+ * This is possible if the scatter buffers lie on physically
+ * contiguous addresses. The first scatter-gather buffer's data are
+ * assumed to be already transferred into cmd->SCp.this_residual.
+ * Every buffer merged avoids an interrupt and a DMA setup operation.
*/
static void merge_contiguous_buffers(struct scsi_cmnd *cmd)
cmd->SCp.buffers_residual = scsi_sg_count(cmd) - 1;
cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
cmd->SCp.this_residual = cmd->SCp.buffer->length;
- /* ++roman: Try to merge some scatter-buffers if they are at
- * contiguous physical addresses.
- */
+
merge_contiguous_buffers(cmd);
} else {
cmd->SCp.buffer = NULL;
cmd->SCp.ptr = NULL;
cmd->SCp.this_residual = 0;
}
+
+ cmd->SCp.Status = 0;
+ cmd->SCp.Message = 0;
+}
+
+/**
+ * NCR5380_poll_politely2 - wait for two chip register values
+ * @instance: controller to poll
+ * @reg1: 5380 register to poll
+ * @bit1: Bitmask to check
+ * @val1: Expected value
+ * @reg2: Second 5380 register to poll
+ * @bit2: Second bitmask to check
+ * @val2: Second expected value
+ * @wait: Time-out in jiffies
+ *
+ * Polls the chip in a reasonably efficient manner waiting for an
+ * event to occur. After a short quick poll we begin to yield the CPU
+ * (if possible). In irq contexts the time-out is arbitrarily limited.
+ * Callers may hold locks as long as they are held in irq mode.
+ *
+ * Returns 0 if either or both event(s) occurred otherwise -ETIMEDOUT.
+ */
+
+static int NCR5380_poll_politely2(struct Scsi_Host *instance,
+ int reg1, int bit1, int val1,
+ int reg2, int bit2, int val2, int wait)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ unsigned long deadline = jiffies + wait;
+ unsigned long n;
+
+ /* Busy-wait for up to 10 ms */
+ /* n is the iteration budget: the time-out in microseconds (capped
+ * at 10000) scaled by the calibrated accesses_per_ms. The divisor
+ * 2000 presumably combines 1000 us per ms with the two register
+ * reads each iteration may perform.
+ */
+ n = min(10000U, jiffies_to_usecs(wait));
+ n *= hostdata->accesses_per_ms;
+ n /= 2000;
+ do {
+ if ((NCR5380_read(reg1) & bit1) == val1)
+ return 0;
+ if ((NCR5380_read(reg2) & bit2) == val2)
+ return 0;
+ cpu_relax();
+ } while (n--);
+
+ /* The remaining wait sleeps, so give up here when called with
+ * interrupts disabled or from interrupt context.
+ */
+ if (irqs_disabled() || in_interrupt())
+ return -ETIMEDOUT;
+
+ /* Repeatedly sleep for 1 ms until deadline */
+ while (time_is_after_jiffies(deadline)) {
+ schedule_timeout_uninterruptible(1);
+ if ((NCR5380_read(reg1) & bit1) == val1)
+ return 0;
+ if ((NCR5380_read(reg2) & bit2) == val2)
+ return 0;
+ }
+
+ return -ETIMEDOUT;
}
-#include <linux/delay.h>
+/**
+ * NCR5380_poll_politely - wait for one chip register value
+ * @instance: controller to poll
+ * @reg: 5380 register to poll
+ * @bit: Bitmask to check
+ * @val: Expected value
+ * @wait: Time-out in jiffies
+ *
+ * Single-condition wrapper: the same register, mask and value are passed
+ * as both conditions to NCR5380_poll_politely2(), whose result is returned.
+ */
+static inline int NCR5380_poll_politely(struct Scsi_Host *instance,
+ int reg, int bit, int val, int wait)
+{
+ return NCR5380_poll_politely2(instance, reg, bit, val,
+ reg, bit, val, wait);
+}
#if NDEBUG
static struct {
unsigned char mask;
const char *name;
} signals[] = {
- { SR_DBP, "PARITY"}, { SR_RST, "RST" }, { SR_BSY, "BSY" },
- { SR_REQ, "REQ" }, { SR_MSG, "MSG" }, { SR_CD, "CD" }, { SR_IO, "IO" },
- { SR_SEL, "SEL" }, {0, NULL}
-}, basrs[] = {
- {BASR_ATN, "ATN"}, {BASR_ACK, "ACK"}, {0, NULL}
-}, icrs[] = {
- {ICR_ASSERT_RST, "ASSERT RST"},{ICR_ASSERT_ACK, "ASSERT ACK"},
- {ICR_ASSERT_BSY, "ASSERT BSY"}, {ICR_ASSERT_SEL, "ASSERT SEL"},
- {ICR_ASSERT_ATN, "ASSERT ATN"}, {ICR_ASSERT_DATA, "ASSERT DATA"},
+ {SR_DBP, "PARITY"},
+ {SR_RST, "RST"},
+ {SR_BSY, "BSY"},
+ {SR_REQ, "REQ"},
+ {SR_MSG, "MSG"},
+ {SR_CD, "CD"},
+ {SR_IO, "IO"},
+ {SR_SEL, "SEL"},
{0, NULL}
-}, mrs[] = {
- {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"}, {MR_TARGET, "MODE TARGET"},
- {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"}, {MR_ENABLE_PAR_INTR,
- "MODE PARITY INTR"}, {MR_ENABLE_EOP_INTR,"MODE EOP INTR"},
+},
+basrs[] = {
+ {BASR_ATN, "ATN"},
+ {BASR_ACK, "ACK"},
+ {0, NULL}
+},
+icrs[] = {
+ {ICR_ASSERT_RST, "ASSERT RST"},
+ {ICR_ASSERT_ACK, "ASSERT ACK"},
+ {ICR_ASSERT_BSY, "ASSERT BSY"},
+ {ICR_ASSERT_SEL, "ASSERT SEL"},
+ {ICR_ASSERT_ATN, "ASSERT ATN"},
+ {ICR_ASSERT_DATA, "ASSERT DATA"},
+ {0, NULL}
+},
+mrs[] = {
+ {MR_BLOCK_DMA_MODE, "MODE BLOCK DMA"},
+ {MR_TARGET, "MODE TARGET"},
+ {MR_ENABLE_PAR_CHECK, "MODE PARITY CHECK"},
+ {MR_ENABLE_PAR_INTR, "MODE PARITY INTR"},
+ {MR_ENABLE_EOP_INTR, "MODE EOP INTR"},
{MR_MONITOR_BSY, "MODE MONITOR BSY"},
- {MR_DMA_MODE, "MODE DMA"}, {MR_ARBITRATE, "MODE ARBITRATION"},
+ {MR_DMA_MODE, "MODE DMA"},
+ {MR_ARBITRATE, "MODE ARBITRATION"},
{0, NULL}
};
static void NCR5380_print(struct Scsi_Host *instance)
{
unsigned char status, data, basr, mr, icr, i;
- unsigned long flags;
- local_irq_save(flags);
data = NCR5380_read(CURRENT_SCSI_DATA_REG);
status = NCR5380_read(STATUS_REG);
mr = NCR5380_read(MODE_REG);
icr = NCR5380_read(INITIATOR_COMMAND_REG);
basr = NCR5380_read(BUS_AND_STATUS_REG);
- local_irq_restore(flags);
+
printk("STATUS_REG: %02x ", status);
for (i = 0; signals[i].mask; ++i)
if (status & signals[i].mask)
unsigned char value;
const char *name;
} phases[] = {
- {PHASE_DATAOUT, "DATAOUT"}, {PHASE_DATAIN, "DATAIN"}, {PHASE_CMDOUT, "CMDOUT"},
- {PHASE_STATIN, "STATIN"}, {PHASE_MSGOUT, "MSGOUT"}, {PHASE_MSGIN, "MSGIN"},
+ {PHASE_DATAOUT, "DATAOUT"},
+ {PHASE_DATAIN, "DATAIN"},
+ {PHASE_CMDOUT, "CMDOUT"},
+ {PHASE_STATIN, "STATIN"},
+ {PHASE_MSGOUT, "MSGOUT"},
+ {PHASE_MSGIN, "MSGIN"},
{PHASE_UNKNOWN, "UNKNOWN"}
};
* @instance: adapter to dump
*
* Print the current SCSI phase for debugging purposes
- *
- * Locks: none
*/
static void NCR5380_print_phase(struct Scsi_Host *instance)
status = NCR5380_read(STATUS_REG);
if (!(status & SR_REQ))
- printk(KERN_DEBUG "scsi%d: REQ not asserted, phase unknown.\n", HOSTNO);
+ shost_printk(KERN_DEBUG, instance, "REQ not asserted, phase unknown.\n");
else {
for (i = 0; (phases[i].value != PHASE_UNKNOWN) &&
(phases[i].value != (status & PHASE_MASK)); ++i)
;
- printk(KERN_DEBUG "scsi%d: phase %s\n", HOSTNO, phases[i].name);
+ shost_printk(KERN_DEBUG, instance, "phase %s\n", phases[i].name);
}
}
-
#endif
-/*
- * ++roman: New scheme of calling NCR5380_main()
- *
- * If we're not in an interrupt, we can call our main directly, it cannot be
- * already running. Else, we queue it on a task queue, if not 'main_running'
- * tells us that a lower level is already executing it. This way,
- * 'main_running' needs not be protected in a special way.
- *
- * queue_main() is a utility function for putting our main onto the task
- * queue, if main_running is false. It should be called only from a
- * interrupt or bottom half.
- */
-
-#include <linux/gfp.h>
-#include <linux/workqueue.h>
-#include <linux/interrupt.h>
-
-static inline void queue_main(struct NCR5380_hostdata *hostdata)
-{
- if (!hostdata->main_running) {
- /* If in interrupt and NCR5380_main() not already running,
- queue it on the 'immediate' task queue, to be processed
- immediately after the current interrupt processing has
- finished. */
- schedule_work(&hostdata->main_task);
- }
- /* else: nothing to do: the running NCR5380_main() will pick up
- any newly queued command. */
-}
-
/**
* NCR58380_info - report driver and host information
* @instance: relevant scsi host instance
*
* For use as the host template info() handler.
- *
- * Locks: none
*/
static const char *NCR5380_info(struct Scsi_Host *instance)
"base 0x%lx, irq %d, "
"can_queue %d, cmd_per_lun %d, "
"sg_tablesize %d, this_id %d, "
- "flags { %s}, "
+ "flags { %s%s}, "
"options { %s} ",
instance->hostt->name, instance->io_port, instance->n_io_port,
instance->base, instance->irq,
instance->can_queue, instance->cmd_per_lun,
instance->sg_tablesize, instance->this_id,
hostdata->flags & FLAG_TAGGED_QUEUING ? "TAGGED_QUEUING " : "",
+ hostdata->flags & FLAG_TOSHIBA_DELAY ? "TOSHIBA_DELAY " : "",
#ifdef DIFFERENTIAL
"DIFFERENTIAL "
#endif
"");
}
-/**
- * NCR5380_print_status - dump controller info
- * @instance: controller to dump
- *
- * Print commands in the various queues, called from NCR5380_abort
- * to aid debugging.
- */
-
-static void lprint_Scsi_Cmnd(struct scsi_cmnd *cmd)
-{
- int i, s;
- unsigned char *command;
- printk("scsi%d: destination target %d, lun %llu\n",
- H_NO(cmd), cmd->device->id, cmd->device->lun);
- printk(KERN_CONT " command = ");
- command = cmd->cmnd;
- printk(KERN_CONT "%2d (0x%02x)", command[0], command[0]);
- for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
- printk(KERN_CONT " %02x", command[i]);
- printk("\n");
-}
-
-static void NCR5380_print_status(struct Scsi_Host *instance)
-{
- struct NCR5380_hostdata *hostdata;
- struct scsi_cmnd *ptr;
- unsigned long flags;
-
- NCR5380_dprint(NDEBUG_ANY, instance);
- NCR5380_dprint_phase(NDEBUG_ANY, instance);
-
- hostdata = (struct NCR5380_hostdata *)instance->hostdata;
-
- local_irq_save(flags);
- printk("NCR5380: coroutine is%s running.\n",
- hostdata->main_running ? "" : "n't");
- if (!hostdata->connected)
- printk("scsi%d: no currently connected command\n", HOSTNO);
- else
- lprint_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected);
- printk("scsi%d: issue_queue\n", HOSTNO);
- for (ptr = (struct scsi_cmnd *)hostdata->issue_queue; ptr; ptr = NEXT(ptr))
- lprint_Scsi_Cmnd(ptr);
-
- printk("scsi%d: disconnected_queue\n", HOSTNO);
- for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr;
- ptr = NEXT(ptr))
- lprint_Scsi_Cmnd(ptr);
-
- local_irq_restore(flags);
- printk("\n");
-}
-
-static void show_Scsi_Cmnd(struct scsi_cmnd *cmd, struct seq_file *m)
-{
- int i, s;
- unsigned char *command;
- seq_printf(m, "scsi%d: destination target %d, lun %llu\n",
- H_NO(cmd), cmd->device->id, cmd->device->lun);
- seq_puts(m, " command = ");
- command = cmd->cmnd;
- seq_printf(m, "%2d (0x%02x)", command[0], command[0]);
- for (i = 1, s = COMMAND_SIZE(command[0]); i < s; ++i)
- seq_printf(m, " %02x", command[i]);
- seq_putc(m, '\n');
-}
-
-static int __maybe_unused NCR5380_show_info(struct seq_file *m,
- struct Scsi_Host *instance)
-{
- struct NCR5380_hostdata *hostdata;
- struct scsi_cmnd *ptr;
- unsigned long flags;
-
- hostdata = (struct NCR5380_hostdata *)instance->hostdata;
-
- local_irq_save(flags);
- seq_printf(m, "NCR5380: coroutine is%s running.\n",
- hostdata->main_running ? "" : "n't");
- if (!hostdata->connected)
- seq_printf(m, "scsi%d: no currently connected command\n", HOSTNO);
- else
- show_Scsi_Cmnd((struct scsi_cmnd *) hostdata->connected, m);
- seq_printf(m, "scsi%d: issue_queue\n", HOSTNO);
- for (ptr = (struct scsi_cmnd *)hostdata->issue_queue; ptr; ptr = NEXT(ptr))
- show_Scsi_Cmnd(ptr, m);
-
- seq_printf(m, "scsi%d: disconnected_queue\n", HOSTNO);
- for (ptr = (struct scsi_cmnd *) hostdata->disconnected_queue; ptr;
- ptr = NEXT(ptr))
- show_Scsi_Cmnd(ptr, m);
-
- local_irq_restore(flags);
- return 0;
-}
-
/**
* NCR5380_init - initialise an NCR5380
* @instance: adapter to configure
static int __init NCR5380_init(struct Scsi_Host *instance, int flags)
{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int i;
- SETUP_HOSTDATA(instance);
+ unsigned long deadline;
hostdata->host = instance;
- hostdata->aborted = 0;
hostdata->id_mask = 1 << instance->this_id;
hostdata->id_higher_mask = 0;
for (i = hostdata->id_mask; i <= 0x80; i <<= 1)
#if defined (REAL_DMA)
hostdata->dma_len = 0;
#endif
- hostdata->targets_present = 0;
+ spin_lock_init(&hostdata->lock);
hostdata->connected = NULL;
- hostdata->issue_queue = NULL;
- hostdata->disconnected_queue = NULL;
+ hostdata->sensing = NULL;
+ INIT_LIST_HEAD(&hostdata->autosense);
+ INIT_LIST_HEAD(&hostdata->unissued);
+ INIT_LIST_HEAD(&hostdata->disconnected);
+
hostdata->flags = flags;
INIT_WORK(&hostdata->main_task, NCR5380_main);
+ hostdata->work_q = alloc_workqueue("ncr5380_%d",
+ WQ_UNBOUND | WQ_MEM_RECLAIM,
+ 1, instance->host_no);
+ if (!hostdata->work_q)
+ return -ENOMEM;
prepare_info(instance);
NCR5380_write(TARGET_COMMAND_REG, 0);
NCR5380_write(SELECT_ENABLE_REG, 0);
+ /* Calibrate register polling loop */
+ i = 0;
+ deadline = jiffies + 1;
+ do {
+ cpu_relax();
+ } while (time_is_after_jiffies(deadline));
+ deadline += msecs_to_jiffies(256);
+ do {
+ NCR5380_read(STATUS_REG);
+ ++i;
+ cpu_relax();
+ } while (time_is_after_jiffies(deadline));
+ hostdata->accesses_per_ms = i / 256;
+
+ return 0;
+}
+
+/**
+ * NCR5380_maybe_reset_bus - Detect and correct bus wedge problems.
+ * @instance: adapter to check
+ *
+ * If the system crashed, it may have crashed with a connected target and
+ * the SCSI bus busy. Check for BUS FREE phase. If not, try to abort the
+ * currently established nexus, which we know nothing about. Failing that
+ * do a bus reset.
+ *
+ * Note that a bus reset will cause the chip to assert IRQ.
+ *
+ * Returns 0 if successful, otherwise -ENXIO.
+ */
+
+static int NCR5380_maybe_reset_bus(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int pass;
+
+ /* Escalate for as long as BSY stays asserted: odd passes wait,
+ * pass 2 attempts an abort, pass 4 a bus reset, pass 6 gives up.
+ * The loop ends as soon as BSY is found clear.
+ */
+ for (pass = 1; (NCR5380_read(STATUS_REG) & SR_BSY) && pass <= 6; ++pass) {
+ switch (pass) {
+ case 1:
+ case 3:
+ case 5:
+ shost_printk(KERN_ERR, instance, "SCSI bus busy, waiting up to five seconds\n");
+ /* Result ignored; the loop condition re-reads BSY */
+ NCR5380_poll_politely(instance,
+ STATUS_REG, SR_BSY, 0, 5 * HZ);
+ break;
+ case 2:
+ shost_printk(KERN_ERR, instance, "bus busy, attempting abort\n");
+ do_abort(instance);
+ break;
+ case 4:
+ shost_printk(KERN_ERR, instance, "bus busy, attempting reset\n");
+ do_reset(instance);
+ /* Wait after a reset; the SCSI standard calls for
+ * 250ms, we wait 500ms to be on the safe side.
+ * But some Toshiba CD-ROMs need ten times the
+ * standard 250ms, hence 2500ms.
+ */
+ if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+ msleep(2500);
+ else
+ msleep(500);
+ break;
+ case 6:
+ shost_printk(KERN_ERR, instance, "bus locked solid\n");
+ return -ENXIO;
+ }
+ }
return 0;
}
struct NCR5380_hostdata *hostdata = shost_priv(instance);
cancel_work_sync(&hostdata->main_task);
+ destroy_workqueue(hostdata->work_q);
+}
+
+/**
+ * complete_cmd - finish processing a command and return it to the SCSI ML
+ * @instance: the host instance
+ * @cmd: command to complete
+ */
+
+static void complete_cmd(struct Scsi_Host *instance,
+ struct scsi_cmnd *cmd)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+
+ dsprintk(NDEBUG_QUEUES, instance, "complete_cmd: cmd %p\n", cmd);
+
+ if (hostdata->sensing == cmd) {
+ /* Autosense processing ends here */
+ /* The status byte is tested before the saved command state
+ * is restored in both branches; a status other than GOOD is
+ * reported with host byte DID_ERROR.
+ */
+ if ((cmd->result & 0xff) != SAM_STAT_GOOD) {
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ set_host_byte(cmd, DID_ERROR);
+ } else
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ hostdata->sensing = NULL;
+ }
+
+ /* Free the tag, or without tag support clear this LUN's bit in the
+ * target's busy mask.
+ */
+#ifdef SUPPORT_TAGS
+ cmd_free_tag(cmd);
+#else
+ hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun);
+#endif
+ /* Hand the command back through its completion callback */
+ cmd->scsi_done(cmd);
}
/**
* @instance: the relevant SCSI adapter
* @cmd: SCSI command
*
- * cmd is added to the per instance issue_queue, with minor
+ * cmd is added to the per-instance issue queue, with minor
* twiddling done to the host specific fields of cmd. If the
* main coroutine is not running, it is restarted.
*/
struct scsi_cmnd *cmd)
{
struct NCR5380_hostdata *hostdata = shost_priv(instance);
- struct scsi_cmnd *tmp;
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
unsigned long flags;
#if (NDEBUG & NDEBUG_NO_WRITE)
switch (cmd->cmnd[0]) {
case WRITE_6:
case WRITE_10:
- printk(KERN_NOTICE "scsi%d: WRITE attempted with NO_WRITE debugging flag set\n",
- H_NO(cmd));
+ shost_printk(KERN_DEBUG, instance, "WRITE attempted with NDEBUG_NO_WRITE set\n");
cmd->result = (DID_ERROR << 16);
cmd->scsi_done(cmd);
return 0;
}
#endif /* (NDEBUG & NDEBUG_NO_WRITE) */
- /*
- * We use the host_scribble field as a pointer to the next command
- * in a queue
- */
-
- SET_NEXT(cmd, NULL);
cmd->result = 0;
/*
- * Insert the cmd into the issue queue. Note that REQUEST SENSE
- * commands are added to the head of the queue since any command will
- * clear the contingent allegiance condition that exists and the
- * sense data is only guaranteed to be valid while the condition exists.
- */
-
- /* ++guenther: now that the issue queue is being set up, we can lock ST-DMA.
- * Otherwise a running NCR5380_main may steal the lock.
- * Lock before actually inserting due to fairness reasons explained in
- * atari_scsi.c. If we insert first, then it's impossible for this driver
- * to release the lock.
- * Stop timer for this command while waiting for the lock, or timeouts
- * may happen (and they really do), and it's no good if the command doesn't
- * appear in any of the queues.
* ++roman: Just disabling the NCR interrupt isn't sufficient here,
* because also a timer int can trigger an abort or reset, which would
* alter queues and touch the lock.
if (!NCR5380_acquire_dma_irq(instance))
return SCSI_MLQUEUE_HOST_BUSY;
- local_irq_save(flags);
+ spin_lock_irqsave(&hostdata->lock, flags);
/*
* Insert the cmd into the issue queue. Note that REQUEST SENSE
* sense data is only guaranteed to be valid while the condition exists.
*/
- if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
- LIST(cmd, hostdata->issue_queue);
- SET_NEXT(cmd, hostdata->issue_queue);
- hostdata->issue_queue = cmd;
- } else {
- for (tmp = (struct scsi_cmnd *)hostdata->issue_queue;
- NEXT(tmp); tmp = NEXT(tmp))
- ;
- LIST(cmd, tmp);
- SET_NEXT(tmp, cmd);
- }
- local_irq_restore(flags);
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ list_add(&ncmd->list, &hostdata->unissued);
+ else
+ list_add_tail(&ncmd->list, &hostdata->unissued);
- dprintk(NDEBUG_QUEUES, "scsi%d: command added to %s of queue\n", H_NO(cmd),
- (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+ spin_unlock_irqrestore(&hostdata->lock, flags);
- /* If queue_command() is called from an interrupt (real one or bottom
- * half), we let queue_main() do the job of taking care about main. If it
- * is already running, this is a no-op, else main will be queued.
- *
- * If we're not in an interrupt, we can call NCR5380_main()
- * unconditionally, because it cannot be already running.
- */
- if (in_interrupt() || irqs_disabled())
- queue_main(hostdata);
- else
- NCR5380_main(&hostdata->main_task);
+ dsprintk(NDEBUG_QUEUES, instance, "command %p added to %s of queue\n",
+ cmd, (cmd->cmnd[0] == REQUEST_SENSE) ? "head" : "tail");
+
+ /* Kick off command processing */
+ queue_work(hostdata->work_q, &hostdata->main_task);
return 0;
}
struct NCR5380_hostdata *hostdata = shost_priv(instance);
/* Caller does the locking needed to set & test these data atomically */
- if (!hostdata->disconnected_queue &&
- !hostdata->issue_queue &&
+ if (list_empty(&hostdata->disconnected) &&
+ list_empty(&hostdata->unissued) &&
+ list_empty(&hostdata->autosense) &&
!hostdata->connected &&
- !hostdata->retain_dma_intr)
+ !hostdata->selecting)
NCR5380_release_dma_irq(instance);
}
+/**
+ * dequeue_next_cmd - dequeue a command for processing
+ * @instance: the scsi host instance
+ *
+ * Priority is given to commands on the autosense queue. These commands
+ * need autosense because of a CHECK CONDITION result.
+ *
+ * Returns a command pointer if a command is found for a target that is
+ * not already busy. Otherwise returns NULL.
+ */
+
+static struct scsi_cmnd *dequeue_next_cmd(struct Scsi_Host *instance)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ struct NCR5380_cmd *ncmd;
+ struct scsi_cmnd *cmd;
+
+ /* The issue queue is only considered when no command is waiting
+ * on the autosense queue.
+ */
+ if (list_empty(&hostdata->autosense)) {
+ list_for_each_entry(ncmd, &hostdata->unissued, list) {
+ cmd = NCR5380_to_scmd(ncmd);
+ dsprintk(NDEBUG_QUEUES, instance, "dequeue: cmd=%p target=%d busy=0x%02x lun=%llu\n",
+ cmd, scmd_id(cmd), hostdata->busy[scmd_id(cmd)], cmd->device->lun);
+
+ /* Take the first queued command whose LUN is not busy */
+ if (
+#ifdef SUPPORT_TAGS
+ !is_lun_busy(cmd, 1)
+#else
+ !(hostdata->busy[scmd_id(cmd)] & (1 << cmd->device->lun))
+#endif
+ ) {
+ list_del(&ncmd->list);
+ dsprintk(NDEBUG_QUEUES, instance,
+ "dequeue: removed %p from issue queue\n", cmd);
+ return cmd;
+ }
+ }
+ } else {
+ /* Autosense processing begins here */
+ ncmd = list_first_entry(&hostdata->autosense,
+ struct NCR5380_cmd, list);
+ list_del(&ncmd->list);
+ cmd = NCR5380_to_scmd(ncmd);
+ dsprintk(NDEBUG_QUEUES, instance,
+ "dequeue: removed %p from autosense queue\n", cmd);
+ /* Save the command state in hostdata->ses and record the
+ * command as the one being sensed; complete_cmd() and
+ * requeue_cmd() restore from hostdata->ses.
+ */
+ scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
+ hostdata->sensing = cmd;
+ return cmd;
+ }
+ /* Every command on the issue queue is for a busy LUN, or it is empty */
+ return NULL;
+}
+
+/**
+ * requeue_cmd - return a command to the queue it was dequeued from
+ * @instance: the scsi host instance
+ * @cmd: command that could not be issued
+ *
+ * If @cmd is the command currently undergoing autosense, its saved state
+ * is restored and it is put back at the head of the autosense queue.
+ * Any other command is put back at the head of the issue queue.
+ */
+static void requeue_cmd(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
+ /* Compare against cmd rather than testing for non-NULL: a different
+ * command may be the one being sensed, and its saved state must not
+ * be restored onto this command.
+ */
+ if (hostdata->sensing == cmd) {
+ scsi_eh_restore_cmnd(cmd, &hostdata->ses);
+ list_add(&ncmd->list, &hostdata->autosense);
+ hostdata->sensing = NULL;
+ } else
+ list_add(&ncmd->list, &hostdata->unissued);
+}
+
/**
* NCR5380_main - NCR state machines
*
* be done on the NCR5380 host adapters in a system. Both
* NCR5380_queue_command() and NCR5380_intr() will try to start it
* in case it is not running.
- *
- * Locks: called as its own thread with no locks held.
*/
static void NCR5380_main(struct work_struct *work)
struct NCR5380_hostdata *hostdata =
container_of(work, struct NCR5380_hostdata, main_task);
struct Scsi_Host *instance = hostdata->host;
- struct scsi_cmnd *tmp, *prev;
+ struct scsi_cmnd *cmd;
int done;
- unsigned long flags;
/*
- * We run (with interrupts disabled) until we're sure that none of
- * the host adapters have anything that can be done, at which point
- * we set main_running to 0 and exit.
- *
- * Interrupts are enabled before doing various other internal
- * instructions, after we've decided that we need to run through
- * the loop again.
- *
- * this should prevent any race conditions.
- *
* ++roman: Just disabling the NCR interrupt isn't sufficient here,
* because also a timer int can trigger an abort or reset, which can
* alter queues and touch the Falcon lock.
*/
- /* Tell int handlers main() is now already executing. Note that
- no races are possible here. If an int comes in before
- 'main_running' is set here, and queues/executes main via the
- task queue, it doesn't do any harm, just this instance of main
- won't find any work left to do. */
- if (hostdata->main_running)
- return;
- hostdata->main_running = 1;
-
- local_save_flags(flags);
do {
- local_irq_disable(); /* Freeze request queues */
done = 1;
- if (!hostdata->connected) {
- dprintk(NDEBUG_MAIN, "scsi%d: not connected\n", HOSTNO);
- /*
- * Search through the issue_queue for a command destined
- * for a target that's not busy.
- */
-#if (NDEBUG & NDEBUG_LISTS)
- for (tmp = (struct scsi_cmnd *) hostdata->issue_queue, prev = NULL;
- tmp && (tmp != prev); prev = tmp, tmp = NEXT(tmp))
- ;
- /*printk("%p ", tmp);*/
- if ((tmp == prev) && tmp)
- printk(" LOOP\n");
- /* else printk("\n"); */
-#endif
- for (tmp = (struct scsi_cmnd *) hostdata->issue_queue,
- prev = NULL; tmp; prev = tmp, tmp = NEXT(tmp)) {
- u8 lun = tmp->device->lun;
-
- dprintk(NDEBUG_LISTS,
- "MAIN tmp=%p target=%d busy=%d lun=%d\n",
- tmp, scmd_id(tmp), hostdata->busy[scmd_id(tmp)],
- lun);
- /* When we find one, remove it from the issue queue. */
- /* ++guenther: possible race with Falcon locking */
- if (
-#ifdef SUPPORT_TAGS
- !is_lun_busy( tmp, tmp->cmnd[0] != REQUEST_SENSE)
-#else
- !(hostdata->busy[tmp->device->id] & (1 << lun))
-#endif
- ) {
- /* ++guenther: just to be sure, this must be atomic */
- local_irq_disable();
- if (prev) {
- REMOVE(prev, NEXT(prev), tmp, NEXT(tmp));
- SET_NEXT(prev, NEXT(tmp));
- } else {
- REMOVE(-1, hostdata->issue_queue, tmp, NEXT(tmp));
- hostdata->issue_queue = NEXT(tmp);
- }
- SET_NEXT(tmp, NULL);
- hostdata->retain_dma_intr++;
+ spin_lock_irq(&hostdata->lock);
+ while (!hostdata->connected &&
+ (cmd = dequeue_next_cmd(instance))) {
- /* reenable interrupts after finding one */
- local_irq_restore(flags);
+ dsprintk(NDEBUG_MAIN, instance, "main: dequeued %p\n", cmd);
- /*
- * Attempt to establish an I_T_L nexus here.
- * On success, instance->hostdata->connected is set.
- * On failure, we must add the command back to the
- * issue queue so we can keep trying.
- */
- dprintk(NDEBUG_MAIN, "scsi%d: main(): command for target %d "
- "lun %d removed from issue_queue\n",
- HOSTNO, tmp->device->id, lun);
- /*
- * REQUEST SENSE commands are issued without tagged
- * queueing, even on SCSI-II devices because the
- * contingent allegiance condition exists for the
- * entire unit.
- */
- /* ++roman: ...and the standard also requires that
- * REQUEST SENSE command are untagged.
- */
+ /*
+ * Attempt to establish an I_T_L nexus here.
+ * On success, instance->hostdata->connected is set.
+ * On failure, we must add the command back to the
+ * issue queue so we can keep trying.
+ */
+ /*
+ * REQUEST SENSE commands are issued without tagged
+ * queueing, even on SCSI-II devices because the
+ * contingent allegiance condition exists for the
+ * entire unit.
+ */
+ /* ++roman: ...and the standard also requires that
+ * REQUEST SENSE commands are untagged.
+ */
#ifdef SUPPORT_TAGS
- cmd_get_tag(tmp, tmp->cmnd[0] != REQUEST_SENSE);
+ cmd_get_tag(cmd, cmd->cmnd[0] != REQUEST_SENSE);
#endif
- if (!NCR5380_select(instance, tmp)) {
- local_irq_disable();
- hostdata->retain_dma_intr--;
- /* release if target did not response! */
- maybe_release_dma_irq(instance);
- local_irq_restore(flags);
- break;
- } else {
- local_irq_disable();
- LIST(tmp, hostdata->issue_queue);
- SET_NEXT(tmp, hostdata->issue_queue);
- hostdata->issue_queue = tmp;
+ cmd = NCR5380_select(instance, cmd);
+ if (!cmd) {
+ dsprintk(NDEBUG_MAIN, instance, "main: select complete\n");
+ maybe_release_dma_irq(instance);
+ } else {
+ dsprintk(NDEBUG_MAIN | NDEBUG_QUEUES, instance,
+ "main: select failed, returning %p to queue\n", cmd);
+ requeue_cmd(instance, cmd);
#ifdef SUPPORT_TAGS
- cmd_free_tag(tmp);
+ cmd_free_tag(cmd);
#endif
- hostdata->retain_dma_intr--;
- local_irq_restore(flags);
- dprintk(NDEBUG_MAIN, "scsi%d: main(): select() failed, "
- "returned to issue_queue\n", HOSTNO);
- if (hostdata->connected)
- break;
- }
- } /* if target/lun/target queue is not busy */
- } /* for issue_queue */
- } /* if (!hostdata->connected) */
-
+ }
+ }
if (hostdata->connected
#ifdef REAL_DMA
&& !hostdata->dma_len
#endif
) {
- local_irq_restore(flags);
- dprintk(NDEBUG_MAIN, "scsi%d: main: performing information transfer\n",
- HOSTNO);
+ dsprintk(NDEBUG_MAIN, instance, "main: performing information transfer\n");
NCR5380_information_transfer(instance);
- dprintk(NDEBUG_MAIN, "scsi%d: main: done set false\n", HOSTNO);
done = 0;
}
+ spin_unlock_irq(&hostdata->lock);
+ if (!done)
+ cond_resched();
} while (!done);
-
- /* Better allow ints _after_ 'main_running' has been cleared, else
- an interrupt could believe we'll pick up the work it left for
- us, but we won't see it anymore here... */
- hostdata->main_running = 0;
- local_irq_restore(flags);
}
* Function : void NCR5380_dma_complete (struct Scsi_Host *instance)
*
* Purpose : Called by interrupt handler when DMA finishes or a phase
- * mismatch occurs (which would finish the DMA transfer).
+ * mismatch occurs (which would finish the DMA transfer).
*
* Inputs : instance - this instance of the NCR5380.
- *
*/
static void NCR5380_dma_complete(struct Scsi_Host *instance)
{
- SETUP_HOSTDATA(instance);
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int transferred;
unsigned char **data;
- volatile int *count;
+ int *count;
int saved_data = 0, overrun = 0;
unsigned char p;
- if (!hostdata->connected) {
- printk(KERN_WARNING "scsi%d: received end of DMA interrupt with "
- "no connected cmd\n", HOSTNO);
- return;
- }
-
if (hostdata->read_overruns) {
p = hostdata->connected->SCp.phase;
if (p & SR_IO) {
(BASR_PHASE_MATCH|BASR_ACK)) {
saved_data = NCR5380_read(INPUT_DATA_REG);
overrun = 1;
- dprintk(NDEBUG_DMA, "scsi%d: read overrun handled\n", HOSTNO);
+ dsprintk(NDEBUG_DMA, instance, "read overrun handled\n");
}
}
}
- dprintk(NDEBUG_DMA, "scsi%d: real DMA transfer complete, basr 0x%X, sr 0x%X\n",
- HOSTNO, NCR5380_read(BUS_AND_STATUS_REG),
- NCR5380_read(STATUS_REG));
-
#if defined(CONFIG_SUN3)
if ((sun3scsi_dma_finish(rq_data_dir(hostdata->connected->request)))) {
pr_err("scsi%d: overrun in UDC counter -- not prepared to deal with this!\n",
}
#endif
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
transferred = hostdata->dma_len - NCR5380_dma_residual(instance);
hostdata->dma_len = 0;
* Handle interrupts, reestablishing I_T_L or I_T_L_Q nexuses
* from the disconnected queue, and restarting NCR5380_main()
* as required.
+ *
+ * The chip can assert IRQ in any of six different conditions. The IRQ flag
+ * is then cleared by reading the Reset Parity/Interrupt Register (RPIR).
+ * Three of these six conditions are latched in the Bus and Status Register:
+ * - End of DMA (cleared by ending DMA Mode)
+ * - Parity error (cleared by reading RPIR)
+ * - Loss of BSY (cleared by reading RPIR)
+ * Two conditions have flag bits that are not latched:
+ * - Bus phase mismatch (non-maskable in DMA Mode, cleared by ending DMA Mode)
+ * - Bus reset (non-maskable)
+ * The remaining condition has no flag bit at all:
+ * - Selection/reselection
+ *
+ * Hence, establishing the cause(s) of any interrupt is partly guesswork.
+ * In "The DP8490 and DP5380 Comparison Guide", National Semiconductor
+ * claimed that "the design of the [DP8490] interrupt logic ensures
+ * interrupts will not be lost (they can be on the DP5380)."
+ * The L5380/53C80 datasheet from LOGIC Devices has more details.
+ *
+ * Checking for bus reset by reading RST is futile because of interrupt
+ * latency, but a bus reset will reset chip logic. Checking for parity error
+ * is unnecessary because that interrupt is never enabled. A Loss of BSY
+ * condition will clear DMA Mode. We can tell when this occurs because the
+ * Busy Monitor interrupt is enabled together with DMA Mode.
*/
static irqreturn_t NCR5380_intr(int irq, void *dev_id)
{
struct Scsi_Host *instance = dev_id;
- int done = 1, handled = 0;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ int handled = 0;
unsigned char basr;
+ unsigned long flags;
- dprintk(NDEBUG_INTR, "scsi%d: NCR5380 irq triggered\n", HOSTNO);
+ spin_lock_irqsave(&hostdata->lock, flags);
- /* Look for pending interrupts */
basr = NCR5380_read(BUS_AND_STATUS_REG);
- dprintk(NDEBUG_INTR, "scsi%d: BASR=%02x\n", HOSTNO, basr);
- /* dispatch to appropriate routine if found and done=0 */
if (basr & BASR_IRQ) {
- NCR5380_dprint(NDEBUG_INTR, instance);
- if ((NCR5380_read(STATUS_REG) & (SR_SEL|SR_IO)) == (SR_SEL|SR_IO)) {
- done = 0;
- dprintk(NDEBUG_INTR, "scsi%d: SEL interrupt\n", HOSTNO);
- NCR5380_reselect(instance);
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else if (basr & BASR_PARITY_ERROR) {
- dprintk(NDEBUG_INTR, "scsi%d: PARITY interrupt\n", HOSTNO);
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else if ((NCR5380_read(STATUS_REG) & SR_RST) == SR_RST) {
- dprintk(NDEBUG_INTR, "scsi%d: RESET interrupt\n", HOSTNO);
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- } else {
- /*
- * The rest of the interrupt conditions can occur only during a
- * DMA transfer
- */
+ unsigned char mr = NCR5380_read(MODE_REG);
+ unsigned char sr = NCR5380_read(STATUS_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "IRQ %d, BASR 0x%02x, SR 0x%02x, MR 0x%02x\n",
+ irq, basr, sr, mr);
#if defined(REAL_DMA)
- /*
- * We should only get PHASE MISMATCH and EOP interrupts if we have
- * DMA enabled, so do a sanity check based on the current setting
- * of the MODE register.
+ if ((mr & MR_DMA_MODE) || (mr & MR_MONITOR_BSY)) {
+ /* Probably End of DMA, Phase Mismatch or Loss of BSY.
+ * We ack IRQ after clearing Mode Register. Workarounds
+ * for End of DMA errata need to happen in DMA Mode.
*/
- if ((NCR5380_read(MODE_REG) & MR_DMA_MODE) &&
- ((basr & BASR_END_DMA_TRANSFER) ||
- !(basr & BASR_PHASE_MATCH))) {
+ dsprintk(NDEBUG_INTR, instance, "interrupt in DMA mode\n");
- dprintk(NDEBUG_INTR, "scsi%d: PHASE MISM or EOP interrupt\n", HOSTNO);
- NCR5380_dma_complete( instance );
- done = 0;
- } else
+ if (hostdata->connected) {
+ NCR5380_dma_complete(instance);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ } else {
+ NCR5380_write(MODE_REG, MR_BASE);
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ }
+ } else
#endif /* REAL_DMA */
- {
-/* MS: Ignore unknown phase mismatch interrupts (caused by EOP interrupt) */
- if (basr & BASR_PHASE_MATCH)
- dprintk(NDEBUG_INTR, "scsi%d: unknown interrupt, "
- "BASR 0x%x, MR 0x%x, SR 0x%x\n",
- HOSTNO, basr, NCR5380_read(MODE_REG),
- NCR5380_read(STATUS_REG));
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ if ((NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_mask) &&
+ (sr & (SR_SEL | SR_IO | SR_BSY | SR_RST)) == (SR_SEL | SR_IO)) {
+ /* Probably reselected */
+ NCR5380_write(SELECT_ENABLE_REG, 0);
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "interrupt with SEL and IO\n");
+
+ if (!hostdata->connected) {
+ NCR5380_reselect(instance);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ }
+ if (!hostdata->connected)
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ } else {
+ /* Probably Bus Reset */
+ NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+
+ dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n");
#ifdef SUN3_SCSI_VME
- dregs->csr |= CSR_DMA_ENABLE;
+ dregs->csr |= CSR_DMA_ENABLE;
#endif
- }
- } /* if !(SELECTION || PARITY) */
+ }
handled = 1;
- } /* BASR & IRQ */ else {
- printk(KERN_NOTICE "scsi%d: interrupt without IRQ bit set in BASR, "
- "BASR 0x%X, MR 0x%X, SR 0x%x\n", HOSTNO, basr,
- NCR5380_read(MODE_REG), NCR5380_read(STATUS_REG));
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ } else {
+ shost_printk(KERN_NOTICE, instance, "interrupt without IRQ bit\n");
#ifdef SUN3_SCSI_VME
dregs->csr |= CSR_DMA_ENABLE;
#endif
}
- if (!done) {
- dprintk(NDEBUG_INTR, "scsi%d: in int routine, calling main\n", HOSTNO);
- /* Put a call to NCR5380_main() on the queue... */
- queue_main(shost_priv(instance));
- }
+ spin_unlock_irqrestore(&hostdata->lock, flags);
+
return IRQ_RETVAL(handled);
}
/*
* Function : int NCR5380_select(struct Scsi_Host *instance,
- * struct scsi_cmnd *cmd)
+ * struct scsi_cmnd *cmd)
*
* Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command,
- * including ARBITRATION, SELECTION, and initial message out for
- * IDENTIFY and queue messages.
+ * including ARBITRATION, SELECTION, and initial message out for
+ * IDENTIFY and queue messages.
*
* Inputs : instance - instantiation of the 5380 driver on which this
- * target lives, cmd - SCSI command to execute.
+ * target lives, cmd - SCSI command to execute.
*
- * Returns : -1 if selection could not execute for some reason,
- * 0 if selection succeeded or failed because the target
- * did not respond.
+ * Returns cmd if selection failed but should be retried,
+ * NULL if selection failed and should not be retried, or
+ * NULL if selection succeeded (hostdata->connected == cmd).
*
* Side effects :
- * If bus busy, arbitration failed, etc, NCR5380_select() will exit
- * with registers as they should have been on entry - ie
- * SELECT_ENABLE will be set appropriately, the NCR5380
- * will cease to drive any SCSI bus signals.
+ * If bus busy, arbitration failed, etc, NCR5380_select() will exit
+ * with registers as they should have been on entry - ie
+ * SELECT_ENABLE will be set appropriately, the NCR5380
+ * will cease to drive any SCSI bus signals.
*
- * If successful : I_T_L or I_T_L_Q nexus will be established,
- * instance->connected will be set to cmd.
- * SELECT interrupt will be disabled.
+ * If successful : I_T_L or I_T_L_Q nexus will be established,
+ * hostdata->connected will be set to cmd.
+ * SELECT interrupt will be disabled.
*
- * If failed (no target) : cmd->scsi_done() will be called, and the
- * cmd->result host byte set to DID_BAD_TARGET.
+ * If failed (no target) : cmd->scsi_done() will be called, and the
+ * cmd->result host byte set to DID_BAD_TARGET.
*/
-static int NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
+static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance,
+ struct scsi_cmnd *cmd)
{
- SETUP_HOSTDATA(instance);
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char tmp[3], phase;
unsigned char *data;
int len;
- unsigned long timeout;
- unsigned long flags;
+ int err;
- hostdata->restart_select = 0;
NCR5380_dprint(NDEBUG_ARBITRATION, instance);
- dprintk(NDEBUG_ARBITRATION, "scsi%d: starting arbitration, id = %d\n", HOSTNO,
- instance->this_id);
+ dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n",
+ instance->this_id);
+
+ /*
+ * Arbitration and selection phases are slow and involve dropping the
+ * lock, so we have to watch out for EH. An exception handler may
+ * change 'selecting' to NULL. This function will then return NULL
+ * so that the caller will forget about 'cmd'. (During information
+ * transfer phases, EH may change 'connected' to NULL.)
+ */
+ hostdata->selecting = cmd;
/*
* Set the phase bits to 0, otherwise the NCR5380 won't drive the
* data bus during SELECTION.
*/
- local_irq_save(flags);
- if (hostdata->connected) {
- local_irq_restore(flags);
- return -1;
- }
NCR5380_write(TARGET_COMMAND_REG, 0);
/*
NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask);
NCR5380_write(MODE_REG, MR_ARBITRATE);
- local_irq_restore(flags);
-
- /* Wait for arbitration logic to complete */
-#if defined(NCR_TIMEOUT)
- {
- unsigned long timeout = jiffies + 2*NCR_TIMEOUT;
+ /* The chip now waits for BUS FREE phase. Then after the 800 ns
+ * Bus Free Delay, arbitration will begin.
+ */
- while (!(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_PROGRESS) &&
- time_before(jiffies, timeout) && !hostdata->connected)
- ;
- if (time_after_eq(jiffies, timeout)) {
- printk("scsi : arbitration timeout at %d\n", __LINE__);
- NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return -1;
- }
+ spin_unlock_irq(&hostdata->lock);
+ err = NCR5380_poll_politely2(instance, MODE_REG, MR_ARBITRATE, 0,
+ INITIATOR_COMMAND_REG, ICR_ARBITRATION_PROGRESS,
+ ICR_ARBITRATION_PROGRESS, HZ);
+ spin_lock_irq(&hostdata->lock);
+ if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE)) {
+ /* Reselection interrupt */
+ goto out;
}
-#else /* NCR_TIMEOUT */
- while (!(NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_PROGRESS) &&
- !hostdata->connected)
- ;
-#endif
-
- dprintk(NDEBUG_ARBITRATION, "scsi%d: arbitration complete\n", HOSTNO);
-
- if (hostdata->connected) {
+ if (err < 0) {
NCR5380_write(MODE_REG, MR_BASE);
- return -1;
+ shost_printk(KERN_ERR, instance,
+ "select: arbitration timeout\n");
+ goto out;
}
- /*
- * The arbitration delay is 2.2us, but this is a minimum and there is
- * no maximum so we can safely sleep for ceil(2.2) usecs to accommodate
- * the integral nature of udelay().
- *
- */
+ spin_unlock_irq(&hostdata->lock);
+ /* The SCSI-2 arbitration delay is 2.4 us */
udelay(3);
/* Check for lost arbitration */
if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
(NCR5380_read(CURRENT_SCSI_DATA_REG) & hostdata->id_higher_mask) ||
- (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
- hostdata->connected) {
+ (NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST)) {
NCR5380_write(MODE_REG, MR_BASE);
- dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting MR_ARBITRATE\n",
- HOSTNO);
- return -1;
+ dsprintk(NDEBUG_ARBITRATION, instance, "lost arbitration, deasserting MR_ARBITRATE\n");
+ spin_lock_irq(&hostdata->lock);
+ goto out;
}
- /* after/during arbitration, BSY should be asserted.
- IBM DPES-31080 Version S31Q works now */
- /* Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman) */
+ /* After/during arbitration, BSY should be asserted.
+ * IBM DPES-31080 Version S31Q works now
+ * Tnx to Thomas_Roesch@m2.maus.de for finding this! (Roman)
+ */
NCR5380_write(INITIATOR_COMMAND_REG,
ICR_BASE | ICR_ASSERT_SEL | ICR_ASSERT_BSY);
- if ((NCR5380_read(INITIATOR_COMMAND_REG) & ICR_ARBITRATION_LOST) ||
- hostdata->connected) {
- NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_ARBITRATION, "scsi%d: lost arbitration, deasserting ICR_ASSERT_SEL\n",
- HOSTNO);
- return -1;
- }
-
/*
* Again, bus clear + bus settle time is 1.2us, however, this is
* a minimum so we'll udelay ceil(1.2)
*/
-#ifdef CONFIG_ATARI_SCSI_TOSHIBA_DELAY
- /* ++roman: But some targets (see above :-) seem to need a bit more... */
- udelay(15);
-#else
- udelay(2);
-#endif
+ if (hostdata->flags & FLAG_TOSHIBA_DELAY)
+ udelay(15);
+ else
+ udelay(2);
- if (hostdata->connected) {
+ spin_lock_irq(&hostdata->lock);
+
+ /* NCR5380_reselect() clears MODE_REG after a reselection interrupt */
+ if (!(NCR5380_read(MODE_REG) & MR_ARBITRATE))
+ goto out;
+
+ if (!hostdata->selecting) {
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- return -1;
+ goto out;
}
- dprintk(NDEBUG_ARBITRATION, "scsi%d: won arbitration\n", HOSTNO);
+ dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n");
/*
* Now that we have won arbitration, start Selection process, asserting
* the host and target ID's on the SCSI bus.
*/
- NCR5380_write(OUTPUT_DATA_REG, (hostdata->id_mask | (1 << cmd->device->id)));
+ NCR5380_write(OUTPUT_DATA_REG, hostdata->id_mask | (1 << scmd_id(cmd)));
/*
* Raise ATN while SEL is true before BSY goes false from arbitration,
* phase immediately after selection.
*/
- NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_BSY |
- ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL ));
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY |
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_SEL);
NCR5380_write(MODE_REG, MR_BASE);
/*
* Reselect interrupts must be turned off prior to the dropping of BSY,
* otherwise we will trigger an interrupt.
*/
-
- if (hostdata->connected) {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- return -1;
- }
-
NCR5380_write(SELECT_ENABLE_REG, 0);
+ spin_unlock_irq(&hostdata->lock);
+
/*
* The initiator shall then wait at least two deskew delays and release
* the BSY signal.
udelay(1); /* wingel -- wait two bus deskew delay >2*45ns */
/* Reset BSY */
- NCR5380_write(INITIATOR_COMMAND_REG, (ICR_BASE | ICR_ASSERT_DATA |
- ICR_ASSERT_ATN | ICR_ASSERT_SEL));
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA |
+ ICR_ASSERT_ATN | ICR_ASSERT_SEL);
/*
* Something weird happens when we cease to drive BSY - looks
udelay(1);
- dprintk(NDEBUG_SELECTION, "scsi%d: selecting target %d\n", HOSTNO, cmd->device->id);
+ dsprintk(NDEBUG_SELECTION, instance, "selecting target %d\n", scmd_id(cmd));
/*
* The SCSI specification calls for a 250 ms timeout for the actual
* selection.
*/
- timeout = jiffies + msecs_to_jiffies(250);
-
- /*
- * XXX very interesting - we're seeing a bounce where the BSY we
- * asserted is being reflected / still asserted (propagation delay?)
- * and it's detecting as true. Sigh.
- */
-
-#if 0
- /* ++roman: If a target conformed to the SCSI standard, it wouldn't assert
- * IO while SEL is true. But again, there are some disks out the in the
- * world that do that nevertheless. (Somebody claimed that this announces
- * reselection capability of the target.) So we better skip that test and
- * only wait for BSY... (Famous german words: Der Klügere gibt nach :-)
- */
-
- while (time_before(jiffies, timeout) &&
- !(NCR5380_read(STATUS_REG) & (SR_BSY | SR_IO)))
- ;
+ err = NCR5380_poll_politely(instance, STATUS_REG, SR_BSY, SR_BSY,
+ msecs_to_jiffies(250));
if ((NCR5380_read(STATUS_REG) & (SR_SEL | SR_IO)) == (SR_SEL | SR_IO)) {
+ spin_lock_irq(&hostdata->lock);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_reselect(instance);
- printk(KERN_ERR "scsi%d: reselection after won arbitration?\n",
- HOSTNO);
+ if (!hostdata->connected)
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ shost_printk(KERN_ERR, instance, "reselection after won arbitration?\n");
+ goto out;
+ }
+
+ if (err < 0) {
+ spin_lock_irq(&hostdata->lock);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return -1;
+ /* Can't touch cmd if it has been reclaimed by the scsi ML */
+ if (hostdata->selecting) {
+ cmd->result = DID_BAD_TARGET << 16;
+ complete_cmd(instance, cmd);
+ dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n");
+ cmd = NULL;
+ }
+ goto out;
}
-#else
- while (time_before(jiffies, timeout) && !(NCR5380_read(STATUS_REG) & SR_BSY))
- ;
-#endif
/*
* No less than two deskew delays after the initiator detects the
* change the DATA BUS. -wingel
*/
- udelay(1);
-
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
-
- if (!(NCR5380_read(STATUS_REG) & SR_BSY)) {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- if (hostdata->targets_present & (1 << cmd->device->id)) {
- printk(KERN_ERR "scsi%d: weirdness\n", HOSTNO);
- if (hostdata->restart_select)
- printk(KERN_NOTICE "\trestart select\n");
- NCR5380_dprint(NDEBUG_ANY, instance);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return -1;
- }
- cmd->result = DID_BAD_TARGET << 16;
-#ifdef SUPPORT_TAGS
- cmd_free_tag(cmd);
-#endif
- cmd->scsi_done(cmd);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- dprintk(NDEBUG_SELECTION, "scsi%d: target did not respond within 250ms\n", HOSTNO);
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- return 0;
- }
-
- hostdata->targets_present |= (1 << cmd->device->id);
+ udelay(1);
+
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
/*
* Since we followed the SCSI spec, and raised ATN while SEL
* until it wraps back to 0.
*
* XXX - it turns out that there are some broken SCSI-II devices,
- * which claim to support tagged queuing but fail when more than
- * some number of commands are issued at once.
+ * which claim to support tagged queuing but fail when more than
+ * some number of commands are issued at once.
*/
/* Wait for start of REQ/ACK handshake */
- while (!(NCR5380_read(STATUS_REG) & SR_REQ))
- ;
- dprintk(NDEBUG_SELECTION, "scsi%d: target %d selected, going into MESSAGE OUT phase.\n",
- HOSTNO, cmd->device->id);
+ err = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+ spin_lock_irq(&hostdata->lock);
+ if (err < 0) {
+ shost_printk(KERN_ERR, instance, "select: REQ timeout\n");
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ goto out;
+ }
+ if (!hostdata->selecting) {
+ do_abort(instance);
+ goto out;
+ }
+
+ dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n",
+ scmd_id(cmd));
tmp[0] = IDENTIFY(1, cmd->device->lun);
#ifdef SUPPORT_TAGS
data = tmp;
phase = PHASE_MSGOUT;
NCR5380_transfer_pio(instance, &phase, &len, &data);
- dprintk(NDEBUG_SELECTION, "scsi%d: nexus established.\n", HOSTNO);
+ dsprintk(NDEBUG_SELECTION, instance, "nexus established.\n");
/* XXX need to handle errors here */
+
hostdata->connected = cmd;
#ifndef SUPPORT_TAGS
- hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
+ hostdata->busy[cmd->device->id] |= 1 << cmd->device->lun;
#endif
#ifdef SUN3_SCSI_VME
dregs->csr |= CSR_INTR;
initialize_SCp(cmd);
- return 0;
+ cmd = NULL;
+
+out:
+ if (!hostdata->selecting)
+ return NULL;
+ hostdata->selecting = NULL;
+ return cmd;
}
/*
* Function : int NCR5380_transfer_pio (struct Scsi_Host *instance,
- * unsigned char *phase, int *count, unsigned char **data)
+ * unsigned char *phase, int *count, unsigned char **data)
*
* Purpose : transfers data in given phase using polled I/O
*
* Inputs : instance - instance of driver, *phase - pointer to
- * what phase is expected, *count - pointer to number of
- * bytes to transfer, **data - pointer to data pointer.
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
*
* Returns : -1 when different phase is entered without transferring
- * maximum number of bytes, 0 if all bytes are transferred or exit
- * is in same phase.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
*
- * Also, *phase, *count, *data are modified in place.
+ * Also, *phase, *count, *data are modified in place.
*
* XXX Note : handling for bus free may be useful.
*/
unsigned char *phase, int *count,
unsigned char **data)
{
- register unsigned char p = *phase, tmp;
- register int c = *count;
- register unsigned char *d = *data;
+ unsigned char p = *phase, tmp;
+ int c = *count;
+ unsigned char *d = *data;
/*
* The NCR5380 chip will only drive the SCSI bus when the
* Wait for assertion of REQ, after which the phase bits will be
* valid
*/
- while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ))
- ;
- dprintk(NDEBUG_HANDSHAKE, "scsi%d: REQ detected\n", HOSTNO);
+ if (NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ) < 0)
+ break;
+
+ dsprintk(NDEBUG_HANDSHAKE, instance, "REQ asserted\n");
/* Check for phase mismatch */
- if ((tmp & PHASE_MASK) != p) {
- dprintk(NDEBUG_PIO, "scsi%d: phase mismatch\n", HOSTNO);
+ if ((NCR5380_read(STATUS_REG) & PHASE_MASK) != p) {
+ dsprintk(NDEBUG_PIO, instance, "phase mismatch\n");
NCR5380_dprint_phase(NDEBUG_PIO, instance);
break;
}
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_DATA);
NCR5380_dprint(NDEBUG_PIO, instance);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
- ICR_ASSERT_DATA | ICR_ASSERT_ACK);
+ ICR_ASSERT_DATA | ICR_ASSERT_ACK);
} else {
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
- ICR_ASSERT_DATA | ICR_ASSERT_ATN);
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN);
NCR5380_dprint(NDEBUG_PIO, instance);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
- ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+ ICR_ASSERT_DATA | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
}
} else {
NCR5380_dprint(NDEBUG_PIO, instance);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ACK);
}
- while (NCR5380_read(STATUS_REG) & SR_REQ)
- ;
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_REQ, 0, 5 * HZ) < 0)
+ break;
- dprintk(NDEBUG_HANDSHAKE, "scsi%d: req false, handshake complete\n", HOSTNO);
+ dsprintk(NDEBUG_HANDSHAKE, instance, "REQ negated, handshake complete\n");
- /*
- * We have several special cases to consider during REQ/ACK handshaking :
- * 1. We were in MSGOUT phase, and we are on the last byte of the
- * message. ATN must be dropped as ACK is dropped.
- *
- * 2. We are in a MSGIN phase, and we are on the last byte of the
- * message. We must exit with ACK asserted, so that the calling
- * code may raise ATN before dropping ACK to reject the message.
- *
- * 3. ACK and ATN are clear and the target may proceed as normal.
- */
+/*
+ * We have several special cases to consider during REQ/ACK handshaking :
+ * 1. We were in MSGOUT phase, and we are on the last byte of the
+ * message. ATN must be dropped as ACK is dropped.
+ *
+ * 2. We are in a MSGIN phase, and we are on the last byte of the
+ * message. We must exit with ACK asserted, so that the calling
+ * code may raise ATN before dropping ACK to reject the message.
+ *
+ * 3. ACK and ATN are clear and the target may proceed as normal.
+ */
if (!(p == PHASE_MSGIN && c == 1)) {
if (p == PHASE_MSGOUT && c > 1)
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
}
} while (--c);
- dprintk(NDEBUG_PIO, "scsi%d: residual %d\n", HOSTNO, c);
+ dsprintk(NDEBUG_PIO, instance, "residual %d\n", c);
*count = c;
*data = d;
tmp = NCR5380_read(STATUS_REG);
/* The phase read from the bus is valid if either REQ is (already)
- * asserted or if ACK hasn't been released yet. The latter is the case if
- * we're in MSGIN and all wanted bytes have been received.
+ * asserted or if ACK hasn't been released yet. The latter applies if
+ * we're in MSG IN, DATA IN or STATUS and all bytes have been received.
*/
- if ((tmp & SR_REQ) || (p == PHASE_MSGIN && c == 0))
+ if ((tmp & SR_REQ) || ((tmp & SR_IO) && c == 0))
*phase = tmp & PHASE_MASK;
else
*phase = PHASE_UNKNOWN;
return -1;
}
-/*
- * Function : do_abort (Scsi_Host *host)
+/**
+ * do_reset - issue a reset command
+ * @instance: adapter to reset
+ *
+ * Issue a reset sequence to the NCR5380 and try and get the bus
+ * back into sane shape.
*
- * Purpose : abort the currently established nexus. Should only be
- * called from a routine which can drop into a
+ * This clears the reset interrupt flag because there may be no handler for
+ * it. When the driver is initialized, the NCR5380_intr() handler has not yet
+ * been installed. And when in EH we may have released the ST DMA interrupt.
+ */
+
+static void do_reset(struct Scsi_Host *instance)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ NCR5380_write(TARGET_COMMAND_REG,
+ PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG) & PHASE_MASK));
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
+ udelay(50);
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
+ local_irq_restore(flags);
+}
+
+/**
+ * do_abort - abort the currently established nexus by going to
+ * MESSAGE OUT phase and sending an ABORT message.
+ * @instance: relevant scsi host instance
*
- * Returns : 0 on success, -1 on failure.
+ * Returns 0 on success, -1 on failure.
*/
static int do_abort(struct Scsi_Host *instance)
{
- unsigned char tmp, *msgptr, phase;
+ unsigned char *msgptr, phase, tmp;
int len;
+ int rc;
/* Request message out phase */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
* the target sees, so we just handshake.
*/
- while (!((tmp = NCR5380_read(STATUS_REG)) & SR_REQ))
- ;
+ rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, 10 * HZ);
+ if (rc < 0)
+ goto timeout;
+
+ tmp = NCR5380_read(STATUS_REG) & PHASE_MASK;
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
- if ((tmp & PHASE_MASK) != PHASE_MSGOUT) {
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
- ICR_ASSERT_ACK);
- while (NCR5380_read(STATUS_REG) & SR_REQ)
- ;
+ if (tmp != PHASE_MSGOUT) {
+ NCR5380_write(INITIATOR_COMMAND_REG,
+ ICR_BASE | ICR_ASSERT_ATN | ICR_ASSERT_ACK);
+ rc = NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, 0, 3 * HZ);
+ if (rc < 0)
+ goto timeout;
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN);
}
*/
return len ? -1 : 0;
+
+timeout:
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ return -1;
}
#if defined(REAL_DMA)
/*
* Function : int NCR5380_transfer_dma (struct Scsi_Host *instance,
- * unsigned char *phase, int *count, unsigned char **data)
+ * unsigned char *phase, int *count, unsigned char **data)
*
* Purpose : transfers data in given phase using either real
- * or pseudo DMA.
+ * or pseudo DMA.
*
* Inputs : instance - instance of driver, *phase - pointer to
- * what phase is expected, *count - pointer to number of
- * bytes to transfer, **data - pointer to data pointer.
+ * what phase is expected, *count - pointer to number of
+ * bytes to transfer, **data - pointer to data pointer.
*
* Returns : -1 when different phase is entered without transferring
- * maximum number of bytes, 0 if all bytes or transferred or exit
- * is in same phase.
- *
- * Also, *phase, *count, *data are modified in place.
+ * maximum number of bytes, 0 if all bytes are transferred or exit
+ * is in same phase.
*
+ * Also, *phase, *count, *data are modified in place.
*/
unsigned char *phase, int *count,
unsigned char **data)
{
- SETUP_HOSTDATA(instance);
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
register int c = *count;
register unsigned char p = *phase;
- unsigned long flags;
#if defined(CONFIG_SUN3)
/* sanity check */
}
hostdata->dma_len = c;
- dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
- instance->host_no, (p & SR_IO) ? "reading" : "writing",
- c, (p & SR_IO) ? "to" : "from", *data);
+ dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+ (p & SR_IO) ? "receive" : "send", c, *data);
/* netbsd turns off ints here, why not be safe and do it too */
- local_irq_save(flags);
/* send start chain */
sun3scsi_dma_start(c, *data);
+ NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+ MR_ENABLE_EOP_INTR);
if (p & SR_IO) {
- NCR5380_write(TARGET_COMMAND_REG, 1);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
NCR5380_write(INITIATOR_COMMAND_REG, 0);
- NCR5380_write(MODE_REG,
- (NCR5380_read(MODE_REG) | MR_DMA_MODE | MR_ENABLE_EOP_INTR));
NCR5380_write(START_DMA_INITIATOR_RECEIVE_REG, 0);
} else {
- NCR5380_write(TARGET_COMMAND_REG, 0);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_DATA);
- NCR5380_write(MODE_REG,
- (NCR5380_read(MODE_REG) | MR_DMA_MODE | MR_ENABLE_EOP_INTR));
NCR5380_write(START_DMA_SEND_REG, 0);
}
dregs->csr |= CSR_DMA_ENABLE;
#endif
- local_irq_restore(flags);
-
sun3_dma_active = 1;
#else /* !defined(CONFIG_SUN3) */
if (hostdata->read_overruns && (p & SR_IO))
c -= hostdata->read_overruns;
- dprintk(NDEBUG_DMA, "scsi%d: initializing DMA for %s, %d bytes %s %p\n",
- HOSTNO, (p & SR_IO) ? "reading" : "writing",
- c, (p & SR_IO) ? "to" : "from", d);
+ dsprintk(NDEBUG_DMA, instance, "initializing DMA %s: length %d, address %p\n",
+ (p & SR_IO) ? "receive" : "send", c, d);
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(p));
-
-#ifdef REAL_DMA
- NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_ENABLE_EOP_INTR | MR_MONITOR_BSY);
-#endif /* def REAL_DMA */
+ NCR5380_write(MODE_REG, MR_BASE | MR_DMA_MODE | MR_MONITOR_BSY |
+ MR_ENABLE_EOP_INTR);
if (!(hostdata->flags & FLAG_LATE_DMA_SETUP)) {
/* On the Medusa, it is a must to initialize the DMA before
* starting the NCR. This is also the cleaner way for the TT.
*/
- local_irq_save(flags);
hostdata->dma_len = (p & SR_IO) ?
NCR5380_dma_read_setup(instance, d, c) :
NCR5380_dma_write_setup(instance, d, c);
- local_irq_restore(flags);
}
if (p & SR_IO)
/* On the Falcon, the DMA setup must be done after the last */
/* NCR access, else the DMA setup gets trashed!
*/
- local_irq_save(flags);
hostdata->dma_len = (p & SR_IO) ?
NCR5380_dma_read_setup(instance, d, c) :
NCR5380_dma_write_setup(instance, d, c);
- local_irq_restore(flags);
}
#endif /* !defined(CONFIG_SUN3) */
* Function : NCR5380_information_transfer (struct Scsi_Host *instance)
*
* Purpose : run through the various SCSI phases and do as the target
- * directs us to. Operates on the currently connected command,
- * instance->connected.
+ * directs us to. Operates on the currently connected command,
+ * hostdata->connected.
*
* Inputs : instance, instance for which we are doing commands
*
* Side effects : SCSI things happen, the disconnected queue will be
- * modified if a command disconnects, *instance->connected will
- * change.
+ * modified if a command disconnects, hostdata->connected will
+ * change.
*
* XXX Note : we need to watch for bus free or a reset condition here
- * to recover from an unexpected bus free condition.
+ * to recover from an unexpected bus free condition.
*/
static void NCR5380_information_transfer(struct Scsi_Host *instance)
{
- SETUP_HOSTDATA(instance);
- unsigned long flags;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char msgout = NOP;
int sink = 0;
int len;
#endif
unsigned char *data;
unsigned char phase, tmp, extended_msg[10], old_phase = 0xff;
- struct scsi_cmnd *cmd = (struct scsi_cmnd *) hostdata->connected;
+ struct scsi_cmnd *cmd;
#ifdef SUN3_SCSI_VME
dregs->csr |= CSR_INTR;
#endif
- while (1) {
+ while ((cmd = hostdata->connected)) {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
+
tmp = NCR5380_read(STATUS_REG);
/* We only have a valid SCSI phase when REQ is asserted */
if (tmp & SR_REQ) {
/* this command setup for dma yet? */
if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != cmd)) {
if (cmd->request->cmd_type == REQ_TYPE_FS) {
- sun3scsi_dma_setup(d, count,
+ sun3scsi_dma_setup(instance, d, count,
rq_data_dir(cmd->request));
sun3_dma_setup_done = cmd;
}
NCR5380_write(TARGET_COMMAND_REG, PHASE_SR_TO_TCR(tmp));
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_ATN |
- ICR_ASSERT_ACK);
+ ICR_ASSERT_ACK);
while (NCR5380_read(STATUS_REG) & SR_REQ)
;
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
- ICR_ASSERT_ATN);
+ ICR_ASSERT_ATN);
sink = 0;
continue;
}
switch (phase) {
case PHASE_DATAOUT:
#if (NDEBUG & NDEBUG_NO_DATAOUT)
- printk("scsi%d: NDEBUG_NO_DATAOUT set, attempted DATAOUT "
- "aborted\n", HOSTNO);
+ shost_printk(KERN_DEBUG, instance, "NDEBUG_NO_DATAOUT set, attempted DATAOUT aborted\n");
sink = 1;
do_abort(instance);
cmd->result = DID_ERROR << 16;
- cmd->scsi_done(cmd);
+ complete_cmd(instance, cmd);
return;
#endif
case PHASE_DATAIN:
--cmd->SCp.buffers_residual;
cmd->SCp.this_residual = cmd->SCp.buffer->length;
cmd->SCp.ptr = sg_virt(cmd->SCp.buffer);
- /* ++roman: Try to merge some scatter-buffers if
- * they are at contiguous physical addresses.
- */
merge_contiguous_buffers(cmd);
- dprintk(NDEBUG_INFORMATION, "scsi%d: %d bytes and %d buffers left\n",
- HOSTNO, cmd->SCp.this_residual,
- cmd->SCp.buffers_residual);
+ dsprintk(NDEBUG_INFORMATION, instance, "%d bytes and %d buffers left\n",
+ cmd->SCp.this_residual,
+ cmd->SCp.buffers_residual);
}
/*
*/
/* ++roman: I suggest, this should be
- * #if def(REAL_DMA)
+ * #if def(REAL_DMA)
* instead of leaving REAL_DMA out.
*/
#if defined(REAL_DMA)
- if (
#if !defined(CONFIG_SUN3)
- !cmd->device->borken &&
+ transfersize = 0;
+ if (!cmd->device->borken)
#endif
- (transfersize = NCR5380_dma_xfer_len(instance, cmd, phase)) >= DMA_MIN_SIZE) {
+ transfersize = NCR5380_dma_xfer_len(instance, cmd, phase);
+
+ if (transfersize >= DMA_MIN_SIZE) {
len = transfersize;
cmd->SCp.phase = phase;
if (NCR5380_transfer_dma(instance, &phase,
/*
* If the watchdog timer fires, all future
* accesses to this device will use the
- * polled-IO. */
+ * polled-IO.
+ */
scmd_printk(KERN_INFO, cmd,
"switching to slow handshake\n");
cmd->device->borken = 1;
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE |
- ICR_ASSERT_ATN);
sink = 1;
do_abort(instance);
cmd->result = DID_ERROR << 16;
- cmd->scsi_done(cmd);
+ complete_cmd(instance, cmd);
/* XXX - need to source or sink data here, as appropriate */
} else {
#ifdef REAL_DMA
}
} else
#endif /* defined(REAL_DMA) */
+ {
+ spin_unlock_irq(&hostdata->lock);
NCR5380_transfer_pio(instance, &phase,
- (int *)&cmd->SCp.this_residual,
- (unsigned char **)&cmd->SCp.ptr);
+ (int *)&cmd->SCp.this_residual,
+ (unsigned char **)&cmd->SCp.ptr);
+ spin_lock_irq(&hostdata->lock);
+ }
#if defined(CONFIG_SUN3) && defined(REAL_DMA)
/* if we had intended to dma that command clear it */
if (sun3_dma_setup_done == cmd)
case PHASE_MSGIN:
len = 1;
data = &tmp;
- NCR5380_write(SELECT_ENABLE_REG, 0); /* disable reselects */
NCR5380_transfer_pio(instance, &phase, &len, &data);
cmd->SCp.Message = tmp;
switch (tmp) {
- /*
- * Linking lets us reduce the time required to get the
- * next command out to the device, hopefully this will
- * mean we don't waste another revolution due to the delays
- * required by ARBITRATION and another SELECTION.
- *
- * In the current implementation proposal, low level drivers
- * merely have to start the next command, pointed to by
- * next_link, done() is called as with unlinked commands.
- */
-#ifdef LINKED
- case LINKED_CMD_COMPLETE:
- case LINKED_FLG_CMD_COMPLETE:
- /* Accept message by clearing ACK */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
-
- dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %llu linked command "
- "complete.\n", HOSTNO, cmd->device->id, cmd->device->lun);
-
- /* Enable reselect interrupts */
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- /*
- * Sanity check : A linked command should only terminate
- * with one of these messages if there are more linked
- * commands available.
- */
-
- if (!cmd->next_link) {
- printk(KERN_NOTICE "scsi%d: target %d lun %llu "
- "linked command complete, no next_link\n",
- HOSTNO, cmd->device->id, cmd->device->lun);
- sink = 1;
- do_abort(instance);
- return;
- }
-
- initialize_SCp(cmd->next_link);
- /* The next command is still part of this process; copy it
- * and don't free it! */
- cmd->next_link->tag = cmd->tag;
- cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
- dprintk(NDEBUG_LINKED, "scsi%d: target %d lun %llu linked request "
- "done, calling scsi_done().\n",
- HOSTNO, cmd->device->id, cmd->device->lun);
- cmd->scsi_done(cmd);
- cmd = hostdata->connected;
- break;
-#endif /* def LINKED */
case ABORT:
case COMMAND_COMPLETE:
/* Accept message by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d, lun %llu "
- "completed\n", HOSTNO, cmd->device->id, cmd->device->lun);
+ dsprintk(NDEBUG_QUEUES, instance,
+ "COMMAND COMPLETE %p target %d lun %llu\n",
+ cmd, scmd_id(cmd), cmd->device->lun);
- local_irq_save(flags);
- hostdata->retain_dma_intr++;
hostdata->connected = NULL;
#ifdef SUPPORT_TAGS
cmd_free_tag(cmd);
if (status_byte(cmd->SCp.Status) == QUEUE_FULL) {
- /* Turn a QUEUE FULL status into BUSY, I think the
- * mid level cannot handle QUEUE FULL :-( (The
- * command is retried after BUSY). Also update our
- * queue size to the number of currently issued
- * commands now.
- */
- /* ++Andreas: the mid level code knows about
- QUEUE_FULL now. */
- struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][cmd->device->lun];
- dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %llu returned "
- "QUEUE_FULL after %d commands\n",
- HOSTNO, cmd->device->id, cmd->device->lun,
- ta->nr_allocated);
+ u8 lun = cmd->device->lun;
+ struct tag_alloc *ta = &hostdata->TagAlloc[scmd_id(cmd)][lun];
+
+ dsprintk(NDEBUG_TAGS, instance,
+ "QUEUE_FULL %p target %d lun %d nr_allocated %d\n",
+ cmd, scmd_id(cmd), lun, ta->nr_allocated);
if (ta->queue_size > ta->nr_allocated)
- ta->nr_allocated = ta->queue_size;
+ ta->queue_size = ta->nr_allocated;
}
-#else
- hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
#endif
- /* Enable reselect interrupts */
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
-
- /*
- * I'm not sure what the correct thing to do here is :
- *
- * If the command that just executed is NOT a request
- * sense, the obvious thing to do is to set the result
- * code to the values of the stored parameters.
- *
- * If it was a REQUEST SENSE command, we need some way to
- * differentiate between the failure code of the original
- * and the failure code of the REQUEST sense - the obvious
- * case is success, where we fall through and leave the
- * result code unchanged.
- *
- * The non-obvious place is where the REQUEST SENSE failed
- */
-
- if (cmd->cmnd[0] != REQUEST_SENSE)
- cmd->result = cmd->SCp.Status | (cmd->SCp.Message << 8);
- else if (status_byte(cmd->SCp.Status) != GOOD)
- cmd->result = (cmd->result & 0x00ffff) | (DID_ERROR << 16);
-
- if ((cmd->cmnd[0] == REQUEST_SENSE) &&
- hostdata->ses.cmd_len) {
- scsi_eh_restore_cmnd(cmd, &hostdata->ses);
- hostdata->ses.cmd_len = 0 ;
- }
-
- if ((cmd->cmnd[0] != REQUEST_SENSE) &&
- (status_byte(cmd->SCp.Status) == CHECK_CONDITION)) {
- scsi_eh_prep_cmnd(cmd, &hostdata->ses, NULL, 0, ~0);
- dprintk(NDEBUG_AUTOSENSE, "scsi%d: performing request sense\n", HOSTNO);
-
- LIST(cmd,hostdata->issue_queue);
- SET_NEXT(cmd, hostdata->issue_queue);
- hostdata->issue_queue = (struct scsi_cmnd *) cmd;
- dprintk(NDEBUG_QUEUES, "scsi%d: REQUEST SENSE added to head of "
- "issue queue\n", H_NO(cmd));
- } else {
- cmd->scsi_done(cmd);
+ cmd->result &= ~0xffff;
+ cmd->result |= cmd->SCp.Status;
+ cmd->result |= cmd->SCp.Message << 8;
+
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ complete_cmd(instance, cmd);
+ else {
+ if (cmd->SCp.Status == SAM_STAT_CHECK_CONDITION ||
+ cmd->SCp.Status == SAM_STAT_COMMAND_TERMINATED) {
+ dsprintk(NDEBUG_QUEUES, instance, "autosense: adding cmd %p to tail of autosense queue\n",
+ cmd);
+ list_add_tail(&ncmd->list,
+ &hostdata->autosense);
+ } else
+ complete_cmd(instance, cmd);
}
- local_irq_restore(flags);
-
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
/*
* Restore phase bits to 0 so an interrupted selection,
* arbitration can resume.
*/
NCR5380_write(TARGET_COMMAND_REG, 0);
- while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
- barrier();
+ /* Enable reselect interrupts */
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- local_irq_save(flags);
- hostdata->retain_dma_intr--;
- /* ++roman: For Falcon SCSI, release the lock on the
- * ST-DMA here if no other commands are waiting on the
- * disconnected queue.
- */
maybe_release_dma_irq(instance);
- local_irq_restore(flags);
return;
case MESSAGE_REJECT:
/* Accept message by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- /* Enable reselect interrupts */
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
switch (hostdata->last_message) {
case HEAD_OF_QUEUE_TAG:
case ORDERED_QUEUE_TAG:
cmd->device->tagged_supported = 0;
hostdata->busy[cmd->device->id] |= (1 << cmd->device->lun);
cmd->tag = TAG_NONE;
- dprintk(NDEBUG_TAGS, "scsi%d: target %d lun %llu rejected "
- "QUEUE_TAG message; tagged queuing "
- "disabled\n",
- HOSTNO, cmd->device->id, cmd->device->lun);
+ dsprintk(NDEBUG_TAGS, instance, "target %d lun %llu rejected QUEUE_TAG message; tagged queuing disabled\n",
+ scmd_id(cmd), cmd->device->lun);
break;
}
break;
case DISCONNECT:
/* Accept message by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- local_irq_save(flags);
- cmd->device->disconnect = 1;
- LIST(cmd,hostdata->disconnected_queue);
- SET_NEXT(cmd, hostdata->disconnected_queue);
hostdata->connected = NULL;
- hostdata->disconnected_queue = cmd;
- local_irq_restore(flags);
- dprintk(NDEBUG_QUEUES, "scsi%d: command for target %d lun %llu was "
- "moved from connected to the "
- "disconnected_queue\n", HOSTNO,
- cmd->device->id, cmd->device->lun);
+ list_add(&ncmd->list, &hostdata->disconnected);
+ dsprintk(NDEBUG_INFORMATION | NDEBUG_QUEUES,
+ instance, "connected command %p for target %d lun %llu moved to disconnected queue\n",
+ cmd, scmd_id(cmd), cmd->device->lun);
+
/*
* Restore phase bits to 0 so an interrupted selection,
* arbitration can resume.
/* Enable reselect interrupts */
NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
- /* Wait for bus free to avoid nasty timeouts */
- while ((NCR5380_read(STATUS_REG) & SR_BSY) && !hostdata->connected)
- barrier();
#ifdef SUN3_SCSI_VME
dregs->csr |= CSR_DMA_ENABLE;
#endif
case RESTORE_POINTERS:
/* Accept message by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- /* Enable reselect interrupts */
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
break;
case EXTENDED_MESSAGE:
/*
- * Extended messages are sent in the following format :
- * Byte
- * 0 EXTENDED_MESSAGE == 1
- * 1 length (includes one byte for code, doesn't
- * include first two bytes)
- * 2 code
- * 3..length+1 arguments
- *
- * Start the extended message buffer with the EXTENDED_MESSAGE
+ * Start the message buffer with the EXTENDED_MESSAGE
* byte, since spi_print_msg() wants the whole thing.
*/
extended_msg[0] = EXTENDED_MESSAGE;
/* Accept first byte by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- dprintk(NDEBUG_EXTENDED, "scsi%d: receiving extended message\n", HOSTNO);
+ spin_unlock_irq(&hostdata->lock);
+
+ dsprintk(NDEBUG_EXTENDED, instance, "receiving extended message\n");
len = 2;
data = extended_msg + 1;
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
- dprintk(NDEBUG_EXTENDED, "scsi%d: length=%d, code=0x%02x\n", HOSTNO,
- (int)extended_msg[1], (int)extended_msg[2]);
+ dsprintk(NDEBUG_EXTENDED, instance, "length %d, code 0x%02x\n",
+ (int)extended_msg[1],
+ (int)extended_msg[2]);
- if (!len && extended_msg[1] <=
- (sizeof(extended_msg) - 1)) {
+ if (!len && extended_msg[1] > 0 &&
+ extended_msg[1] <= sizeof(extended_msg) - 2) {
/* Accept third byte by clearing ACK */
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
len = extended_msg[1] - 1;
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
- dprintk(NDEBUG_EXTENDED, "scsi%d: message received, residual %d\n",
- HOSTNO, len);
+ dsprintk(NDEBUG_EXTENDED, instance, "message received, residual %d\n",
+ len);
switch (extended_msg[2]) {
case EXTENDED_SDTR:
tmp = 0;
}
} else if (len) {
- printk(KERN_NOTICE "scsi%d: error receiving "
- "extended message\n", HOSTNO);
+ shost_printk(KERN_ERR, instance, "error receiving extended message\n");
tmp = 0;
} else {
- printk(KERN_NOTICE "scsi%d: extended message "
- "code %02x length %d is too long\n",
- HOSTNO, extended_msg[2], extended_msg[1]);
+ shost_printk(KERN_NOTICE, instance, "extended message code %02x length %d is too long\n",
+ extended_msg[2], extended_msg[1]);
tmp = 0;
}
+
+ spin_lock_irq(&hostdata->lock);
+ if (!hostdata->connected)
+ return;
+
/* Fall through to reject message */
/*
*/
default:
if (!tmp) {
- printk(KERN_INFO "scsi%d: rejecting message ",
- instance->host_no);
+ shost_printk(KERN_ERR, instance, "rejecting message ");
spi_print_msg(extended_msg);
printk("\n");
} else if (tmp != EXTENDED_MESSAGE)
hostdata->last_message = msgout;
NCR5380_transfer_pio(instance, &phase, &len, &data);
if (msgout == ABORT) {
- local_irq_save(flags);
-#ifdef SUPPORT_TAGS
- cmd_free_tag(cmd);
-#else
- hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
hostdata->connected = NULL;
cmd->result = DID_ERROR << 16;
- NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
+ complete_cmd(instance, cmd);
maybe_release_dma_irq(instance);
- local_irq_restore(flags);
- cmd->scsi_done(cmd);
+ NCR5380_write(SELECT_ENABLE_REG, hostdata->id_mask);
return;
}
msgout = NOP;
cmd->SCp.Status = tmp;
break;
default:
- printk("scsi%d: unknown phase\n", HOSTNO);
+ shost_printk(KERN_ERR, instance, "unknown phase\n");
NCR5380_dprint(NDEBUG_ANY, instance);
} /* switch(phase) */
- } /* if (tmp * SR_REQ) */
- } /* while (1) */
+ } else {
+ spin_unlock_irq(&hostdata->lock);
+ NCR5380_poll_politely(instance, STATUS_REG, SR_REQ, SR_REQ, HZ);
+ spin_lock_irq(&hostdata->lock);
+ }
+ }
}
/*
* Function : void NCR5380_reselect (struct Scsi_Host *instance)
*
* Purpose : does reselection, initializing the instance->connected
- * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
- * nexus has been reestablished,
+ * field to point to the scsi_cmnd for which the I_T_L or I_T_L_Q
+ * nexus has been reestablished,
*
* Inputs : instance - this instance of the NCR5380.
- *
*/
static void NCR5380_reselect(struct Scsi_Host *instance)
{
- SETUP_HOSTDATA(instance);
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
unsigned char target_mask;
unsigned char lun;
#ifdef SUPPORT_TAGS
unsigned char msg[3];
int __maybe_unused len;
unsigned char __maybe_unused *data, __maybe_unused phase;
- struct scsi_cmnd *tmp = NULL, *prev;
+ struct NCR5380_cmd *ncmd;
+ struct scsi_cmnd *tmp;
/*
* Disable arbitration, etc. since the host adapter obviously
*/
NCR5380_write(MODE_REG, MR_BASE);
- hostdata->restart_select = 1;
target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask);
- dprintk(NDEBUG_RESELECTION, "scsi%d: reselect\n", HOSTNO);
+ dsprintk(NDEBUG_RESELECTION, instance, "reselect\n");
/*
* At this point, we have detected that our SCSI ID is on the bus,
*/
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY);
-
- while (NCR5380_read(STATUS_REG) & SR_SEL)
- ;
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) {
+ NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
+ return;
+ }
NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
/*
* Wait for target to go into MSGIN.
*/
- while (!(NCR5380_read(STATUS_REG) & SR_REQ))
- ;
+ if (NCR5380_poll_politely(instance,
+ STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) {
+ do_abort(instance);
+ return;
+ }
#if defined(CONFIG_SUN3) && defined(REAL_DMA)
/* acknowledge toggle to MSGIN */
data = msg;
phase = PHASE_MSGIN;
NCR5380_transfer_pio(instance, &phase, &len, &data);
+
+ if (len) {
+ do_abort(instance);
+ return;
+ }
#endif
if (!(msg[0] & 0x80)) {
- printk(KERN_DEBUG "scsi%d: expecting IDENTIFY message, got ", HOSTNO);
+ shost_printk(KERN_ERR, instance, "expecting IDENTIFY message, got ");
spi_print_msg(msg);
+ printk("\n");
do_abort(instance);
return;
}
- lun = (msg[0] & 0x07);
+ lun = msg[0] & 0x07;
#if defined(SUPPORT_TAGS) && !defined(CONFIG_SUN3)
/* If the phase is still MSGIN, the target wants to send some more
if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
msg[1] == SIMPLE_QUEUE_TAG)
tag = msg[2];
- dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at "
- "reselection\n", HOSTNO, target_mask, lun, tag);
+ dsprintk(NDEBUG_TAGS, instance, "reselect: target mask %02x, lun %d sent tag %d\n",
+ target_mask, lun, tag);
}
#endif
* just reestablished, and remove it from the disconnected queue.
*/
- for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue, prev = NULL;
- tmp; prev = tmp, tmp = NEXT(tmp)) {
- if ((target_mask == (1 << tmp->device->id)) && (lun == tmp->device->lun)
+ tmp = NULL;
+ list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+ if (target_mask == (1 << scmd_id(cmd)) &&
+ lun == (u8)cmd->device->lun
#ifdef SUPPORT_TAGS
- && (tag == tmp->tag)
+ && (tag == cmd->tag)
#endif
) {
- if (prev) {
- REMOVE(prev, NEXT(prev), tmp, NEXT(tmp));
- SET_NEXT(prev, NEXT(tmp));
- } else {
- REMOVE(-1, hostdata->disconnected_queue, tmp, NEXT(tmp));
- hostdata->disconnected_queue = NEXT(tmp);
- }
- SET_NEXT(tmp, NULL);
+ list_del(&ncmd->list);
+ tmp = cmd;
break;
}
}
- if (!tmp) {
- printk(KERN_WARNING "scsi%d: warning: target bitmask %02x lun %d "
-#ifdef SUPPORT_TAGS
- "tag %d "
-#endif
- "not in disconnected_queue.\n",
- HOSTNO, target_mask, lun
+ if (tmp) {
+ dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance,
+ "reselect: removed %p from disconnected queue\n", tmp);
+ } else {
+
#ifdef SUPPORT_TAGS
- , tag
+ shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d tag %d not in disconnected queue.\n",
+ target_mask, lun, tag);
+#else
+ shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n",
+ target_mask, lun);
#endif
- );
/*
* Since we have an established nexus that we can't do anything
* with, we must abort it.
}
/* setup this command for dma if not already */
if ((count >= DMA_MIN_SIZE) && (sun3_dma_setup_done != tmp)) {
- sun3scsi_dma_setup(d, count, rq_data_dir(tmp->request));
+ sun3scsi_dma_setup(instance, d, count,
+ rq_data_dir(tmp->request));
sun3_dma_setup_done = tmp;
}
}
if (!NCR5380_transfer_pio(instance, &phase, &len, &data) &&
msg[1] == SIMPLE_QUEUE_TAG)
tag = msg[2];
- dprintk(NDEBUG_TAGS, "scsi%d: target mask %02x, lun %d sent tag %d at reselection\n"
- HOSTNO, target_mask, lun, tag);
+ dsprintk(NDEBUG_TAGS, instance, "reselect: target mask %02x, lun %d sent tag %d\n",
+ target_mask, lun, tag);
}
#endif
hostdata->connected = tmp;
- dprintk(NDEBUG_RESELECTION, "scsi%d: nexus established, target = %d, lun = %llu, tag = %d\n",
- HOSTNO, tmp->device->id, tmp->device->lun, tmp->tag);
+ dsprintk(NDEBUG_RESELECTION, instance, "nexus established, target %d, lun %llu, tag %d\n",
+ scmd_id(tmp), tmp->device->lun, tmp->tag);
}
-/*
- * Function : int NCR5380_abort (struct scsi_cmnd *cmd)
- *
- * Purpose : abort a command
- *
- * Inputs : cmd - the scsi_cmnd to abort, code - code to set the
- * host byte of the result field to, if zero DID_ABORTED is
- * used.
- *
- * Returns : SUCCESS - success, FAILED on failure.
- *
- * XXX - there is no way to abort the command that is currently
- * connected, you have to wait for it to complete. If this is
- * a problem, we could implement longjmp() / setjmp(), setjmp()
- * called where the loop started in NCR5380_main().
+/**
+ * list_find_cmd - test for presence of a command in a linked list
+ * @haystack: list of commands
+ * @needle: command to search for
*/
-static
-int NCR5380_abort(struct scsi_cmnd *cmd)
+static bool list_find_cmd(struct list_head *haystack,
+ struct scsi_cmnd *needle)
{
- struct Scsi_Host *instance = cmd->device->host;
- SETUP_HOSTDATA(instance);
- struct scsi_cmnd *tmp, **prev;
- unsigned long flags;
+ struct NCR5380_cmd *ncmd;
- scmd_printk(KERN_NOTICE, cmd, "aborting command\n");
+ list_for_each_entry(ncmd, haystack, list)
+ if (NCR5380_to_scmd(ncmd) == needle)
+ return true;
+ return false;
+}
- NCR5380_print_status(instance);
+/**
+ * list_del_cmd - remove a command from linked list
+ * @haystack: list of commands
+ * @needle: command to remove
+ */
- local_irq_save(flags);
+static bool list_del_cmd(struct list_head *haystack,
+ struct scsi_cmnd *needle)
+{
+ if (list_find_cmd(haystack, needle)) {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(needle);
- dprintk(NDEBUG_ABORT, "scsi%d: abort called basr 0x%02x, sr 0x%02x\n", HOSTNO,
- NCR5380_read(BUS_AND_STATUS_REG),
- NCR5380_read(STATUS_REG));
+ list_del(&ncmd->list);
+ return true;
+ }
+ return false;
+}
-#if 1
- /*
- * Case 1 : If the command is the currently executing command,
- * we'll set the aborted flag and return control so that
- * information transfer routine can exit cleanly.
- */
+/**
+ * NCR5380_abort - scsi host eh_abort_handler() method
+ * @cmd: the command to be aborted
+ *
+ * Try to abort a given command by removing it from queues and/or sending
+ * the target an abort message. This may not succeed in causing a target
+ * to abort the command. Nonetheless, the low-level driver must forget about
+ * the command because the mid-layer reclaims it and it may be re-issued.
+ *
+ * The normal path taken by a command is as follows. For EH we trace this
+ * same path to locate and abort the command.
+ *
+ * unissued -> selecting -> [unissued -> selecting ->]... connected ->
+ * [disconnected -> connected ->]...
+ * [autosense -> connected ->] done
+ *
+ * If cmd is unissued then just remove it.
+ * If cmd is disconnected, try to select the target.
+ * If cmd is connected, try to send an abort message.
+ * If cmd is waiting for autosense, give it a chance to complete but check
+ * that it isn't left connected.
+ * If cmd was not found at all then presumably it has already been completed,
+ * in which case return SUCCESS to try to avoid further EH measures.
+ * If the command has not completed yet, we must not fail to find it.
+ */
- if (hostdata->connected == cmd) {
+static int NCR5380_abort(struct scsi_cmnd *cmd)
+{
+ struct Scsi_Host *instance = cmd->device->host;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ unsigned long flags;
+ int result = SUCCESS;
- dprintk(NDEBUG_ABORT, "scsi%d: aborting connected command\n", HOSTNO);
- /*
- * We should perform BSY checking, and make sure we haven't slipped
- * into BUS FREE.
- */
+ spin_lock_irqsave(&hostdata->lock, flags);
- /* NCR5380_write(INITIATOR_COMMAND_REG, ICR_ASSERT_ATN); */
- /*
- * Since we can't change phases until we've completed the current
- * handshake, we have to source or sink a byte of data if the current
- * phase is not MSGOUT.
- */
+#if (NDEBUG & NDEBUG_ANY)
+ scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+ NCR5380_dprint(NDEBUG_ANY, instance);
+ NCR5380_dprint_phase(NDEBUG_ANY, instance);
- /*
- * Return control to the executing NCR drive so we can clear the
- * aborted flag and get back into our main loop.
- */
+ if (list_del_cmd(&hostdata->unissued, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from issue queue\n", cmd);
+ cmd->result = DID_ABORT << 16;
+ cmd->scsi_done(cmd); /* No tag or busy flag to worry about */
+ /* cmd was never issued, so none of the later states can apply */
+ goto out;
+ }
- if (do_abort(instance) == 0) {
- hostdata->aborted = 1;
- hostdata->connected = NULL;
- cmd->result = DID_ABORT << 16;
-#ifdef SUPPORT_TAGS
- cmd_free_tag(cmd);
-#else
- hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
- maybe_release_dma_irq(instance);
- local_irq_restore(flags);
- cmd->scsi_done(cmd);
- return SUCCESS;
- } else {
- local_irq_restore(flags);
- printk("scsi%d: abort of connected command failed!\n", HOSTNO);
- return FAILED;
- }
+ if (hostdata->selecting == cmd) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: cmd %p == selecting\n", cmd);
+ hostdata->selecting = NULL;
+ cmd->result = DID_ABORT << 16;
+ complete_cmd(instance, cmd);
+ goto out;
}
-#endif
- /*
- * Case 2 : If the command hasn't been issued yet, we simply remove it
- * from the issue queue.
- */
- for (prev = (struct scsi_cmnd **)&(hostdata->issue_queue),
- tmp = (struct scsi_cmnd *)hostdata->issue_queue;
- tmp; prev = NEXTADDR(tmp), tmp = NEXT(tmp)) {
- if (cmd == tmp) {
- REMOVE(5, *prev, tmp, NEXT(tmp));
- (*prev) = NEXT(tmp);
- SET_NEXT(tmp, NULL);
- tmp->result = DID_ABORT << 16;
- maybe_release_dma_irq(instance);
- local_irq_restore(flags);
- dprintk(NDEBUG_ABORT, "scsi%d: abort removed command from issue queue.\n",
- HOSTNO);
- /* Tagged queuing note: no tag to free here, hasn't been assigned
- * yet... */
- tmp->scsi_done(tmp);
- return SUCCESS;
+ if (list_del_cmd(&hostdata->disconnected, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from disconnected list\n", cmd);
+ cmd->result = DID_ERROR << 16;
+ if (!hostdata->connected)
+ NCR5380_select(instance, cmd);
+ if (hostdata->connected != cmd) {
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
}
}
- /*
- * Case 3 : If any commands are connected, we're going to fail the abort
- * and let the high level SCSI driver retry at a later time or
- * issue a reset.
- *
- * Timeouts, and therefore aborted commands, will be highly unlikely
- * and handling them cleanly in this situation would make the common
- * case of noresets less efficient, and would pollute our code. So,
- * we fail.
- */
+ if (hostdata->connected == cmd) {
+ dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+ hostdata->connected = NULL;
+ if (do_abort(instance)) {
+ set_host_byte(cmd, DID_ERROR);
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
+ }
+ set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+ hostdata->dma_len = 0;
+#endif
+ if (cmd->cmnd[0] == REQUEST_SENSE)
+ complete_cmd(instance, cmd);
+ else {
+ struct NCR5380_cmd *ncmd = scsi_cmd_priv(cmd);
- if (hostdata->connected) {
- local_irq_restore(flags);
- dprintk(NDEBUG_ABORT, "scsi%d: abort failed, command connected.\n", HOSTNO);
- return FAILED;
+ /* Perform autosense for this command */
+ list_add(&ncmd->list, &hostdata->autosense);
+ }
}
- /*
- * Case 4: If the command is currently disconnected from the bus, and
- * there are no connected commands, we reconnect the I_T_L or
- * I_T_L_Q nexus associated with it, go into message out, and send
- * an abort message.
- *
- * This case is especially ugly. In order to reestablish the nexus, we
- * need to call NCR5380_select(). The easiest way to implement this
- * function was to abort if the bus was busy, and let the interrupt
- * handler triggered on the SEL for reselect take care of lost arbitrations
- * where necessary, meaning interrupts need to be enabled.
- *
- * When interrupts are enabled, the queues may change - so we
- * can't remove it from the disconnected queue before selecting it
- * because that could cause a failure in hashing the nexus if that
- * device reselected.
- *
- * Since the queues may change, we can't use the pointers from when we
- * first locate it.
- *
- * So, we must first locate the command, and if NCR5380_select()
- * succeeds, then issue the abort, relocate the command and remove
- * it from the disconnected queue.
- */
-
- for (tmp = (struct scsi_cmnd *) hostdata->disconnected_queue; tmp;
- tmp = NEXT(tmp)) {
- if (cmd == tmp) {
- local_irq_restore(flags);
- dprintk(NDEBUG_ABORT, "scsi%d: aborting disconnected command.\n", HOSTNO);
-
- if (NCR5380_select(instance, cmd))
- return FAILED;
-
- dprintk(NDEBUG_ABORT, "scsi%d: nexus reestablished.\n", HOSTNO);
-
- do_abort(instance);
-
- local_irq_save(flags);
- for (prev = (struct scsi_cmnd **)&(hostdata->disconnected_queue),
- tmp = (struct scsi_cmnd *)hostdata->disconnected_queue;
- tmp; prev = NEXTADDR(tmp), tmp = NEXT(tmp)) {
- if (cmd == tmp) {
- REMOVE(5, *prev, tmp, NEXT(tmp));
- *prev = NEXT(tmp);
- SET_NEXT(tmp, NULL);
- tmp->result = DID_ABORT << 16;
- /* We must unlock the tag/LUN immediately here, since the
- * target goes to BUS FREE and doesn't send us another
- * message (COMMAND_COMPLETE or the like)
- */
-#ifdef SUPPORT_TAGS
- cmd_free_tag(tmp);
-#else
- hostdata->busy[cmd->device->id] &= ~(1 << cmd->device->lun);
-#endif
- maybe_release_dma_irq(instance);
- local_irq_restore(flags);
- tmp->scsi_done(tmp);
- return SUCCESS;
- }
- }
+ if (list_find_cmd(&hostdata->autosense, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: found %p on sense queue\n", cmd);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ msleep(1000);
+ spin_lock_irqsave(&hostdata->lock, flags);
+ if (list_del_cmd(&hostdata->autosense, cmd)) {
+ dsprintk(NDEBUG_ABORT, instance,
+ "abort: removed %p from sense queue\n", cmd);
+ set_host_byte(cmd, DID_ABORT);
+ complete_cmd(instance, cmd);
+ goto out;
}
}
- /* Maybe it is sufficient just to release the ST-DMA lock... (if
- * possible at all) At least, we should check if the lock could be
- * released after the abort, in case it is kept due to some bug.
- */
- maybe_release_dma_irq(instance);
- local_irq_restore(flags);
+ if (hostdata->connected == cmd) {
+ dsprintk(NDEBUG_ABORT, instance, "abort: cmd %p is connected\n", cmd);
+ hostdata->connected = NULL;
+ if (do_abort(instance)) {
+ set_host_byte(cmd, DID_ERROR);
+ complete_cmd(instance, cmd);
+ result = FAILED;
+ goto out;
+ }
+ set_host_byte(cmd, DID_ABORT);
+#ifdef REAL_DMA
+ hostdata->dma_len = 0;
+#endif
+ complete_cmd(instance, cmd);
+ }
- /*
- * Case 5 : If we reached this point, the command was not found in any of
- * the queues.
- *
- * We probably reached this point because of an unlikely race condition
- * between the command completing successfully and the abortion code,
- * so we won't panic, but we will notify the user in case something really
- * broke.
- */
+out:
+ if (result == FAILED)
+ dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd);
+ else
+ dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd);
- printk(KERN_INFO "scsi%d: warning : SCSI command probably completed successfully before abortion\n", HOSTNO);
+ queue_work(hostdata->work_q, &hostdata->main_task);
+ maybe_release_dma_irq(instance);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
- return FAILED;
+ return result;
}
-/*
- * Function : int NCR5380_reset (struct scsi_cmnd *cmd)
- *
- * Purpose : reset the SCSI bus.
- *
- * Returns : SUCCESS or FAILURE
+/**
+ * NCR5380_bus_reset - reset the SCSI bus
+ * @cmd: SCSI command undergoing EH
*
+ * Returns SUCCESS
*/
static int NCR5380_bus_reset(struct scsi_cmnd *cmd)
struct NCR5380_hostdata *hostdata = shost_priv(instance);
int i;
unsigned long flags;
+ struct NCR5380_cmd *ncmd;
- NCR5380_print_status(instance);
+ spin_lock_irqsave(&hostdata->lock, flags);
+
+#if (NDEBUG & NDEBUG_ANY)
+ scmd_printk(KERN_INFO, cmd, __func__);
+#endif
+ NCR5380_dprint(NDEBUG_ANY, instance);
+ NCR5380_dprint_phase(NDEBUG_ANY, instance);
+
+ do_reset(instance);
- /* get in phase */
- NCR5380_write(TARGET_COMMAND_REG,
- PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG)));
- /* assert RST */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
- udelay(40);
/* reset NCR registers */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
NCR5380_write(MODE_REG, MR_BASE);
NCR5380_write(TARGET_COMMAND_REG, 0);
NCR5380_write(SELECT_ENABLE_REG, 0);
- /* ++roman: reset interrupt condition! otherwise no interrupts don't get
- * through anymore ... */
- (void)NCR5380_read(RESET_PARITY_INTERRUPT_REG);
/* After the reset, there are no more connected or disconnected commands
* and no busy units; so clear the low-level status here to avoid
* commands!
*/
- if (hostdata->issue_queue)
- dprintk(NDEBUG_ABORT, "scsi%d: reset aborted issued command(s)\n", H_NO(cmd));
- if (hostdata->connected)
- dprintk(NDEBUG_ABORT, "scsi%d: reset aborted a connected command\n", H_NO(cmd));
- if (hostdata->disconnected_queue)
- dprintk(NDEBUG_ABORT, "scsi%d: reset aborted disconnected command(s)\n", H_NO(cmd));
+ hostdata->selecting = NULL;
+
+ list_for_each_entry(ncmd, &hostdata->disconnected, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+ set_host_byte(cmd, DID_RESET);
+ cmd->scsi_done(cmd);
+ }
+ /* Every entry was completed above; empty the list so none is found again */
+ INIT_LIST_HEAD(&hostdata->disconnected);
+
+ list_for_each_entry(ncmd, &hostdata->autosense, list) {
+ struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd);
+
+ set_host_byte(cmd, DID_RESET);
+ cmd->scsi_done(cmd);
+ }
+ /* Likewise, drop the completed commands from the autosense queue */
+ INIT_LIST_HEAD(&hostdata->autosense);
+
+
+ if (hostdata->connected) {
+ set_host_byte(hostdata->connected, DID_RESET);
+ complete_cmd(instance, hostdata->connected);
+ hostdata->connected = NULL;
+ }
+
+ if (hostdata->sensing) {
+ /* Mark the command being sensed, not ->connected (NULL by now) */
+ set_host_byte(hostdata->sensing, DID_RESET);
+ complete_cmd(instance, hostdata->sensing);
+ hostdata->sensing = NULL;
+ }
- local_irq_save(flags);
- hostdata->issue_queue = NULL;
- hostdata->connected = NULL;
- hostdata->disconnected_queue = NULL;
#ifdef SUPPORT_TAGS
free_all_tags(hostdata);
#endif
hostdata->dma_len = 0;
#endif
+ queue_work(hostdata->work_q, &hostdata->main_task);
maybe_release_dma_irq(instance);
- local_irq_restore(flags);
+ spin_unlock_irqrestore(&hostdata->lock, flags);
return SUCCESS;
}
#include <linux/module.h>
#include <linux/types.h>
-#include <linux/delay.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#define NCR5380_queue_command atari_scsi_queue_command
#define NCR5380_abort atari_scsi_abort
-#define NCR5380_show_info atari_scsi_show_info
#define NCR5380_info atari_scsi_info
#define NCR5380_dma_read_setup(instance, data, count) \
return adr;
}
-#define HOSTDATA_DMALEN (((struct NCR5380_hostdata *) \
- (atari_scsi_host->hostdata))->dma_len)
-
-/* Time (in jiffies) to wait after a reset; the SCSI standard calls for 250ms,
- * we usually do 0.5s to be on the safe side. But Toshiba CD-ROMs once more
- * need ten times the standard value... */
-#ifndef CONFIG_ATARI_SCSI_TOSHIBA_DELAY
-#define AFTER_RESET_DELAY (HZ/2)
-#else
-#define AFTER_RESET_DELAY (5*HZ/2)
-#endif
-
#ifdef REAL_DMA
static void atari_scsi_fetch_restbytes(void);
#endif
-static struct Scsi_Host *atari_scsi_host;
static unsigned char (*atari_scsi_reg_read)(unsigned char reg);
static void (*atari_scsi_reg_write)(unsigned char reg, unsigned char value);
module_param(setup_cmd_per_lun, int, 0);
static int setup_sg_tablesize = -1;
module_param(setup_sg_tablesize, int, 0);
-#ifdef SUPPORT_TAGS
static int setup_use_tagged_queuing = -1;
module_param(setup_use_tagged_queuing, int, 0);
-#endif
static int setup_hostid = -1;
module_param(setup_hostid, int, 0);
+static int setup_toshiba_delay = -1;
+module_param(setup_toshiba_delay, int, 0);
#if defined(REAL_DMA)
#endif
-static irqreturn_t scsi_tt_intr(int irq, void *dummy)
+static irqreturn_t scsi_tt_intr(int irq, void *dev)
{
#ifdef REAL_DMA
+ struct Scsi_Host *instance = dev;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int dma_stat;
dma_stat = tt_scsi_dma.dma_ctrl;
- dprintk(NDEBUG_INTR, "scsi%d: NCR5380 interrupt, DMA status = %02x\n",
- atari_scsi_host->host_no, dma_stat & 0xff);
+ dsprintk(NDEBUG_INTR, instance, "NCR5380 interrupt, DMA status = %02x\n",
+ dma_stat & 0xff);
/* Look if it was the DMA that has interrupted: First possibility
* is that a bus error occurred...
* data reg!
*/
if ((dma_stat & 0x02) && !(dma_stat & 0x40)) {
- atari_dma_residual = HOSTDATA_DMALEN - (SCSI_DMA_READ_P(dma_addr) - atari_dma_startaddr);
+ atari_dma_residual = hostdata->dma_len -
+ (SCSI_DMA_READ_P(dma_addr) - atari_dma_startaddr);
dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
atari_dma_residual);
#endif /* REAL_DMA */
- NCR5380_intr(irq, dummy);
+ NCR5380_intr(irq, dev);
return IRQ_HANDLED;
}
-static irqreturn_t scsi_falcon_intr(int irq, void *dummy)
+static irqreturn_t scsi_falcon_intr(int irq, void *dev)
{
#ifdef REAL_DMA
+ struct Scsi_Host *instance = dev;
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int dma_stat;
/* Turn off DMA and select sector counter register before
printk(KERN_ERR "SCSI DMA error: %ld bytes lost in "
"ST-DMA fifo\n", transferred & 15);
- atari_dma_residual = HOSTDATA_DMALEN - transferred;
+ atari_dma_residual = hostdata->dma_len - transferred;
dprintk(NDEBUG_DMA, "SCSI DMA: There are %ld residual bytes.\n",
atari_dma_residual);
} else
* data to the original destination address.
*/
memcpy(atari_dma_orig_addr, phys_to_virt(atari_dma_startaddr),
- HOSTDATA_DMALEN - atari_dma_residual);
+ hostdata->dma_len - atari_dma_residual);
atari_dma_orig_addr = NULL;
}
#endif /* REAL_DMA */
- NCR5380_intr(irq, dummy);
+ NCR5380_intr(irq, dev);
+
return IRQ_HANDLED;
}
* Defaults depend on TT or Falcon, determined at run time.
* Negative values mean don't change.
*/
- int ints[6];
+ int ints[8];
get_options(str, ARRAY_SIZE(ints), ints);
setup_sg_tablesize = ints[3];
if (ints[0] >= 4)
setup_hostid = ints[4];
-#ifdef SUPPORT_TAGS
if (ints[0] >= 5)
setup_use_tagged_queuing = ints[5];
-#endif
+ /* ints[6] (use_pdma) is ignored */
+ if (ints[0] >= 7)
+ setup_toshiba_delay = ints[7];
return 1;
}
#endif /* !MODULE */
-#ifdef CONFIG_ATARI_SCSI_RESET_BOOT
-static void __init atari_scsi_reset_boot(void)
-{
- unsigned long end;
-
- /*
- * Do a SCSI reset to clean up the bus during initialization. No messing
- * with the queues, interrupts, or locks necessary here.
- */
-
- printk("Atari SCSI: resetting the SCSI bus...");
-
- /* get in phase */
- NCR5380_write(TARGET_COMMAND_REG,
- PHASE_SR_TO_TCR(NCR5380_read(STATUS_REG)));
-
- /* assert RST */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST);
- /* The min. reset hold time is 25us, so 40us should be enough */
- udelay(50);
- /* reset RST and interrupt */
- NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
-
- end = jiffies + AFTER_RESET_DELAY;
- while (time_before(jiffies, end))
- barrier();
-
- printk(" done\n");
-}
-#endif
-
#if defined(REAL_DMA)
static unsigned long atari_scsi_dma_setup(struct Scsi_Host *instance,
static struct scsi_host_template atari_scsi_template = {
.module = THIS_MODULE,
.proc_name = DRV_MODULE_NAME,
- .show_info = atari_scsi_show_info,
.name = "Atari native SCSI",
.info = atari_scsi_info,
.queuecommand = atari_scsi_queue_command,
.eh_abort_handler = atari_scsi_abort,
.eh_bus_reset_handler = atari_scsi_bus_reset,
.this_id = 7,
- .use_clustering = DISABLE_CLUSTERING
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
};
static int __init atari_scsi_probe(struct platform_device *pdev)
} else {
/* Test if a host id is set in the NVRam */
if (ATARIHW_PRESENT(TT_CLK) && nvram_check_checksum()) {
- unsigned char b = nvram_read_byte(14);
+ unsigned char b = nvram_read_byte(16);
/* Arbitration enabled? (for TOS)
* If yes, use configured host ID
error = -ENOMEM;
goto fail_alloc;
}
- atari_scsi_host = instance;
-
-#ifdef CONFIG_ATARI_SCSI_RESET_BOOT
- atari_scsi_reset_boot();
-#endif
instance->irq = irq->start;
host_flags |= IS_A_TT() ? 0 : FLAG_LATE_DMA_SETUP;
-
#ifdef SUPPORT_TAGS
host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
#endif
+ host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
- NCR5380_init(instance, host_flags);
+ error = NCR5380_init(instance, host_flags);
+ if (error)
+ goto fail_init;
if (IS_A_TT()) {
error = request_irq(instance->irq, scsi_tt_intr, 0,
#endif
}
+ NCR5380_maybe_reset_bus(instance);
+
error = scsi_add_host(instance, NULL);
if (error)
goto fail_host;
free_irq(instance->irq, instance);
fail_irq:
NCR5380_exit(instance);
+fail_init:
scsi_host_put(instance);
fail_alloc:
if (atari_dma_buffer)
depends on PCI && SCSI && NET
select SCSI_ISCSI_ATTRS
select ISCSI_BOOT_SYSFS
+ select IRQ_POLL
help
This driver implements the iSCSI functionality for Emulex
#include <linux/pci.h>
#include <linux/if_vlan.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
#define FW_VER_LEN 32
#define MCC_Q_LEN 128
#define MCC_CQ_LEN 256
struct beiscsi_hba *phba;
struct be_queue_info *cq;
struct work_struct work_cqs; /* Work Item */
- struct blk_iopoll iopoll;
+ struct irq_poll iopoll;
};
struct be_mcc_obj {
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_disable(&pbe_eq->iopoll);
+ irq_poll_disable(&pbe_eq->iopoll);
beiscsi_process_cq(pbe_eq);
- blk_iopoll_enable(&pbe_eq->iopoll);
+ irq_poll_enable(&pbe_eq->iopoll);
}
}
num_eq_processed = 0;
while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32]
& EQE_VALID_MASK) {
- if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
- blk_iopoll_sched(&pbe_eq->iopoll);
+ irq_poll_sched(&pbe_eq->iopoll);
AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
queue_tail_inc(eq);
spin_unlock_irqrestore(&phba->isr_lock, flags);
num_mcceq_processed++;
} else {
- if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
- blk_iopoll_sched(&pbe_eq->iopoll);
+ irq_poll_sched(&pbe_eq->iopoll);
num_ioeq_processed++;
}
AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1);
}
-static int be_iopoll(struct blk_iopoll *iop, int budget)
+static int be_iopoll(struct irq_poll *iop, int budget)
{
unsigned int ret;
struct beiscsi_hba *phba;
pbe_eq->cq_count += ret;
if (ret < budget) {
phba = pbe_eq->phba;
- blk_iopoll_complete(iop);
+ irq_poll_complete(iop);
beiscsi_log(phba, KERN_INFO,
BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO,
"BM_%d : rearm pbe_eq->q.id =%d\n",
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_disable(&pbe_eq->iopoll);
+ irq_poll_disable(&pbe_eq->iopoll);
}
if (unload_state == BEISCSI_CLEAN_UNLOAD) {
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+ irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
be_iopoll);
- blk_iopoll_enable(&pbe_eq->iopoll);
}
i = (phba->msix_enabled) ? i : 0;
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+ irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
be_iopoll);
- blk_iopoll_enable(&pbe_eq->iopoll);
}
i = (phba->msix_enabled) ? i : 0;
destroy_workqueue(phba->wq);
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_disable(&pbe_eq->iopoll);
+ irq_poll_disable(&pbe_eq->iopoll);
}
free_twq:
beiscsi_clean_port(phba);
static int cxgb3i_rx_credit_thres = 10 * 1024;
module_param(cxgb3i_rx_credit_thres, int, 0644);
-MODULE_PARM_DESC(rx_credit_thres,
+MODULE_PARM_DESC(cxgb3i_rx_credit_thres,
"RX credits return threshold in bytes (default=10KB)");
static unsigned int cxgb3i_max_connect = 8 * 1024;
#define DONT_USE_INTR
-#define NCR5380_read(reg) inb(port + reg)
-#define NCR5380_write(reg, value) outb(value, port + reg)
+#define NCR5380_read(reg) inb(instance->io_port + reg)
+#define NCR5380_write(reg, value) outb(value, instance->io_port + reg)
#define NCR5380_implementation_fields /* none */
-#define NCR5380_local_declare() unsigned int port
-#define NCR5380_setup(instance) port = instance->io_port
-
-/*
- * Includes needed for NCR5380.[ch] (XXX: Move them to NCR5380.h)
- */
-#include <linux/delay.h>
#include "NCR5380.h"
#include "NCR5380.c"
static struct scsi_host_template dmx3191d_driver_template = {
+ .module = THIS_MODULE,
.proc_name = DMX3191D_DRIVER_NAME,
.name = "Domex DMX3191D",
.info = NCR5380_info,
.sg_tablesize = SG_ALL,
.cmd_per_lun = 2,
.use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
static int dmx3191d_probe_one(struct pci_dev *pdev,
*/
shost->irq = NO_IRQ;
- NCR5380_init(shost, FLAG_NO_PSEUDO_DMA | FLAG_DTC3181E);
+ error = NCR5380_init(shost, FLAG_NO_PSEUDO_DMA);
+ if (error)
+ goto out_host_put;
+
+ NCR5380_maybe_reset_bus(shost);
pci_set_drvdata(pdev, shost);
error = scsi_add_host(shost, &pdev->dev);
if (error)
- goto out_release_region;
+ goto out_exit;
scsi_scan_host(shost);
return 0;
+out_exit:
+ NCR5380_exit(shost);
+out_host_put:
+ scsi_host_put(shost);
out_release_region:
release_region(io, DMX3191D_REGION_LEN);
out_disable_device:
static void dmx3191d_remove_one(struct pci_dev *pdev)
{
struct Scsi_Host *shost = pci_get_drvdata(pdev);
+ unsigned long io = shost->io_port;
scsi_remove_host(shost);
NCR5380_exit(shost);
-
- release_region(shost->io_port, DMX3191D_REGION_LEN);
- pci_disable_device(pdev);
-
scsi_host_put(shost);
+ release_region(io, DMX3191D_REGION_LEN);
+ pci_disable_device(pdev);
}
static struct pci_device_id dmx3191d_pci_tbl[] = {
-
#define PSEUDO_DMA
#define DONT_USE_INTR
-#define UNSAFE /* Leave interrupts enabled during pseudo-dma I/O */
-#define DMA_WORKS_RIGHT
-
/*
* DTC 3180/3280 driver, by
#include <linux/module.h>
-#include <linux/signal.h>
#include <linux/blkdev.h>
-#include <linux/delay.h>
-#include <linux/stat.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <scsi/scsi_host.h>
+
#include "dtc.h"
#define AUTOPROBE_IRQ
#include "NCR5380.h"
static int __init dtc_setup(char *str)
{
- static int commandline_current = 0;
+ static int commandline_current;
int i;
int ints[10];
static int __init dtc_detect(struct scsi_host_template * tpnt)
{
- static int current_override = 0, current_base = 0;
+ static int current_override, current_base;
struct Scsi_Host *instance;
unsigned int addr;
void __iomem *base;
addr = 0;
} else
for (; !addr && (current_base < NO_BASES); ++current_base) {
-#if (DTCDEBUG & DTCDEBUG_INIT)
- printk(KERN_DEBUG "scsi-dtc : probing address %08x\n", bases[current_base].address);
-#endif
+ dprintk(NDEBUG_INIT, "dtc: probing address 0x%08x\n",
+ (unsigned int)bases[current_base].address);
if (bases[current_base].noauto)
continue;
base = ioremap(bases[current_base].address, 0x2000);
for (sig = 0; sig < NO_SIGNATURES; ++sig) {
if (check_signature(base + signatures[sig].offset, signatures[sig].string, strlen(signatures[sig].string))) {
addr = bases[current_base].address;
-#if (DTCDEBUG & DTCDEBUG_INIT)
- printk(KERN_DEBUG "scsi-dtc : detected board.\n");
-#endif
+ dprintk(NDEBUG_INIT, "dtc: detected board\n");
goto found;
}
}
iounmap(base);
}
-#if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT)
- printk(KERN_DEBUG "scsi-dtc : base = %08x\n", addr);
-#endif
+ dprintk(NDEBUG_INIT, "dtc: addr = 0x%08x\n", addr);
if (!addr)
break;
found:
instance = scsi_register(tpnt, sizeof(struct NCR5380_hostdata));
if (instance == NULL)
- break;
+ goto out_unmap;
instance->base = addr;
((struct NCR5380_hostdata *)(instance)->hostdata)->base = base;
- NCR5380_init(instance, 0);
+ if (NCR5380_init(instance, FLAG_NO_DMA_FIXUP))
+ goto out_unregister;
+
+ NCR5380_maybe_reset_bus(instance);
NCR5380_write(DTC_CONTROL_REG, CSR_5380_INTR); /* Enable int's */
if (overrides[current_override].irq != IRQ_AUTO)
printk(KERN_WARNING "scsi%d : interrupts not used. Might as well not jumper it.\n", instance->host_no);
instance->irq = NO_IRQ;
#endif
-#if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT)
- printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+ dprintk(NDEBUG_INIT, "scsi%d : irq = %d\n",
+ instance->host_no, instance->irq);
++current_override;
++count;
}
return count;
+
+out_unregister:
+ scsi_unregister(instance);
+out_unmap:
+ iounmap(base);
+ return count;
}
/*
unsigned char *d = dst;
int i; /* For counting time spent in the poll-loop */
struct NCR5380_hostdata *hostdata = shost_priv(instance);
- NCR5380_local_declare();
- NCR5380_setup(instance);
i = 0;
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- NCR5380_write(MODE_REG, MR_ENABLE_EOP_INTR | MR_DMA_MODE);
if (instance->irq == NO_IRQ)
NCR5380_write(DTC_CONTROL_REG, CSR_DIR_READ);
else
while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
++i;
rtrc(3);
- memcpy_fromio(d, base + DTC_DATA_BUF, 128);
+ memcpy_fromio(d, hostdata->base + DTC_DATA_BUF, 128);
d += 128;
len -= 128;
rtrc(7);
rtrc(4);
while (!(NCR5380_read(DTC_CONTROL_REG) & D_CR_ACCESS))
++i;
- NCR5380_write(MODE_REG, 0); /* Clear the operating mode */
rtrc(0);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
if (i > hostdata->spin_max_r)
hostdata->spin_max_r = i;
return (0);
{
int i;
struct NCR5380_hostdata *hostdata = shost_priv(instance);
- NCR5380_local_declare();
- NCR5380_setup(instance);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
- NCR5380_write(MODE_REG, MR_ENABLE_EOP_INTR | MR_DMA_MODE);
- /* set direction (write) */
if (instance->irq == NO_IRQ)
NCR5380_write(DTC_CONTROL_REG, 0);
else
while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
++i;
rtrc(3);
- memcpy_toio(base + DTC_DATA_BUF, src, 128);
+ memcpy_toio(hostdata->base + DTC_DATA_BUF, src, 128);
src += 128;
len -= 128;
}
++i;
rtrc(7);
/* Check for parity error here. fixme. */
- NCR5380_write(MODE_REG, 0); /* Clear the operating mode */
rtrc(0);
if (i > hostdata->spin_max_w)
hostdata->spin_max_w = i;
return (0);
}
+static int dtc_dma_xfer_len(struct scsi_cmnd *cmd)
+{
+ int transfersize = cmd->transfersize;
+
+ /* Returns the number of bytes to move in one transfer for cmd,
+ * starting from cmd->transfersize. Reached through the
+ * NCR5380_dma_xfer_len(instance, cmd, phase) macro in dtc.h.
+ *
+ * Limit transfers to 32K, for xx400 & xx406
+ * pseudoDMA that transfers in 128-byte blocks.
+ *
+ * The 32K cap is applied only when SCp.this_residual is non-zero
+ * and an exact multiple of transfersize; in every other case
+ * cmd->transfersize is returned unchanged.
+ */
+ if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
+ !(cmd->SCp.this_residual % transfersize))
+ transfersize = 32 * 1024;
+
+ return transfersize;
+}
+
MODULE_LICENSE("GPL");
#include "NCR5380.c"
static int dtc_release(struct Scsi_Host *shost)
{
- NCR5380_local_declare();
- NCR5380_setup(shost);
+ struct NCR5380_hostdata *hostdata = shost_priv(shost);
+
if (shost->irq != NO_IRQ)
free_irq(shost->irq, shost);
NCR5380_exit(shost);
- if (shost->io_port && shost->n_io_port)
- release_region(shost->io_port, shost->n_io_port);
scsi_unregister(shost);
- iounmap(base);
+ iounmap(hostdata->base);
return 0;
}
static struct scsi_host_template driver_template = {
- .name = "DTC 3180/3280 ",
- .detect = dtc_detect,
- .release = dtc_release,
- .proc_name = "dtc3x80",
- .show_info = dtc_show_info,
- .write_info = dtc_write_info,
- .info = dtc_info,
- .queuecommand = dtc_queue_command,
- .eh_abort_handler = dtc_abort,
- .eh_bus_reset_handler = dtc_bus_reset,
- .bios_param = dtc_biosparam,
- .can_queue = CAN_QUEUE,
- .this_id = 7,
- .sg_tablesize = SG_ALL,
- .cmd_per_lun = CMD_PER_LUN,
- .use_clustering = DISABLE_CLUSTERING,
+ .name = "DTC 3180/3280",
+ .detect = dtc_detect,
+ .release = dtc_release,
+ .proc_name = "dtc3x80",
+ .show_info = dtc_show_info,
+ .write_info = dtc_write_info,
+ .info = dtc_info,
+ .queuecommand = dtc_queue_command,
+ .eh_abort_handler = dtc_abort,
+ .eh_bus_reset_handler = dtc_bus_reset,
+ .bios_param = dtc_biosparam,
+ .can_queue = 32,
+ .this_id = 7,
+ .sg_tablesize = SG_ALL,
+ .cmd_per_lun = 2,
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
#include "scsi_module.c"
#ifndef DTC3280_H
#define DTC3280_H
-#define DTCDEBUG 0
-#define DTCDEBUG_INIT 0x1
-#define DTCDEBUG_TRANSFER 0x2
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32
-#endif
-
#define NCR5380_implementation_fields \
void __iomem *base
-#define NCR5380_local_declare() \
- void __iomem *base
-
-#define NCR5380_setup(instance) \
- base = ((struct NCR5380_hostdata *)(instance)->hostdata)->base
+#define DTC_address(reg) \
+ (((struct NCR5380_hostdata *)shost_priv(instance))->base + DTC_5380_OFFSET + reg)
-#define DTC_address(reg) (base + DTC_5380_OFFSET + reg)
-
-#define dbNCR5380_read(reg) \
- (rval=readb(DTC_address(reg)), \
- (((unsigned char) printk("DTC : read register %d at addr %p is: %02x\n"\
- , (reg), DTC_address(reg), rval)), rval ) )
-
-#define dbNCR5380_write(reg, value) do { \
- printk("DTC : write %02x to register %d at address %p\n", \
- (value), (reg), DTC_address(reg)); \
- writeb(value, DTC_address(reg));} while(0)
-
-
-#if !(DTCDEBUG & DTCDEBUG_TRANSFER)
#define NCR5380_read(reg) (readb(DTC_address(reg)))
#define NCR5380_write(reg, value) (writeb(value, DTC_address(reg)))
-#else
-#define NCR5380_read(reg) (readb(DTC_address(reg)))
-#define xNCR5380_read(reg) \
- (((unsigned char) printk("DTC : read register %d at address %p\n"\
- , (reg), DTC_address(reg))), readb(DTC_address(reg)))
-#define NCR5380_write(reg, value) do { \
- printk("DTC : write %02x to register %d at address %p\n", \
- (value), (reg), DTC_address(reg)); \
- writeb(value, DTC_address(reg));} while(0)
-#endif
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+ dtc_dma_xfer_len(cmd)
#define NCR5380_intr dtc_intr
#define NCR5380_queue_command dtc_queue_command
*
*/
-/* settings for DTC3181E card with only Mustek scanner attached */
-#define USLEEP_POLL msecs_to_jiffies(10)
-#define USLEEP_SLEEP msecs_to_jiffies(200)
-#define USLEEP_WAITLONG msecs_to_jiffies(5000)
-
#define AUTOPROBE_IRQ
#ifdef CONFIG_SCSI_GENERIC_NCR53C400
-#define NCR53C400_PSEUDO_DMA 1
#define PSEUDO_DMA
-#define NCR53C400
#endif
#include <asm/io.h>
-#include <linux/signal.h>
#include <linux/blkdev.h>
+#include <linux/module.h>
#include <scsi/scsi_host.h>
#include "g_NCR5380.h"
#include "NCR5380.h"
-#include <linux/stat.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/isapnp.h>
-#include <linux/delay.h>
#include <linux/interrupt.h>
-#define NCR_NOT_SET 0
-static int ncr_irq = NCR_NOT_SET;
-static int ncr_dma = NCR_NOT_SET;
-static int ncr_addr = NCR_NOT_SET;
-static int ncr_5380 = NCR_NOT_SET;
-static int ncr_53c400 = NCR_NOT_SET;
-static int ncr_53c400a = NCR_NOT_SET;
-static int dtc_3181e = NCR_NOT_SET;
+static int ncr_irq;
+static int ncr_dma;
+static int ncr_addr;
+static int ncr_5380;
+static int ncr_53c400;
+static int ncr_53c400a;
+static int dtc_3181e;
+static int hp_c2502;
static struct override {
NCR5380_map_type NCR5380_map_name;
static void __init internal_setup(int board, char *str, int *ints)
{
- static int commandline_current = 0;
+ static int commandline_current;
switch (board) {
case BOARD_NCR5380:
if (ints[0] != 2 && ints[0] != 3) {
#endif
+#ifndef SCSI_G_NCR5380_MEM
+/*
+ * Configure I/O address of 53C400A or DTC436 by writing magic numbers
+ * to ports 0x779 and 0x379.
+ *
+ * @idx:   index of the I/O port to select. Values 0..7 are encoded into
+ *         the configuration byte as 0x80 | idx | (irq << 4); for any other
+ *         value (the detect code passes -1 to disable the adapter) the
+ *         configuration byte stays 0.
+ * @irq:   IRQ number to encode. Only 2, 3, 4, 5 and 7 are accepted; any
+ *         other value is replaced by 0 before encoding.
+ * @magic: unlock sequence, indexed 0..4. magic[0] is written to port 0x779
+ *         and magic[1]..magic[4] to port 0x379, followed by the
+ *         configuration byte, also on port 0x379.
+ *
+ * Only built for the port-mapped variant (SCSI_G_NCR5380_MEM undefined).
+ */
+static void magic_configure(int idx, u8 irq, u8 magic[])
+{
+ u8 cfg = 0;
+
+ outb(magic[0], 0x779);
+ outb(magic[1], 0x379);
+ outb(magic[2], 0x379);
+ outb(magic[3], 0x379);
+ outb(magic[4], 0x379);
+
+ /* allowed IRQs for HP C2502; anything else is encoded as 0 */
+ if (irq != 2 && irq != 3 && irq != 4 && irq != 5 && irq != 7)
+ irq = 0;
+ if (idx >= 0 && idx <= 7)
+ cfg = 0x80 | idx | (irq << 4);
+ outb(cfg, 0x379);
+}
+#endif
+
/**
* generic_NCR5380_detect - look for NCR5380 controllers
* @tpnt: the scsi template
* and DTC436(ISAPnP) controllers. If overrides have been set we use
* them.
*
- * The caller supplied NCR5380_init function is invoked from here, before
- * the interrupt line is taken.
- *
* Locks: none
*/
static int __init generic_NCR5380_detect(struct scsi_host_template *tpnt)
{
- static int current_override = 0;
+ static int current_override;
int count;
unsigned int *ports;
+ u8 *magic = NULL;
#ifndef SCSI_G_NCR5380_MEM
int i;
+ int port_idx = -1;
unsigned long region_size = 16;
#endif
static unsigned int __initdata ncr_53c400a_ports[] = {
static unsigned int __initdata dtc_3181e_ports[] = {
0x220, 0x240, 0x280, 0x2a0, 0x2c0, 0x300, 0x320, 0x340, 0
};
- int flags = 0;
+ static u8 ncr_53c400a_magic[] __initdata = { /* 53C400A & DTC436 */
+ 0x59, 0xb9, 0xc5, 0xae, 0xa6
+ };
+ static u8 hp_c2502_magic[] __initdata = { /* HP C2502 */
+ 0x0f, 0x22, 0xf0, 0x20, 0x80
+ };
+ int flags;
struct Scsi_Host *instance;
+ struct NCR5380_hostdata *hostdata;
#ifdef SCSI_G_NCR5380_MEM
unsigned long base;
void __iomem *iomem;
#endif
- if (ncr_irq != NCR_NOT_SET)
+ if (ncr_irq)
overrides[0].irq = ncr_irq;
- if (ncr_dma != NCR_NOT_SET)
+ if (ncr_dma)
overrides[0].dma = ncr_dma;
- if (ncr_addr != NCR_NOT_SET)
+ if (ncr_addr)
overrides[0].NCR5380_map_name = (NCR5380_map_type) ncr_addr;
- if (ncr_5380 != NCR_NOT_SET)
+ if (ncr_5380)
overrides[0].board = BOARD_NCR5380;
- else if (ncr_53c400 != NCR_NOT_SET)
+ else if (ncr_53c400)
overrides[0].board = BOARD_NCR53C400;
- else if (ncr_53c400a != NCR_NOT_SET)
+ else if (ncr_53c400a)
overrides[0].board = BOARD_NCR53C400A;
- else if (dtc_3181e != NCR_NOT_SET)
+ else if (dtc_3181e)
overrides[0].board = BOARD_DTC3181E;
+ else if (hp_c2502)
+ overrides[0].board = BOARD_HP_C2502;
#ifndef SCSI_G_NCR5380_MEM
if (!current_override && isapnp_present()) {
struct pnp_dev *dev = NULL;
}
}
#endif
- tpnt->proc_name = "g_NCR5380";
for (count = 0; current_override < NO_OVERRIDES; ++current_override) {
if (!(overrides[current_override].NCR5380_map_name))
continue;
ports = NULL;
+ flags = 0;
switch (overrides[current_override].board) {
case BOARD_NCR5380:
flags = FLAG_NO_PSEUDO_DMA;
break;
case BOARD_NCR53C400:
- flags = FLAG_NCR53C400;
+#ifdef PSEUDO_DMA
+ flags = FLAG_NO_DMA_FIXUP;
+#endif
break;
case BOARD_NCR53C400A:
- flags = FLAG_NO_PSEUDO_DMA;
+ flags = FLAG_NO_DMA_FIXUP;
+ ports = ncr_53c400a_ports;
+ magic = ncr_53c400a_magic;
+ break;
+ case BOARD_HP_C2502:
+ flags = FLAG_NO_DMA_FIXUP;
ports = ncr_53c400a_ports;
+ magic = hp_c2502_magic;
break;
case BOARD_DTC3181E:
- flags = FLAG_NO_PSEUDO_DMA | FLAG_DTC3181E;
+ flags = FLAG_NO_DMA_FIXUP;
ports = dtc_3181e_ports;
+ magic = ncr_53c400a_magic;
break;
}
#ifndef SCSI_G_NCR5380_MEM
- if (ports) {
+ if (ports && magic) {
/* wakeup sequence for the NCR53C400A and DTC3181E */
/* Disable the adapter and look for a free io port */
- outb(0x59, 0x779);
- outb(0xb9, 0x379);
- outb(0xc5, 0x379);
- outb(0xae, 0x379);
- outb(0xa6, 0x379);
- outb(0x00, 0x379);
+ magic_configure(-1, 0, magic);
if (overrides[current_override].NCR5380_map_name != PORT_AUTO)
for (i = 0; ports[i]; i++) {
}
if (ports[i]) {
/* At this point we have our region reserved */
- outb(0x59, 0x779);
- outb(0xb9, 0x379);
- outb(0xc5, 0x379);
- outb(0xae, 0x379);
- outb(0xa6, 0x379);
- outb(0x80 | i, 0x379); /* set io port to be used */
+ magic_configure(i, 0, magic); /* no IRQ yet */
outb(0xc0, ports[i] + 9);
if (inb(ports[i] + 9) != 0x80)
continue;
- else
- overrides[current_override].NCR5380_map_name = ports[i];
+ overrides[current_override].NCR5380_map_name = ports[i];
+ port_idx = i;
} else
continue;
}
}
#endif
instance = scsi_register(tpnt, sizeof(struct NCR5380_hostdata));
- if (instance == NULL) {
-#ifndef SCSI_G_NCR5380_MEM
- release_region(overrides[current_override].NCR5380_map_name, region_size);
-#else
- iounmap(iomem);
- release_mem_region(base, NCR5380_region_size);
-#endif
- continue;
- }
+ if (instance == NULL)
+ goto out_release;
+ hostdata = shost_priv(instance);
- instance->NCR5380_instance_name = overrides[current_override].NCR5380_map_name;
#ifndef SCSI_G_NCR5380_MEM
+ instance->io_port = overrides[current_override].NCR5380_map_name;
instance->n_io_port = region_size;
+ hostdata->io_width = 1; /* 8-bit PDMA by default */
+
+ /*
+ * On NCR53C400 boards, NCR5380 registers are mapped 8 past
+ * the base address.
+ */
+ switch (overrides[current_override].board) {
+ case BOARD_NCR53C400:
+ instance->io_port += 8;
+ hostdata->c400_ctl_status = 0;
+ hostdata->c400_blk_cnt = 1;
+ hostdata->c400_host_buf = 4;
+ break;
+ case BOARD_DTC3181E:
+ hostdata->io_width = 2; /* 16-bit PDMA */
+ /* fall through */
+ case BOARD_NCR53C400A:
+ case BOARD_HP_C2502:
+ hostdata->c400_ctl_status = 9;
+ hostdata->c400_blk_cnt = 10;
+ hostdata->c400_host_buf = 8;
+ break;
+ }
#else
- ((struct NCR5380_hostdata *)instance->hostdata)->iomem = iomem;
+ instance->base = overrides[current_override].NCR5380_map_name;
+ hostdata->iomem = iomem;
+ switch (overrides[current_override].board) {
+ case BOARD_NCR53C400:
+ hostdata->c400_ctl_status = 0x100;
+ hostdata->c400_blk_cnt = 0x101;
+ hostdata->c400_host_buf = 0x104;
+ break;
+ case BOARD_DTC3181E:
+ case BOARD_NCR53C400A:
+ case BOARD_HP_C2502:
+ pr_err(DRV_MODULE_NAME ": unknown register offsets\n");
+ goto out_unregister;
+ }
#endif
- NCR5380_init(instance, flags);
+ if (NCR5380_init(instance, flags))
+ goto out_unregister;
+
+ switch (overrides[current_override].board) {
+ case BOARD_NCR53C400:
+ case BOARD_DTC3181E:
+ case BOARD_NCR53C400A:
+ case BOARD_HP_C2502:
+ NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+ }
+
+ NCR5380_maybe_reset_bus(instance);
if (overrides[current_override].irq != IRQ_AUTO)
instance->irq = overrides[current_override].irq;
if (instance->irq == 255)
instance->irq = NO_IRQ;
- if (instance->irq != NO_IRQ)
+ if (instance->irq != NO_IRQ) {
+#ifndef SCSI_G_NCR5380_MEM
+ /* set IRQ for HP C2502 */
+ if (overrides[current_override].board == BOARD_HP_C2502)
+ magic_configure(port_idx, instance->irq, magic);
+#endif
if (request_irq(instance->irq, generic_NCR5380_intr,
0, "NCR5380", instance)) {
printk(KERN_WARNING "scsi%d : IRQ%d not free, interrupts disabled\n", instance->host_no, instance->irq);
instance->irq = NO_IRQ;
}
+ }
if (instance->irq == NO_IRQ) {
printk(KERN_INFO "scsi%d : interrupts not enabled. for better interactive performance,\n", instance->host_no);
++count;
}
return count;
+
+out_unregister:
+ scsi_unregister(instance);
+out_release:
+#ifndef SCSI_G_NCR5380_MEM
+ release_region(overrides[current_override].NCR5380_map_name, region_size);
+#else
+ iounmap(iomem);
+ release_mem_region(base, NCR5380_region_size);
+#endif
+ return count;
}
/**
static int generic_NCR5380_release_resources(struct Scsi_Host *instance)
{
- NCR5380_local_declare();
- NCR5380_setup(instance);
-
if (instance->irq != NO_IRQ)
free_irq(instance->irq, instance);
NCR5380_exit(instance);
-
#ifndef SCSI_G_NCR5380_MEM
- release_region(instance->NCR5380_instance_name, instance->n_io_port);
+ release_region(instance->io_port, instance->n_io_port);
#else
iounmap(((struct NCR5380_hostdata *)instance->hostdata)->iomem);
- release_mem_region(instance->NCR5380_instance_name, NCR5380_region_size);
+ release_mem_region(instance->base, NCR5380_region_size);
#endif
-
-
return 0;
}
}
#endif
-#ifdef NCR53C400_PSEUDO_DMA
+#ifdef PSEUDO_DMA
/**
* NCR5380_pread - pseudo DMA read
static inline int NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst, int len)
{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int blocks = len / 128;
int start = 0;
- int bl;
-
- NCR5380_local_declare();
- NCR5380_setup(instance);
- NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE | CSR_TRANS_DIR);
- NCR5380_write(C400_BLOCK_COUNTER_REG, blocks);
+ NCR5380_write(hostdata->c400_ctl_status, CSR_BASE | CSR_TRANS_DIR);
+ NCR5380_write(hostdata->c400_blk_cnt, blocks);
while (1) {
- if ((bl = NCR5380_read(C400_BLOCK_COUNTER_REG)) == 0) {
+ if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
break;
- }
- if (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ) {
+ if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
printk(KERN_ERR "53C400r: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
return -1;
}
- while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY);
+ while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
+ ; /* FIXME - no timeout */
#ifndef SCSI_G_NCR5380_MEM
- {
- int i;
- for (i = 0; i < 128; i++)
- dst[start + i] = NCR5380_read(C400_HOST_BUFFER);
- }
+ if (hostdata->io_width == 2)
+ insw(instance->io_port + hostdata->c400_host_buf,
+ dst + start, 64);
+ else
+ insb(instance->io_port + hostdata->c400_host_buf,
+ dst + start, 128);
#else
/* implies SCSI_G_NCR5380_MEM */
- memcpy_fromio(dst + start, iomem + NCR53C400_host_buffer, 128);
+ memcpy_fromio(dst + start,
+ hostdata->iomem + NCR53C400_host_buffer, 128);
#endif
start += 128;
blocks--;
}
if (blocks) {
- while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
- {
- // FIXME - no timeout
- }
+ while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
+ ; /* FIXME - no timeout */
#ifndef SCSI_G_NCR5380_MEM
- {
- int i;
- for (i = 0; i < 128; i++)
- dst[start + i] = NCR5380_read(C400_HOST_BUFFER);
- }
+ if (hostdata->io_width == 2)
+ insw(instance->io_port + hostdata->c400_host_buf,
+ dst + start, 64);
+ else
+ insb(instance->io_port + hostdata->c400_host_buf,
+ dst + start, 128);
#else
/* implies SCSI_G_NCR5380_MEM */
- memcpy_fromio(dst + start, iomem + NCR53C400_host_buffer, 128);
+ memcpy_fromio(dst + start,
+ hostdata->iomem + NCR53C400_host_buffer, 128);
#endif
start += 128;
blocks--;
}
- if (!(NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ))
+ if (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ))
printk("53C400r: no 53C80 gated irq after transfer");
-#if 0
- /*
- * DON'T DO THIS - THEY NEVER ARRIVE!
- */
- printk("53C400r: Waiting for 53C80 registers\n");
- while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_53C80_REG)
+ /* wait for 53C80 registers to be available */
+ while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG))
;
-#endif
+
if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER))
printk(KERN_ERR "53C400r: no end dma signal\n");
- NCR5380_write(MODE_REG, MR_BASE);
- NCR5380_read(RESET_PARITY_INTERRUPT_REG);
return 0;
}
static inline int NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src, int len)
{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
int blocks = len / 128;
int start = 0;
- int bl;
- int i;
- NCR5380_local_declare();
- NCR5380_setup(instance);
-
- NCR5380_write(C400_CONTROL_STATUS_REG, CSR_BASE);
- NCR5380_write(C400_BLOCK_COUNTER_REG, blocks);
+ NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+ NCR5380_write(hostdata->c400_blk_cnt, blocks);
while (1) {
- if (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ) {
+ if (NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ) {
printk(KERN_ERR "53C400w: Got 53C80_IRQ start=%d, blocks=%d\n", start, blocks);
return -1;
}
- if ((bl = NCR5380_read(C400_BLOCK_COUNTER_REG)) == 0) {
+ if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
break;
- }
- while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
+ while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
; // FIXME - timeout
#ifndef SCSI_G_NCR5380_MEM
- {
- for (i = 0; i < 128; i++)
- NCR5380_write(C400_HOST_BUFFER, src[start + i]);
- }
+ if (hostdata->io_width == 2)
+ outsw(instance->io_port + hostdata->c400_host_buf,
+ src + start, 64);
+ else
+ outsb(instance->io_port + hostdata->c400_host_buf,
+ src + start, 128);
#else
/* implies SCSI_G_NCR5380_MEM */
- memcpy_toio(iomem + NCR53C400_host_buffer, src + start, 128);
+ memcpy_toio(hostdata->iomem + NCR53C400_host_buffer,
+ src + start, 128);
#endif
start += 128;
blocks--;
}
if (blocks) {
- while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_HOST_BUF_NOT_RDY)
+ while (NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)
; // FIXME - no timeout
#ifndef SCSI_G_NCR5380_MEM
- {
- for (i = 0; i < 128; i++)
- NCR5380_write(C400_HOST_BUFFER, src[start + i]);
- }
+ if (hostdata->io_width == 2)
+ outsw(instance->io_port + hostdata->c400_host_buf,
+ src + start, 64);
+ else
+ outsb(instance->io_port + hostdata->c400_host_buf,
+ src + start, 128);
#else
/* implies SCSI_G_NCR5380_MEM */
- memcpy_toio(iomem + NCR53C400_host_buffer, src + start, 128);
+ memcpy_toio(hostdata->iomem + NCR53C400_host_buffer,
+ src + start, 128);
#endif
start += 128;
blocks--;
}
-#if 0
- printk("53C400w: waiting for registers to be available\n");
- THEY NEVER DO ! while (NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_53C80_REG);
- printk("53C400w: Got em\n");
-#endif
-
- /* Let's wait for this instead - could be ugly */
- /* All documentation says to check for this. Maybe my hardware is too
- * fast. Waiting for it seems to work fine! KLL
- */
- while (!(i = NCR5380_read(C400_CONTROL_STATUS_REG) & CSR_GATED_53C80_IRQ))
- ; // FIXME - no timeout
-
- /*
- * I know. i is certainly != 0 here but the loop is new. See previous
- * comment.
- */
- if (i) {
- if (!((i = NCR5380_read(BUS_AND_STATUS_REG)) & BASR_END_DMA_TRANSFER))
- printk(KERN_ERR "53C400w: No END OF DMA bit - WHOOPS! BASR=%0x\n", i);
- } else
- printk(KERN_ERR "53C400w: no 53C80 gated irq after transfer (last block)\n");
+ /* wait for 53C80 registers to be available */
+ while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)) {
+ udelay(4); /* DTC436 chip hangs without this */
+ /* FIXME - no timeout */
+ }
-#if 0
if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER)) {
printk(KERN_ERR "53C400w: no end dma signal\n");
}
-#endif
+
while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT))
; // TIMEOUT
return 0;
}
-#endif /* PSEUDO_DMA */
+
+static int generic_NCR5380_dma_xfer_len(struct scsi_cmnd *cmd)
+{
+ int transfersize = cmd->transfersize;
+
+ /* Returns the pseudo-DMA transfer length for cmd, starting from
+ * cmd->transfersize; 0 is returned when the length is unsuitable
+ * (see the modulo-128 check below).
+ *
+ * Limit transfers to 32K, for xx400 & xx406
+ * pseudoDMA that transfers in 128-byte blocks.
+ *
+ * The 32K cap is applied only when SCp.this_residual is non-zero
+ * and an exact multiple of transfersize.
+ */
+ if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
+ !(cmd->SCp.this_residual % transfersize))
+ transfersize = 32 * 1024;
+
+ /* 53C400 datasheet: non-modulo-128-byte transfers should use PIO.
+ * A length that is not a multiple of 128 is therefore reported as 0.
+ * NOTE(review): presumably the NCR5380 core treats 0 as "use PIO" -
+ * confirm against NCR5380.c.
+ */
+ if (transfersize % 128)
+ transfersize = 0;
+
+ return transfersize;
+}
+
+#endif /* PSEUDO_DMA */
/*
* Include the NCR5380 core code that we build our driver around
#include "NCR5380.c"
static struct scsi_host_template driver_template = {
- .show_info = generic_NCR5380_show_info,
- .name = "Generic NCR5380/NCR53C400 SCSI",
- .detect = generic_NCR5380_detect,
- .release = generic_NCR5380_release_resources,
- .info = generic_NCR5380_info,
- .queuecommand = generic_NCR5380_queue_command,
+ .proc_name = DRV_MODULE_NAME,
+ .name = "Generic NCR5380/NCR53C400 SCSI",
+ .detect = generic_NCR5380_detect,
+ .release = generic_NCR5380_release_resources,
+ .info = generic_NCR5380_info,
+ .queuecommand = generic_NCR5380_queue_command,
.eh_abort_handler = generic_NCR5380_abort,
.eh_bus_reset_handler = generic_NCR5380_bus_reset,
- .bios_param = NCR5380_BIOSPARAM,
- .can_queue = CAN_QUEUE,
- .this_id = 7,
- .sg_tablesize = SG_ALL,
- .cmd_per_lun = CMD_PER_LUN,
- .use_clustering = DISABLE_CLUSTERING,
+ .bios_param = NCR5380_BIOSPARAM,
+ .can_queue = 16,
+ .this_id = 7,
+ .sg_tablesize = SG_ALL,
+ .cmd_per_lun = 2,
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
-#include <linux/module.h>
+
#include "scsi_module.c"
module_param(ncr_irq, int, 0);
module_param(ncr_53c400, int, 0);
module_param(ncr_53c400a, int, 0);
module_param(dtc_3181e, int, 0);
+module_param(hp_c2502, int, 0);
MODULE_LICENSE("GPL");
#if !defined(SCSI_G_NCR5380_MEM) && defined(MODULE)
#ifndef GENERIC_NCR5380_H
#define GENERIC_NCR5380_H
-#ifdef NCR53C400
+#ifdef CONFIG_SCSI_GENERIC_NCR53C400
#define BIOSPARAM
#define NCR5380_BIOSPARAM generic_NCR5380_biosparam
#else
#define NCR5380_BIOSPARAM NULL
#endif
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 16
-#endif
-
#define __STRVAL(x) #x
#define STRVAL(x) __STRVAL(x)
#ifndef SCSI_G_NCR5380_MEM
+#define DRV_MODULE_NAME "g_NCR5380"
-#define NCR5380_map_config port
#define NCR5380_map_type int
#define NCR5380_map_name port
-#define NCR5380_instance_name io_port
-#define NCR53C400_register_offset 0
-#define NCR53C400_address_adjust 8
-#ifdef NCR53C400
+#ifdef CONFIG_SCSI_GENERIC_NCR53C400
#define NCR5380_region_size 16
#else
#define NCR5380_region_size 8
#endif
-#define NCR5380_read(reg) (inb(NCR5380_map_name + (reg)))
-#define NCR5380_write(reg, value) (outb((value), (NCR5380_map_name + (reg))))
+#define NCR5380_read(reg) \
+ inb(instance->io_port + (reg))
+#define NCR5380_write(reg, value) \
+ outb(value, instance->io_port + (reg))
#define NCR5380_implementation_fields \
- NCR5380_map_type NCR5380_map_name
-
-#define NCR5380_local_declare() \
- register NCR5380_implementation_fields
-
-#define NCR5380_setup(instance) \
- NCR5380_map_name = (NCR5380_map_type)((instance)->NCR5380_instance_name)
+ int c400_ctl_status; \
+ int c400_blk_cnt; \
+ int c400_host_buf; \
+ int io_width;
#else
/* therefore SCSI_G_NCR5380_MEM */
+#define DRV_MODULE_NAME "g_NCR5380_mmio"
-#define NCR5380_map_config memory
#define NCR5380_map_type unsigned long
#define NCR5380_map_name base
-#define NCR5380_instance_name base
-#define NCR53C400_register_offset 0x108
-#define NCR53C400_address_adjust 0
#define NCR53C400_mem_base 0x3880
#define NCR53C400_host_buffer 0x3900
#define NCR5380_region_size 0x3a00
-#define NCR5380_read(reg) readb(iomem + NCR53C400_mem_base + (reg))
-#define NCR5380_write(reg, value) writeb(value, iomem + NCR53C400_mem_base + (reg))
+#define NCR5380_read(reg) \
+ readb(((struct NCR5380_hostdata *)shost_priv(instance))->iomem + \
+ NCR53C400_mem_base + (reg))
+#define NCR5380_write(reg, value) \
+ writeb(value, ((struct NCR5380_hostdata *)shost_priv(instance))->iomem + \
+ NCR53C400_mem_base + (reg))
#define NCR5380_implementation_fields \
- NCR5380_map_type NCR5380_map_name; \
- void __iomem *iomem;
-
-#define NCR5380_local_declare() \
- register void __iomem *iomem
-
-#define NCR5380_setup(instance) \
- iomem = (((struct NCR5380_hostdata *)(instance)->hostdata)->iomem)
+ void __iomem *iomem; \
+ int c400_ctl_status; \
+ int c400_blk_cnt; \
+ int c400_host_buf;
#endif
+#define NCR5380_dma_xfer_len(instance, cmd, phase) \
+ generic_NCR5380_dma_xfer_len(cmd)
+
#define NCR5380_intr generic_NCR5380_intr
#define NCR5380_queue_command generic_NCR5380_queue_command
#define NCR5380_abort generic_NCR5380_abort
#define BOARD_NCR53C400 1
#define BOARD_NCR53C400A 2
#define BOARD_DTC3181E 3
+#define BOARD_HP_C2502 4
-#endif /* ndef ASM */
#endif /* GENERIC_NCR5380_H */
/* ITCT header */
/* qw0 */
#define ITCT_HDR_DEV_TYPE_OFF 0
-#define ITCT_HDR_DEV_TYPE_MSK (0x3 << ITCT_HDR_DEV_TYPE_OFF)
+#define ITCT_HDR_DEV_TYPE_MSK (0x3ULL << ITCT_HDR_DEV_TYPE_OFF)
#define ITCT_HDR_VALID_OFF 2
-#define ITCT_HDR_VALID_MSK (0x1 << ITCT_HDR_VALID_OFF)
-#define ITCT_HDR_BREAK_REPLY_ENA_OFF 3
-#define ITCT_HDR_BREAK_REPLY_ENA_MSK (0x1 << ITCT_HDR_BREAK_REPLY_ENA_OFF)
+#define ITCT_HDR_VALID_MSK (0x1ULL << ITCT_HDR_VALID_OFF)
#define ITCT_HDR_AWT_CONTROL_OFF 4
-#define ITCT_HDR_AWT_CONTROL_MSK (0x1 << ITCT_HDR_AWT_CONTROL_OFF)
+#define ITCT_HDR_AWT_CONTROL_MSK (0x1ULL << ITCT_HDR_AWT_CONTROL_OFF)
#define ITCT_HDR_MAX_CONN_RATE_OFF 5
-#define ITCT_HDR_MAX_CONN_RATE_MSK (0xf << ITCT_HDR_MAX_CONN_RATE_OFF)
+#define ITCT_HDR_MAX_CONN_RATE_MSK (0xfULL << ITCT_HDR_MAX_CONN_RATE_OFF)
#define ITCT_HDR_VALID_LINK_NUM_OFF 9
-#define ITCT_HDR_VALID_LINK_NUM_MSK (0xf << ITCT_HDR_VALID_LINK_NUM_OFF)
+#define ITCT_HDR_VALID_LINK_NUM_MSK (0xfULL << ITCT_HDR_VALID_LINK_NUM_OFF)
#define ITCT_HDR_PORT_ID_OFF 13
-#define ITCT_HDR_PORT_ID_MSK (0x7 << ITCT_HDR_PORT_ID_OFF)
+#define ITCT_HDR_PORT_ID_MSK (0x7ULL << ITCT_HDR_PORT_ID_OFF)
#define ITCT_HDR_SMP_TIMEOUT_OFF 16
-#define ITCT_HDR_SMP_TIMEOUT_MSK (0xffff << ITCT_HDR_SMP_TIMEOUT_OFF)
-#define ITCT_HDR_MAX_BURST_BYTES_OFF 16
-#define ITCT_HDR_MAX_BURST_BYTES_MSK (0xffffffff << \
- ITCT_MAX_BURST_BYTES_OFF)
+#define ITCT_HDR_SMP_TIMEOUT_MSK (0xffffULL << ITCT_HDR_SMP_TIMEOUT_OFF)
/* qw1 */
#define ITCT_HDR_MAX_SAS_ADDR_OFF 0
#define ITCT_HDR_MAX_SAS_ADDR_MSK (0xffffffffffffffff << \
ITCT_HDR_MAX_SAS_ADDR_OFF)
/* qw2 */
#define ITCT_HDR_IT_NEXUS_LOSS_TL_OFF 0
-#define ITCT_HDR_IT_NEXUS_LOSS_TL_MSK (0xffff << \
+#define ITCT_HDR_IT_NEXUS_LOSS_TL_MSK (0xffffULL << \
ITCT_HDR_IT_NEXUS_LOSS_TL_OFF)
#define ITCT_HDR_BUS_INACTIVE_TL_OFF 16
-#define ITCT_HDR_BUS_INACTIVE_TL_MSK (0xffff << \
+#define ITCT_HDR_BUS_INACTIVE_TL_MSK (0xffffULL << \
ITCT_HDR_BUS_INACTIVE_TL_OFF)
#define ITCT_HDR_MAX_CONN_TL_OFF 32
-#define ITCT_HDR_MAX_CONN_TL_MSK (0xffff << \
+#define ITCT_HDR_MAX_CONN_TL_MSK (0xffffULL << \
ITCT_HDR_MAX_CONN_TL_OFF)
#define ITCT_HDR_REJ_OPEN_TL_OFF 48
-#define ITCT_HDR_REJ_OPEN_TL_MSK (0xffff << \
- ITCT_REJ_OPEN_TL_OFF)
+#define ITCT_HDR_REJ_OPEN_TL_MSK (0xffffULL << \
+ ITCT_HDR_REJ_OPEN_TL_OFF)
/* Err record header */
#define ERR_HDR_DMA_TX_ERR_TYPE_OFF 0
itct->sas_addr = __swab64(itct->sas_addr);
/* qw2 */
- itct->qw2 = cpu_to_le64((500 < ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) |
- (0xff00 < ITCT_HDR_BUS_INACTIVE_TL_OFF) |
- (0xff00 < ITCT_HDR_MAX_CONN_TL_OFF) |
- (0xff00 < ITCT_HDR_REJ_OPEN_TL_OFF));
+ itct->qw2 = cpu_to_le64((500ULL << ITCT_HDR_IT_NEXUS_LOSS_TL_OFF) |
+ (0xff00ULL << ITCT_HDR_BUS_INACTIVE_TL_OFF) |
+ (0xff00ULL << ITCT_HDR_MAX_CONN_TL_OFF) |
+ (0xff00ULL << ITCT_HDR_REJ_OPEN_TL_OFF));
}
static void free_device_v1_hw(struct hisi_hba *hisi_hba,
{
u64 dev_id = sas_dev->device_id;
struct hisi_sas_itct *itct = &hisi_hba->itct[dev_id];
- u32 qw0, reg_val = hisi_sas_read32(hisi_hba, CFG_AGING_TIME);
+ u64 qw0;
+ u32 reg_val = hisi_sas_read32(hisi_hba, CFG_AGING_TIME);
reg_val |= CFG_AGING_TIME_ITCT_REL_MSK;
hisi_sas_write32(hisi_hba, CFG_AGING_TIME, reg_val);
unsigned dp:1; /* Data phase present */
unsigned rd:1; /* Read data in data phase */
unsigned wanted:1; /* Parport sharing busy flag */
+ unsigned int dev_no; /* Device number */
wait_queue_head_t *waiting;
struct Scsi_Host *host;
struct list_head list;
static LIST_HEAD(imm_hosts);
+/*
+ * find_parent - locate the list entry preceding the first free device number
+ *
+ * Walks imm_hosts while comparing each entry's dev_no against a counter
+ * that starts at 0 and grows by one per entry.  The walk stops at the
+ * first entry whose dev_no does not match the counter.  The entry visited
+ * just before that point (or the last entry when every number matched) is
+ * returned, so the caller can allot the following device number to the
+ * new imm device and link it next to that entry; the intent is to keep
+ * the list in ascending order.
+ *
+ * Returns NULL when the list is empty, and also when the very first
+ * entry's dev_no is not 0.
+ */
+static inline imm_struct *find_parent(void)
+{
+	imm_struct *cur;
+	imm_struct *prev = NULL;
+	unsigned int expected = 0;
+
+	if (list_empty(&imm_hosts))
+		return NULL;
+
+	list_for_each_entry(cur, &imm_hosts, list) {
+		/* First gap in the numbering: stop with prev as is. */
+		if (cur->dev_no != expected)
+			break;
+		prev = cur;
+		expected++;
+	}
+
+	return prev;
+}
+
static int __imm_attach(struct parport *pb)
{
struct Scsi_Host *host;
- imm_struct *dev;
+ imm_struct *dev, *temp;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waiting);
DEFINE_WAIT(wait);
int ports;
int modes, ppb;
int err = -ENOMEM;
+ struct pardev_cb imm_cb;
init_waitqueue_head(&waiting);
dev->mode = IMM_AUTODETECT;
INIT_LIST_HEAD(&dev->list);
- dev->dev = parport_register_device(pb, "imm", NULL, imm_wakeup,
- NULL, 0, dev);
+ temp = find_parent();
+ if (temp)
+ dev->dev_no = temp->dev_no + 1;
+
+ memset(&imm_cb, 0, sizeof(imm_cb));
+ imm_cb.private = dev;
+ imm_cb.wakeup = imm_wakeup;
+ dev->dev = parport_register_dev_model(pb, "imm", &imm_cb, dev->dev_no);
if (!dev->dev)
goto out;
host->unique_id = pb->number;
*(imm_struct **)&host->hostdata = dev;
dev->host = host;
- list_add_tail(&dev->list, &imm_hosts);
+ if (!temp)
+ list_add_tail(&dev->list, &imm_hosts);
+ else
+ list_add_tail(&dev->list, &temp->list);
err = scsi_add_host(host, NULL);
if (err)
goto out2;
}
static struct parport_driver imm_driver = {
- .name = "imm",
- .attach = imm_attach,
- .detach = imm_detach,
+ .name = "imm",
+ .match_port = imm_attach,
+ .detach = imm_detach,
+ .devmodel = true,
};
static int __init imm_driver_init(void)
.store = ipr_store_reset_adapter
};
-static int ipr_iopoll(struct blk_iopoll *iop, int budget);
+static int ipr_iopoll(struct irq_poll *iop, int budget);
/**
* ipr_show_iopoll_weight - Show ipr polling mode
* @dev: class device struct
int i;
if (!ioa_cfg->sis64) {
- dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n");
+ dev_info(&ioa_cfg->pdev->dev, "irq_poll not supported on this adapter\n");
return -EINVAL;
}
if (kstrtoul(buf, 10, &user_iopoll_weight))
return -EINVAL;
if (user_iopoll_weight > 256) {
- dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n");
+ dev_info(&ioa_cfg->pdev->dev, "Invalid irq_poll weight. It must be less than 256\n");
return -EINVAL;
}
if (user_iopoll_weight == ioa_cfg->iopoll_weight) {
- dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n");
+ dev_info(&ioa_cfg->pdev->dev, "Current irq_poll weight has the same weight\n");
return strlen(buf);
}
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
for (i = 1; i < ioa_cfg->hrrq_num; i++)
- blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+ irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
}
spin_lock_irqsave(shost->host_lock, lock_flags);
ioa_cfg->iopoll_weight = user_iopoll_weight;
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
for (i = 1; i < ioa_cfg->hrrq_num; i++) {
- blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+ irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
ioa_cfg->iopoll_weight, ipr_iopoll);
- blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
}
}
spin_unlock_irqrestore(shost->host_lock, lock_flags);
struct ipr_sglist *sglist;
char fname[100];
char *src;
- int len, result, dnld_size;
+ int result, dnld_size;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- len = snprintf(fname, 99, "%s", buf);
- fname[len-1] = '\0';
+ snprintf(fname, sizeof(fname), "%s", buf);
if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) {
dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname);
return num_hrrq;
}
-static int ipr_iopoll(struct blk_iopoll *iop, int budget)
+static int ipr_iopoll(struct irq_poll *iop, int budget)
{
struct ipr_ioa_cfg *ioa_cfg;
struct ipr_hrr_queue *hrrq;
completed_ops = ipr_process_hrrq(hrrq, budget, &doneq);
if (completed_ops < budget)
- blk_iopoll_complete(iop);
+ irq_poll_complete(iop);
spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
hrrq->toggle_bit) {
- if (!blk_iopoll_sched_prep(&hrrq->iopoll))
- blk_iopoll_sched(&hrrq->iopoll);
+ irq_poll_sched(&hrrq->iopoll);
spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
return IRQ_HANDLED;
}
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
for (i = 1; i < ioa_cfg->hrrq_num; i++) {
- blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+ irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
ioa_cfg->iopoll_weight, ipr_iopoll);
- blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
}
}
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
ioa_cfg->iopoll_weight = 0;
for (i = 1; i < ioa_cfg->hrrq_num; i++)
- blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+ irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
}
while (ioa_cfg->in_reset_reload) {
#include <linux/libata.h>
#include <linux/list.h>
#include <linux/kref.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
u8 allow_cmds:1;
u8 removing_ioa:1;
- struct blk_iopoll iopoll;
+ struct irq_poll iopoll;
};
/* Command packet structure */
*/
#include <linux/types.h>
-#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ioport.h>
#include <linux/init.h>
#define PSEUDO_DMA
#define NCR5380_implementation_fields unsigned char *pdma_base
-#define NCR5380_local_declare() struct Scsi_Host *_instance
-#define NCR5380_setup(instance) _instance = instance
-#define NCR5380_read(reg) macscsi_read(_instance, reg)
-#define NCR5380_write(reg, value) macscsi_write(_instance, reg, value)
+#define NCR5380_read(reg) macscsi_read(instance, reg)
+#define NCR5380_write(reg, value) macscsi_write(instance, reg, value)
#define NCR5380_pread macscsi_pread
#define NCR5380_pwrite macscsi_pwrite
+#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
#define NCR5380_intr macscsi_intr
#define NCR5380_queue_command macscsi_queue_command
#include "NCR5380.h"
-#define RESET_BOOT
-
static int setup_can_queue = -1;
module_param(setup_can_queue, int, 0);
static int setup_cmd_per_lun = -1;
module_param(setup_use_tagged_queuing, int, 0);
static int setup_hostid = -1;
module_param(setup_hostid, int, 0);
-
-/* Time (in jiffies) to wait after a reset; the SCSI standard calls for 250ms,
- * we usually do 0.5s to be on the safe side. But Toshiba CD-ROMs once more
- * need ten times the standard value... */
-#define TOSHIBA_DELAY
-
-#ifdef TOSHIBA_DELAY
-#define AFTER_RESET_DELAY (5*HZ/2)
-#else
-#define AFTER_RESET_DELAY (HZ/2)
-#endif
+static int setup_toshiba_delay = -1;
+module_param(setup_toshiba_delay, int, 0);
/*
* NCR 5380 register access functions
#ifndef MODULE
static int __init mac_scsi_setup(char *str)
{
- int ints[7];
+ int ints[8];
(void)get_options(str, ARRAY_SIZE(ints), ints);
- if (ints[0] < 1 || ints[0] > 6) {
- pr_err("Usage: mac5380=<can_queue>[,<cmd_per_lun>[,<sg_tablesize>[,<hostid>[,<use_tags>[,<use_pdma>]]]]]\n");
+ if (ints[0] < 1) {
+ pr_err("Usage: mac5380=<can_queue>[,<cmd_per_lun>[,<sg_tablesize>[,<hostid>[,<use_tags>[,<use_pdma>[,<toshiba_delay>]]]]]]\n");
return 0;
}
if (ints[0] >= 1)
setup_use_tagged_queuing = ints[5];
if (ints[0] >= 6)
setup_use_pdma = ints[6];
+ if (ints[0] >= 7)
+ setup_toshiba_delay = ints[7];
return 1;
}
__setup("mac5380=", mac_scsi_setup);
#endif /* !MODULE */
-#ifdef RESET_BOOT
-/*
- * Our 'bus reset on boot' function
- */
-
-static void mac_scsi_reset_boot(struct Scsi_Host *instance)
-{
- unsigned long end;
-
- NCR5380_local_declare();
- NCR5380_setup(instance);
-
- /*
- * Do a SCSI reset to clean up the bus during initialization. No messing
- * with the queues, interrupts, or locks necessary here.
- */
-
- printk(KERN_INFO "Macintosh SCSI: resetting the SCSI bus..." );
-
- /* get in phase */
- NCR5380_write( TARGET_COMMAND_REG,
- PHASE_SR_TO_TCR( NCR5380_read(STATUS_REG) ));
-
- /* assert RST */
- NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST );
- /* The min. reset hold time is 25us, so 40us should be enough */
- udelay( 50 );
- /* reset RST and interrupt */
- NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE );
- NCR5380_read( RESET_PARITY_INTERRUPT_REG );
-
- for( end = jiffies + AFTER_RESET_DELAY; time_before(jiffies, end); )
- barrier();
-
- printk(KERN_INFO " done\n" );
-}
-#endif
-
#ifdef PSEUDO_DMA
/*
Pseudo-DMA: (Ove Edlund)
unsigned char *d;
unsigned char *s;
- NCR5380_local_declare();
- NCR5380_setup(instance);
-
s = hostdata->pdma_base + (INPUT_DATA_REG << 4);
d = dst;
unsigned char *s;
unsigned char *d;
- NCR5380_local_declare();
- NCR5380_setup(instance);
-
s = src;
d = hostdata->pdma_base + (OUTPUT_DATA_REG << 4);
#define PFX DRV_MODULE_NAME ": "
static struct scsi_host_template mac_scsi_template = {
- .module = THIS_MODULE,
- .proc_name = DRV_MODULE_NAME,
- .show_info = macscsi_show_info,
- .write_info = macscsi_write_info,
- .name = "Macintosh NCR5380 SCSI",
- .info = macscsi_info,
- .queuecommand = macscsi_queue_command,
- .eh_abort_handler = macscsi_abort,
- .eh_bus_reset_handler = macscsi_bus_reset,
- .can_queue = 16,
- .this_id = 7,
- .sg_tablesize = SG_ALL,
- .cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING
+ .module = THIS_MODULE,
+ .proc_name = DRV_MODULE_NAME,
+ .show_info = macscsi_show_info,
+ .write_info = macscsi_write_info,
+ .name = "Macintosh NCR5380 SCSI",
+ .info = macscsi_info,
+ .queuecommand = macscsi_queue_command,
+ .eh_abort_handler = macscsi_abort,
+ .eh_bus_reset_handler = macscsi_bus_reset,
+ .can_queue = 16,
+ .this_id = 7,
+ .sg_tablesize = SG_ALL,
+ .cmd_per_lun = 2,
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
static int __init mac_scsi_probe(struct platform_device *pdev)
} else
host_flags |= FLAG_NO_PSEUDO_DMA;
-#ifdef RESET_BOOT
- mac_scsi_reset_boot(instance);
-#endif
-
#ifdef SUPPORT_TAGS
host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
#endif
+ host_flags |= setup_toshiba_delay > 0 ? FLAG_TOSHIBA_DELAY : 0;
- NCR5380_init(instance, host_flags);
+ error = NCR5380_init(instance, host_flags);
+ if (error)
+ goto fail_init;
if (instance->irq != NO_IRQ) {
error = request_irq(instance->irq, macscsi_intr, IRQF_SHARED,
goto fail_irq;
}
+ NCR5380_maybe_reset_bus(instance);
+
error = scsi_add_host(instance, NULL);
if (error)
goto fail_host;
free_irq(instance->irq, instance);
fail_irq:
NCR5380_exit(instance);
+fail_init:
scsi_host_put(instance);
return error;
}
/*
* The following call will block till a kioc is available
+ * or return NULL if the list head is empty for the pointer
+ * of type mraid_mmapt passed to mraid_mm_alloc_kioc
*/
kioc = mraid_mm_alloc_kioc(adp);
+ if (!kioc)
+ return -ENXIO;
/*
* User sent the old mimd_t ioctl packet. Convert it to uioc_t.
#define PSEUDO_DMA
-#define UNSAFE /* Not unsafe for PAS16 -- use it */
-#define PDEBUG 0
/*
* This driver adapted from Drew Eckhardt's Trantor T128 driver
#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/proc_fs.h>
#include <asm/io.h>
#include <asm/dma.h>
#include <linux/blkdev.h>
-#include <linux/delay.h>
#include <linux/interrupt.h>
-#include <linux/stat.h>
#include <linux/init.h>
#include <scsi/scsi_host.h>
#include "NCR5380.h"
-static unsigned short pas16_addr = 0;
-static int pas16_irq = 0;
+static unsigned short pas16_addr;
+static int pas16_irq;
static const int scsi_irq_translate[] =
* START_DMA_INITIATOR_RECEIVE_REG wo
*/
};
-/*----------------------------------------------------------------*/
-/* the following will set the monitor border color (useful to find
- where something crashed or gets stuck at */
-/* 1 = blue
- 2 = green
- 3 = cyan
- 4 = red
- 5 = magenta
- 6 = yellow
- 7 = white
-*/
-#if 1
-#define rtrc(i) {inb(0x3da); outb(0x31, 0x3c0); outb((i), 0x3c0);}
-#else
-#define rtrc(i) {}
-#endif
/*
outb( 0x01, io_port + P_TIMEOUT_STATUS_REG_OFFSET ); /* Reset TC */
outb( 0x01, io_port + WAIT_STATE ); /* 1 Wait state */
- NCR5380_read( RESET_PARITY_INTERRUPT_REG );
+ inb(io_port + pas16_offset[RESET_PARITY_INTERRUPT_REG]);
/* Set the SCSI interrupt pointer without mucking up the sound
* interrupt pointer in the same byte.
* put in an additional test to try to weed them out.
*/
- outb( 0x01, io_port + WAIT_STATE ); /* 1 Wait state */
- NCR5380_write( MODE_REG, 0x20 ); /* Is it really SCSI? */
- if( NCR5380_read( MODE_REG ) != 0x20 ) /* Write to a reg. */
- return 0; /* and try to read */
- NCR5380_write( MODE_REG, 0x00 ); /* it back. */
- if( NCR5380_read( MODE_REG ) != 0x00 )
- return 0;
+ outb(0x01, io_port + WAIT_STATE); /* 1 Wait state */
+ outb(0x20, io_port + pas16_offset[MODE_REG]); /* Is it really SCSI? */
+ if (inb(io_port + pas16_offset[MODE_REG]) != 0x20) /* Write to a reg. */
+ return 0; /* and try to read */
+ outb(0x00, io_port + pas16_offset[MODE_REG]); /* it back. */
+ if (inb(io_port + pas16_offset[MODE_REG]) != 0x00)
+ return 0;
return 1;
}
static int __init pas16_setup(char *str)
{
- static int commandline_current = 0;
+ static int commandline_current;
int i;
int ints[10];
static int __init pas16_detect(struct scsi_host_template *tpnt)
{
- static int current_override = 0;
- static unsigned short current_base = 0;
+ static int current_override;
+ static unsigned short current_base;
struct Scsi_Host *instance;
unsigned short io_port;
int count;
}
else
for (; !io_port && (current_base < NO_BASES); ++current_base) {
-#if (PDEBUG & PDEBUG_INIT)
- printk("scsi-pas16 : probing io_port %04x\n", (unsigned int) bases[current_base].io_port);
-#endif
+ dprintk(NDEBUG_INIT, "pas16: probing io_port 0x%04x\n",
+ (unsigned int)bases[current_base].io_port);
if ( !bases[current_base].noauto &&
pas16_hw_detect( current_base ) ){
io_port = bases[current_base].io_port;
init_board( io_port, default_irqs[ current_base ], 0 );
-#if (PDEBUG & PDEBUG_INIT)
- printk("scsi-pas16 : detected board.\n");
-#endif
+ dprintk(NDEBUG_INIT, "pas16: detected board\n");
}
}
-
-#if defined(PDEBUG) && (PDEBUG & PDEBUG_INIT)
- printk("scsi-pas16 : io_port = %04x\n", (unsigned int) io_port);
-#endif
+ dprintk(NDEBUG_INIT, "pas16: io_port = 0x%04x\n",
+ (unsigned int)io_port);
if (!io_port)
break;
instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
if(instance == NULL)
- break;
+ goto out;
instance->io_port = io_port;
- NCR5380_init(instance, 0);
+ if (NCR5380_init(instance, 0))
+ goto out_unregister;
+
+ NCR5380_maybe_reset_bus(instance);
if (overrides[current_override].irq != IRQ_AUTO)
instance->irq = overrides[current_override].irq;
outb( (inb(io_port + IO_CONFIG_3) & 0x0f), io_port + IO_CONFIG_3 );
}
-#if defined(PDEBUG) && (PDEBUG & PDEBUG_INIT)
- printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+ dprintk(NDEBUG_INIT, "scsi%d : irq = %d\n",
+ instance->host_no, instance->irq);
++current_override;
++count;
}
return count;
+
+out_unregister:
+ scsi_unregister(instance);
+out:
+ return count;
}
/*
if (shost->irq != NO_IRQ)
free_irq(shost->irq, shost);
NCR5380_exit(shost);
- if (shost->io_port && shost->n_io_port)
- release_region(shost->io_port, shost->n_io_port);
scsi_unregister(shost);
return 0;
}
static struct scsi_host_template driver_template = {
- .name = "Pro Audio Spectrum-16 SCSI",
- .detect = pas16_detect,
- .release = pas16_release,
- .proc_name = "pas16",
- .show_info = pas16_show_info,
- .write_info = pas16_write_info,
- .info = pas16_info,
- .queuecommand = pas16_queue_command,
- .eh_abort_handler = pas16_abort,
- .eh_bus_reset_handler = pas16_bus_reset,
- .bios_param = pas16_biosparam,
- .can_queue = CAN_QUEUE,
- .this_id = 7,
- .sg_tablesize = SG_ALL,
- .cmd_per_lun = CMD_PER_LUN,
- .use_clustering = DISABLE_CLUSTERING,
+ .name = "Pro Audio Spectrum-16 SCSI",
+ .detect = pas16_detect,
+ .release = pas16_release,
+ .proc_name = "pas16",
+ .show_info = pas16_show_info,
+ .write_info = pas16_write_info,
+ .info = pas16_info,
+ .queuecommand = pas16_queue_command,
+ .eh_abort_handler = pas16_abort,
+ .eh_bus_reset_handler = pas16_bus_reset,
+ .bios_param = pas16_biosparam,
+ .can_queue = 32,
+ .this_id = 7,
+ .sg_tablesize = SG_ALL,
+ .cmd_per_lun = 2,
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
#include "scsi_module.c"
#ifndef PAS16_H
#define PAS16_H
-#define PDEBUG_INIT 0x1
-#define PDEBUG_TRANSFER 0x2
-
#define PAS16_DEFAULT_BASE_1 0x388
#define PAS16_DEFAULT_BASE_2 0x384
#define PAS16_DEFAULT_BASE_3 0x38c
#define OPERATION_MODE_1 0xec03
#define IO_CONFIG_3 0xf002
+#define NCR5380_implementation_fields /* none */
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32
-#endif
-
-#define NCR5380_implementation_fields \
- volatile unsigned short io_port
-
-#define NCR5380_local_declare() \
- volatile unsigned short io_port
+#define PAS16_io_port(reg) (instance->io_port + pas16_offset[(reg)])
-#define NCR5380_setup(instance) \
- io_port = (instance)->io_port
-
-#define PAS16_io_port(reg) ( io_port + pas16_offset[(reg)] )
-
-#if !(PDEBUG & PDEBUG_TRANSFER)
#define NCR5380_read(reg) ( inb(PAS16_io_port(reg)) )
#define NCR5380_write(reg, value) ( outb((value),PAS16_io_port(reg)) )
-#else
-#define NCR5380_read(reg) \
- (((unsigned char) printk("scsi%d : read register %d at io_port %04x\n"\
- , instance->hostno, (reg), PAS16_io_port(reg))), inb( PAS16_io_port(reg)) )
-
-#define NCR5380_write(reg, value) \
- (printk("scsi%d : write %02x to register %d at io_port %04x\n", \
- instance->hostno, (value), (reg), PAS16_io_port(reg)), \
- outb( (value),PAS16_io_port(reg) ) )
-
-#endif
+#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
#define NCR5380_intr pas16_intr
-#define do_NCR5380_intr do_pas16_intr
#define NCR5380_queue_command pas16_queue_command
#define NCR5380_abort pas16_abort
#define NCR5380_bus_reset pas16_bus_reset
#define PAS16_IRQS 0xd4a8
-#endif /* ndef ASM */
#endif /* PAS16_H */
{"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC},
{"Promise", "", NULL, BLIST_SPARSELUN},
{"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024},
+ {"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024},
{"QUANTUM", "XP34301", "1071", BLIST_NOTQ},
{"REGAL", "CDC-4X", NULL, BLIST_MAX5LUN | BLIST_SINGLELUN},
{"SanDisk", "ImageMate CF-SD1", NULL, BLIST_FORCELUN},
MODULE_PARM_DESC(storvsc_ringbuffer_size, "Ring buffer size (bytes)");
module_param(storvsc_vcpus_per_sub_channel, int, S_IRUGO);
-MODULE_PARM_DESC(vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
+MODULE_PARM_DESC(storvsc_vcpus_per_sub_channel, "Ratio of VCPUs to subchannels");
/*
* Timeout in seconds for all devices managed by this driver.
*/
#define NCR5380_queue_command sun3scsi_queue_command
#define NCR5380_bus_reset sun3scsi_bus_reset
#define NCR5380_abort sun3scsi_abort
-#define NCR5380_show_info sun3scsi_show_info
#define NCR5380_info sun3scsi_info
#define NCR5380_dma_read_setup(instance, data, count) \
- sun3scsi_dma_setup(data, count, 0)
+ sun3scsi_dma_setup(instance, data, count, 0)
#define NCR5380_dma_write_setup(instance, data, count) \
- sun3scsi_dma_setup(data, count, 1)
+ sun3scsi_dma_setup(instance, data, count, 1)
#define NCR5380_dma_residual(instance) \
sun3scsi_dma_residual(instance)
#define NCR5380_dma_xfer_len(instance, cmd, phase) \
static int setup_hostid = -1;
module_param(setup_hostid, int, 0);
-/* #define RESET_BOOT */
-
-#define AFTER_RESET_DELAY (HZ/2)
-
/* ms to wait after hitting dma regs */
#define SUN3_DMA_DELAY 10
static unsigned char *sun3_scsi_regp;
static volatile struct sun3_dma_regs *dregs;
static struct sun3_udc_regs *udc_regs;
-static unsigned char *sun3_dma_orig_addr = NULL;
-static unsigned long sun3_dma_orig_count = 0;
-static int sun3_dma_active = 0;
-static unsigned long last_residual = 0;
-static struct Scsi_Host *default_instance;
+static unsigned char *sun3_dma_orig_addr;
+static unsigned long sun3_dma_orig_count;
+static int sun3_dma_active;
+static unsigned long last_residual;
/*
* NCR 5380 register access functions
}
#endif
-#ifdef RESET_BOOT
-static void sun3_scsi_reset_boot(struct Scsi_Host *instance)
-{
- unsigned long end;
-
- /*
- * Do a SCSI reset to clean up the bus during initialization. No
- * messing with the queues, interrupts, or locks necessary here.
- */
-
- printk( "Sun3 SCSI: resetting the SCSI bus..." );
-
- /* switch off SCSI IRQ - catch an interrupt without IRQ bit set else */
-// sun3_disable_irq( IRQ_SUN3_SCSI );
-
- /* get in phase */
- NCR5380_write( TARGET_COMMAND_REG,
- PHASE_SR_TO_TCR( NCR5380_read(STATUS_REG) ));
-
- /* assert RST */
- NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_RST );
-
- /* The min. reset hold time is 25us, so 40us should be enough */
- udelay( 50 );
-
- /* reset RST and interrupt */
- NCR5380_write( INITIATOR_COMMAND_REG, ICR_BASE );
- NCR5380_read( RESET_PARITY_INTERRUPT_REG );
-
- for( end = jiffies + AFTER_RESET_DELAY; time_before(jiffies, end); )
- barrier();
-
- /* switch on SCSI IRQ again */
-// sun3_enable_irq( IRQ_SUN3_SCSI );
-
- printk( " done\n" );
-}
-#endif
-
// safe bits for the CSR
#define CSR_GOOD 0x060f
-static irqreturn_t scsi_sun3_intr(int irq, void *dummy)
+static irqreturn_t scsi_sun3_intr(int irq, void *dev)
{
+ struct Scsi_Host *instance = dev;
unsigned short csr = dregs->csr;
int handled = 0;
#endif
if(csr & ~CSR_GOOD) {
- if(csr & CSR_DMA_BUSERR) {
- printk("scsi%d: bus error in dma\n", default_instance->host_no);
- }
-
- if(csr & CSR_DMA_CONFLICT) {
- printk("scsi%d: dma conflict\n", default_instance->host_no);
- }
+ if (csr & CSR_DMA_BUSERR)
+ shost_printk(KERN_ERR, instance, "bus error in DMA\n");
+ if (csr & CSR_DMA_CONFLICT)
+ shost_printk(KERN_ERR, instance, "DMA conflict\n");
handled = 1;
}
if(csr & (CSR_SDB_INT | CSR_DMA_INT)) {
- NCR5380_intr(irq, dummy);
+ NCR5380_intr(irq, dev);
handled = 1;
}
return IRQ_RETVAL(handled);
}
-/*
- * Debug stuff - to be called on NMI, or sysrq key. Use at your own risk;
- * reentering NCR5380_print_status seems to have ugly side effects
- */
-
-/* this doesn't seem to get used at all -- sam */
-#if 0
-void sun3_sun3_debug (void)
-{
- unsigned long flags;
-
- if (default_instance) {
- local_irq_save(flags);
- NCR5380_print_status(default_instance);
- local_irq_restore(flags);
- }
-}
-#endif
-
-
/* sun3scsi_dma_setup() -- initialize the dma controller for a read/write */
-static unsigned long sun3scsi_dma_setup(void *data, unsigned long count, int write_flag)
+static unsigned long sun3scsi_dma_setup(struct Scsi_Host *instance,
+ void *data, unsigned long count, int write_flag)
{
void *addr;
dregs->csr |= CSR_FIFO;
if(dregs->fifo_count != count) {
- printk("scsi%d: fifo_mismatch %04x not %04x\n",
- default_instance->host_no, dregs->fifo_count,
- (unsigned int) count);
- NCR5380_dprint(NDEBUG_DMA, default_instance);
+ shost_printk(KERN_ERR, instance, "FIFO mismatch %04x not %04x\n",
+ dregs->fifo_count, (unsigned int) count);
+ NCR5380_dprint(NDEBUG_DMA, instance);
}
/* setup udc */
}
-#ifndef SUN3_SCSI_VME
-static inline unsigned long sun3scsi_dma_count(struct Scsi_Host *instance)
-{
- unsigned short resid;
-
- dregs->udc_addr = 0x32;
- udelay(SUN3_DMA_DELAY);
- resid = dregs->udc_data;
- udelay(SUN3_DMA_DELAY);
- resid *= 2;
-
- return (unsigned long) resid;
-}
-#endif
-
static inline unsigned long sun3scsi_dma_residual(struct Scsi_Host *instance)
{
return last_residual;
}
}
- count = sun3scsi_dma_count(default_instance);
+ dregs->udc_addr = 0x32;
+ udelay(SUN3_DMA_DELAY);
+ count = 2 * dregs->udc_data;
+ udelay(SUN3_DMA_DELAY);
fifo = dregs->fifo_count;
last_residual = fifo;
static struct scsi_host_template sun3_scsi_template = {
.module = THIS_MODULE,
.proc_name = DRV_MODULE_NAME,
- .show_info = sun3scsi_show_info,
.name = SUN3_SCSI_NAME,
.info = sun3scsi_info,
.queuecommand = sun3scsi_queue_command,
- .eh_abort_handler = sun3scsi_abort,
- .eh_bus_reset_handler = sun3scsi_bus_reset,
+ .eh_abort_handler = sun3scsi_abort,
+ .eh_bus_reset_handler = sun3scsi_bus_reset,
.can_queue = 16,
.this_id = 7,
.sg_tablesize = SG_NONE,
.cmd_per_lun = 2,
- .use_clustering = DISABLE_CLUSTERING
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
};
static int __init sun3_scsi_probe(struct platform_device *pdev)
error = -ENOMEM;
goto fail_alloc;
}
- default_instance = instance;
instance->io_port = (unsigned long)ioaddr;
instance->irq = irq->start;
host_flags |= setup_use_tagged_queuing > 0 ? FLAG_TAGGED_QUEUING : 0;
#endif
- NCR5380_init(instance, host_flags);
+ error = NCR5380_init(instance, host_flags);
+ if (error)
+ goto fail_init;
error = request_irq(instance->irq, scsi_sun3_intr, 0,
"NCR5380", instance);
dregs->ivect = VME_DATA24 | (instance->irq & 0xff);
#endif
-#ifdef RESET_BOOT
- sun3_scsi_reset_boot(instance);
-#endif
+ NCR5380_maybe_reset_bus(instance);
error = scsi_add_host(instance, NULL);
if (error)
free_irq(instance->irq, instance);
fail_irq:
NCR5380_exit(instance);
+fail_init:
scsi_host_put(instance);
fail_alloc:
if (udc_regs)
* 15 9-11
*/
-#include <linux/signal.h>
#include <linux/io.h>
#include <linux/blkdev.h>
#include <linux/interrupt.h>
-#include <linux/stat.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/delay.h>
#include <scsi/scsi_host.h>
#include "t128.h"
static int __init t128_setup(char *str)
{
- static int commandline_current = 0;
+ static int commandline_current;
int i;
int ints[10];
static int __init t128_detect(struct scsi_host_template *tpnt)
{
- static int current_override = 0, current_base = 0;
+ static int current_override, current_base;
struct Scsi_Host *instance;
unsigned long base;
void __iomem *p;
base = 0;
} else
for (; !base && (current_base < NO_BASES); ++current_base) {
-#if (TDEBUG & TDEBUG_INIT)
- printk("scsi-t128 : probing address %08x\n", bases[current_base].address);
-#endif
+ dprintk(NDEBUG_INIT, "t128: probing address 0x%08x\n",
+ bases[current_base].address);
if (bases[current_base].noauto)
continue;
p = ioremap(bases[current_base].address, 0x2000);
signatures[sig].string,
strlen(signatures[sig].string))) {
base = bases[current_base].address;
-#if (TDEBUG & TDEBUG_INIT)
- printk("scsi-t128 : detected board.\n");
-#endif
+ dprintk(NDEBUG_INIT, "t128: detected board\n");
goto found;
}
iounmap(p);
}
-#if defined(TDEBUG) && (TDEBUG & TDEBUG_INIT)
- printk("scsi-t128 : base = %08x\n", (unsigned int) base);
-#endif
+ dprintk(NDEBUG_INIT, "t128: base = 0x%08x\n", (unsigned int)base);
if (!base)
break;
found:
instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
if(instance == NULL)
- break;
-
+ goto out_unmap;
+
instance->base = base;
((struct NCR5380_hostdata *)instance->hostdata)->base = p;
- NCR5380_init(instance, 0);
+ if (NCR5380_init(instance, 0))
+ goto out_unregister;
+
+ NCR5380_maybe_reset_bus(instance);
if (overrides[current_override].irq != IRQ_AUTO)
instance->irq = overrides[current_override].irq;
printk("scsi%d : please jumper the board for a free IRQ.\n", instance->host_no);
}
-#if defined(TDEBUG) && (TDEBUG & TDEBUG_INIT)
- printk("scsi%d : irq = %d\n", instance->host_no, instance->irq);
-#endif
+ dprintk(NDEBUG_INIT, "scsi%d: irq = %d\n",
+ instance->host_no, instance->irq);
++current_override;
++count;
}
return count;
+
+out_unregister:
+ scsi_unregister(instance);
+out_unmap:
+ iounmap(p);
+ return count;
}
static int t128_release(struct Scsi_Host *shost)
{
- NCR5380_local_declare();
- NCR5380_setup(shost);
+ struct NCR5380_hostdata *hostdata = shost_priv(shost);
+
if (shost->irq != NO_IRQ)
free_irq(shost->irq, shost);
NCR5380_exit(shost);
- if (shost->io_port && shost->n_io_port)
- release_region(shost->io_port, shost->n_io_port);
scsi_unregister(shost);
- iounmap(base);
+ iounmap(hostdata->base);
return 0;
}
* timeout.
*/
-static inline int NCR5380_pread (struct Scsi_Host *instance, unsigned char *dst,
- int len) {
- NCR5380_local_declare();
- void __iomem *reg;
+static inline int
+NCR5380_pread(struct Scsi_Host *instance, unsigned char *dst, int len)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ void __iomem *reg, *base = hostdata->base;
unsigned char *d = dst;
register int i = len;
- NCR5380_setup(instance);
reg = base + T_DATA_REG_OFFSET;
#if 0
* timeout.
*/
-static inline int NCR5380_pwrite (struct Scsi_Host *instance, unsigned char *src,
- int len) {
- NCR5380_local_declare();
- void __iomem *reg;
+static inline int
+NCR5380_pwrite(struct Scsi_Host *instance, unsigned char *src, int len)
+{
+ struct NCR5380_hostdata *hostdata = shost_priv(instance);
+ void __iomem *reg, *base = hostdata->base;
unsigned char *s = src;
register int i = len;
- NCR5380_setup(instance);
reg = base + T_DATA_REG_OFFSET;
#if 0
#include "NCR5380.c"
static struct scsi_host_template driver_template = {
- .name = "Trantor T128/T128F/T228",
- .detect = t128_detect,
- .release = t128_release,
- .proc_name = "t128",
- .show_info = t128_show_info,
- .write_info = t128_write_info,
- .info = t128_info,
- .queuecommand = t128_queue_command,
- .eh_abort_handler = t128_abort,
- .eh_bus_reset_handler = t128_bus_reset,
- .bios_param = t128_biosparam,
- .can_queue = CAN_QUEUE,
- .this_id = 7,
- .sg_tablesize = SG_ALL,
- .cmd_per_lun = CMD_PER_LUN,
- .use_clustering = DISABLE_CLUSTERING,
+ .name = "Trantor T128/T128F/T228",
+ .detect = t128_detect,
+ .release = t128_release,
+ .proc_name = "t128",
+ .show_info = t128_show_info,
+ .write_info = t128_write_info,
+ .info = t128_info,
+ .queuecommand = t128_queue_command,
+ .eh_abort_handler = t128_abort,
+ .eh_bus_reset_handler = t128_bus_reset,
+ .bios_param = t128_biosparam,
+ .can_queue = 32,
+ .this_id = 7,
+ .sg_tablesize = SG_ALL,
+ .cmd_per_lun = 2,
+ .use_clustering = DISABLE_CLUSTERING,
+ .cmd_size = NCR5380_CMD_SIZE,
+ .max_sectors = 128,
};
#include "scsi_module.c"
#ifndef T128_H
#define T128_H
-#define TDEBUG 0
-#define TDEBUG_INIT 0x1
-#define TDEBUG_TRANSFER 0x2
-
/*
* The trantor boards are memory mapped. They use an NCR5380 or
* equivalent (my sample board had part second sourced from ZILOG).
#define T_DATA_REG_OFFSET 0x1e00 /* rw 512 bytes long */
-#ifndef ASM
-
-#ifndef CMD_PER_LUN
-#define CMD_PER_LUN 2
-#endif
-
-#ifndef CAN_QUEUE
-#define CAN_QUEUE 32
-#endif
-
#define NCR5380_implementation_fields \
void __iomem *base
-#define NCR5380_local_declare() \
- void __iomem *base
-
-#define NCR5380_setup(instance) \
- base = ((struct NCR5380_hostdata *)(instance->hostdata))->base
+#define T128_address(reg) \
+ (((struct NCR5380_hostdata *)shost_priv(instance))->base + T_5380_OFFSET + ((reg) * 0x20))
-#define T128_address(reg) (base + T_5380_OFFSET + ((reg) * 0x20))
-
-#if !(TDEBUG & TDEBUG_TRANSFER)
#define NCR5380_read(reg) readb(T128_address(reg))
#define NCR5380_write(reg, value) writeb((value),(T128_address(reg)))
-#else
-#define NCR5380_read(reg) \
- (((unsigned char) printk("scsi%d : read register %d at address %08x\n"\
- , instance->hostno, (reg), T128_address(reg))), readb(T128_address(reg)))
-
-#define NCR5380_write(reg, value) { \
- printk("scsi%d : write %02x to register %d at address %08x\n", \
- instance->hostno, (value), (reg), T128_address(reg)); \
- writeb((value), (T128_address(reg))); \
-}
-#endif
+
+#define NCR5380_dma_xfer_len(instance, cmd, phase) (cmd->transfersize)
#define NCR5380_intr t128_intr
-#define do_NCR5380_intr do_t128_intr
#define NCR5380_queue_command t128_queue_command
#define NCR5380_abort t128_abort
#define NCR5380_bus_reset t128_bus_reset
#define T128_IRQS 0xc4a8
-#endif /* ndef ASM */
#endif /* T128_H */
source "drivers/soc/qcom/Kconfig"
source "drivers/soc/rockchip/Kconfig"
source "drivers/soc/sunxi/Kconfig"
+source "drivers/soc/tegra/Kconfig"
source "drivers/soc/ti/Kconfig"
source "drivers/soc/versatile/Kconfig"
struct spm_driver_data *drv = NULL;
struct device_node *cpu_node, *saw_node;
int cpu;
- bool found;
+ bool found = 0;
for_each_possible_cpu(cpu) {
cpu_node = of_cpu_device_node_get(cpu);
--- /dev/null
+if ARCH_TEGRA
+
+# 32-bit ARM SoCs
+if ARM
+
+config ARCH_TEGRA_2x_SOC
+ bool "Enable support for Tegra20 family"
+ select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
+ select ARM_ERRATA_720789
+ select ARM_ERRATA_754327 if SMP
+ select ARM_ERRATA_764369 if SMP
+ select PINCTRL_TEGRA20
+ select PL310_ERRATA_727915 if CACHE_L2X0
+ select PL310_ERRATA_769419 if CACHE_L2X0
+ select TEGRA_TIMER
+ help
+ Support for NVIDIA Tegra AP20 and T20 processors, based on the
+ ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
+
+config ARCH_TEGRA_3x_SOC
+ bool "Enable support for Tegra30 family"
+ select ARM_ERRATA_754322
+ select ARM_ERRATA_764369 if SMP
+ select PINCTRL_TEGRA30
+ select PL310_ERRATA_769419 if CACHE_L2X0
+ select TEGRA_TIMER
+ help
+ Support for NVIDIA Tegra T30 processor family, based on the
+ ARM CortexA9MP CPU and the ARM PL310 L2 cache controller
+
+config ARCH_TEGRA_114_SOC
+ bool "Enable support for Tegra114 family"
+ select ARM_ERRATA_798181 if SMP
+ select ARM_L1_CACHE_SHIFT_6
+ select HAVE_ARM_ARCH_TIMER
+ select PINCTRL_TEGRA114
+ select TEGRA_TIMER
+ help
+ Support for NVIDIA Tegra T114 processor family, based on the
+ ARM CortexA15MP CPU
+
+config ARCH_TEGRA_124_SOC
+ bool "Enable support for Tegra124 family"
+ select ARM_L1_CACHE_SHIFT_6
+ select HAVE_ARM_ARCH_TIMER
+ select PINCTRL_TEGRA124
+ select TEGRA_TIMER
+ help
+ Support for NVIDIA Tegra T124 processor family, based on the
+ ARM CortexA15MP CPU
+
+endif
+
+# 64-bit ARM SoCs
+if ARM64
+
+config ARCH_TEGRA_132_SOC
+ bool "NVIDIA Tegra132 SoC"
+ select PINCTRL_TEGRA124
+ help
+ Enable support for NVIDIA Tegra132 SoC, based on the Denver
+ ARMv8 CPU. The Tegra132 SoC is similar to the Tegra124 SoC,
+ but contains an NVIDIA Denver CPU complex in place of
+ Tegra124's "4+1" Cortex-A15 CPU complex.
+
+config ARCH_TEGRA_210_SOC
+ bool "NVIDIA Tegra210 SoC"
+ select PINCTRL_TEGRA210
+ help
+ Enable support for the NVIDIA Tegra210 SoC. Also known as Tegra X1,
+ the Tegra210 has four Cortex-A57 cores paired with four Cortex-A53
+ cores in a switched configuration. It features a GPU of the Maxwell
+ architecture with support for DX11, SM4, OpenGL 4.5, OpenGL ES 3.1
+ and providing 256 CUDA cores. It supports hardware-accelerated en-
+ and decoding of various video standards including H.265, H.264 and
+ VP8 at 4K resolution and up to 60 fps.
+
+ Besides the multimedia features it also comes with a variety of I/O
+ controllers, such as GPIO, I2C, SPI, SDHCI, PCIe, SATA and XHCI, to
+ name only a few.
+
+endif
+endif
#define LIBCFS_FREE(ptr, size) \
do { \
- int s = (size); \
if (unlikely((ptr) == NULL)) { \
CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \
- "%s:%d\n", s, __FILE__, __LINE__); \
+ "%s:%d\n", (int)(size), __FILE__, __LINE__); \
break; \
} \
- if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \
- vfree(ptr); \
- else \
- kfree(ptr); \
+ kvfree(ptr); \
} while (0)
/******************************************************************************/
static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev)
{
- struct ib_device_attr *attr;
- int rc;
-
/* It's safe to assume a HCA can handle a page size
* matching that of the native system */
hdev->ibh_page_shift = PAGE_SHIFT;
hdev->ibh_page_size = 1 << PAGE_SHIFT;
hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1);
- LIBCFS_ALLOC(attr, sizeof(*attr));
- if (attr == NULL) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
- rc = ib_query_device(hdev->ibh_ibdev, attr);
- if (rc == 0)
- hdev->ibh_mr_size = attr->max_mr_size;
-
- LIBCFS_FREE(attr, sizeof(*attr));
-
- if (rc != 0) {
- CERROR("Failed to query IB device: %d\n", rc);
- return rc;
- }
-
+ hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size;
if (hdev->ibh_mr_size == ~0ULL) {
hdev->ibh_mr_shift = 64;
return 0;
int api32 = ll_need_32bit_api(sbi);
loff_t ret = -EINVAL;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (origin) {
case SEEK_SET:
break;
goto out;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
/* update time if requested */
rc = 0;
if (llss->ia2.ia_valid != 0) {
- mutex_lock(&llss->inode1->i_mutex);
+ inode_lock(llss->inode1);
rc = ll_setattr(file1->f_path.dentry, &llss->ia2);
- mutex_unlock(&llss->inode1->i_mutex);
+ inode_unlock(llss->inode1);
}
if (llss->ia1.ia_valid != 0) {
int rc1;
- mutex_lock(&llss->inode2->i_mutex);
+ inode_lock(llss->inode2);
rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1);
- mutex_unlock(&llss->inode2->i_mutex);
+ inode_unlock(llss->inode2);
if (rc == 0)
rc = rc1;
}
ATTR_MTIME | ATTR_MTIME_SET |
ATTR_ATIME | ATTR_ATIME_SET;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
rc = ll_setattr_raw(file->f_path.dentry, attr, true);
if (rc == -ENODATA)
rc = 0;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kfree(attr);
free_hss:
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* catch async errors that were recorded back when async writeback
* failed for pages in this mapping. */
fd->fd_write_failed = false;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
struct lov_stripe_md;
-extern spinlock_t inode_lock;
-
extern struct dentry *llite_root;
extern struct kset *llite_kset;
return -ENOMEM;
if (!S_ISDIR(inode->i_mode))
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
memcpy(&op_data->op_attr, attr, sizeof(*attr));
ll_finish_md_op_data(op_data);
if (!S_ISDIR(inode->i_mode)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
inode_dio_wait(inode);
}
goto out;
}
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
rc = ll_dir_read(dir, &lgd.ctx);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
if (!rc && !lgd.lgd_found)
rc = -ENOENT;
out:
* be asked to write less pages once, this purely depends on
* implementation. Anyway, we should be careful to avoid deadlocking.
*/
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
cl_io_fini(env, io);
return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
}
struct inode *inode = vmpage->mapping->host;
loff_t pos;
- if (mutex_trylock(&inode->i_mutex)) {
- mutex_unlock(&(inode)->i_mutex);
+ if (inode_trylock(inode)) {
+ inode_unlock((inode));
/* this is too bad. Someone is trying to write the
* page w/o holding inode mutex. This means we can
* 1. Need inode mutex to operate transient pages.
*/
if (iov_iter_rw(iter) == READ)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
LASSERT(obj->cob_transient_pages == 0);
while (iov_iter_count(iter)) {
out:
LASSERT(obj->cob_transient_pages == 0);
if (iov_iter_rw(iter) == READ)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (tot_bytes > 0) {
if (iov_iter_rw(iter) == WRITE) {
struct inode *inode = ccc_object_inode(io->ci_obj);
int result = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (cl_io_is_trunc(io))
result = vvp_io_setattr_trunc(env, ios, inode,
io->u.ci_setattr.sa_attr.lvb_size);
* because osc has already notified to destroy osc_extents. */
vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
static void vvp_io_setattr_fini(const struct lu_env *env,
{
struct inode *inode = ccc_object_inode(page->cp_obj);
- LASSERT(!mutex_trylock(&inode->i_mutex));
+ LASSERT(!inode_trylock(inode));
}
static int vvp_transient_page_own(const struct lu_env *env,
struct inode *inode = ccc_object_inode(slice->cpl_obj);
int locked;
- locked = !mutex_trylock(&inode->i_mutex);
+ locked = !inode_trylock(inode);
if (!locked)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return locked ? -EBUSY : -ENODATA;
}
struct ccc_object *clobj = cl2ccc(clp->cp_obj);
vvp_page_fini_common(cp);
- LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+ LASSERT(!inode_trylock(clobj->cob_inode));
clobj->cob_transient_pages--;
}
} else {
struct ccc_object *clobj = cl2ccc(obj);
- LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+ LASSERT(!inode_trylock(clobj->cob_inode));
cl_page_slice_add(page, &cpg->cpg_cl, obj,
&vvp_transient_page_ops);
clobj->cob_transient_pages++;
case C2_WR_TYPE_RDMA_READ:
entry->opcode = IB_WC_RDMA_READ;
break;
- case C2_WR_TYPE_BIND_MW:
- entry->opcode = IB_WC_BIND_MW;
- break;
case C2_WR_TYPE_RECV:
entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
entry->opcode = IB_WC_RECV;
C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
}
-static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 * iova_start)
+static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
{
struct c2_mr *mr;
u64 *page_list;
- u32 total_len;
- int err, i, j, k, page_shift, pbl_depth;
+ const u32 total_len = 0xffffffff; /* AMSO1100 limit */
+ int err, page_shift, pbl_depth, i;
+ u64 kva = 0;
- pbl_depth = 0;
- total_len = 0;
+ pr_debug("%s:%u\n", __func__, __LINE__);
- page_shift = PAGE_SHIFT;
/*
- * If there is only 1 buffer we assume this could
- * be a map of all phy mem...use a 32k page_shift.
+ * This is a map of all phy mem...use a 32k page_shift.
*/
- if (num_phys_buf == 1)
- page_shift += 3;
-
- for (i = 0; i < num_phys_buf; i++) {
-
- if (offset_in_page(buffer_list[i].addr)) {
- pr_debug("Unaligned Memory Buffer: 0x%x\n",
- (unsigned int) buffer_list[i].addr);
- return ERR_PTR(-EINVAL);
- }
-
- if (!buffer_list[i].size) {
- pr_debug("Invalid Buffer Size\n");
- return ERR_PTR(-EINVAL);
- }
-
- total_len += buffer_list[i].size;
- pbl_depth += ALIGN(buffer_list[i].size,
- BIT(page_shift)) >> page_shift;
- }
+ page_shift = PAGE_SHIFT + 3;
+ pbl_depth = ALIGN(total_len, BIT(page_shift)) >> page_shift;
page_list = vmalloc(sizeof(u64) * pbl_depth);
if (!page_list) {
return ERR_PTR(-ENOMEM);
}
- for (i = 0, j = 0; i < num_phys_buf; i++) {
-
- int naddrs;
-
- naddrs = ALIGN(buffer_list[i].size,
- BIT(page_shift)) >> page_shift;
- for (k = 0; k < naddrs; k++)
- page_list[j++] = (buffer_list[i].addr +
- (k << page_shift));
- }
+ for (i = 0; i < pbl_depth; i++)
+ page_list[i] = (i << page_shift);
mr = kmalloc(sizeof(*mr), GFP_KERNEL);
if (!mr) {
return ERR_PTR(-ENOMEM);
}
- mr->pd = to_c2pd(ib_pd);
+ mr->pd = to_c2pd(pd);
mr->umem = NULL;
pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
"*iova_start %llx, first pa %llx, last pa %llx\n",
__func__, page_shift, pbl_depth, total_len,
- (unsigned long long) *iova_start,
+ (unsigned long long) kva,
(unsigned long long) page_list[0],
(unsigned long long) page_list[pbl_depth-1]);
- err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
+ err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), page_list,
BIT(page_shift), pbl_depth,
- total_len, 0, iova_start,
+ total_len, 0, &kva,
c2_convert_access(acc), mr);
vfree(page_list);
if (err) {
return &mr->ibmr;
}
-static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
-{
- struct ib_phys_buf bl;
- u64 kva = 0;
-
- pr_debug("%s:%u\n", __func__, __LINE__);
-
- /* AMSO1100 limit */
- bl.size = 0xffffffff;
- bl.addr = 0;
- return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
-}
-
static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
dev->ibdev.destroy_cq = c2_destroy_cq;
dev->ibdev.poll_cq = c2_poll_cq;
dev->ibdev.get_dma_mr = c2_get_dma_mr;
- dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
dev->ibdev.reg_user_mr = c2_reg_user_mr;
dev->ibdev.dereg_mr = c2_dereg_mr;
dev->ibdev.get_port_immutable = c2_port_immutable;
union {
struct { /* type EHCA_MR_PGI_PHYS section */
- int num_phys_buf;
- struct ib_phys_buf *phys_buf_array;
- u64 next_buf;
+ u64 addr; /* address of the physical buffer being registered */
+ u64 size; /* buffer length; u64 because it is assigned from a u64 MR length */
} phy;
struct { /* type EHCA_MR_PGI_USER section */
struct ib_umem *region;
struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags, u64 *iova_start);
-
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int mr_access_flags,
struct ib_udata *udata);
-int ehca_rereg_phys_mr(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf, int mr_access_flags, u64 *iova_start);
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
-
int ehca_dereg_mr(struct ib_mr *mr);
struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
-
int ehca_dealloc_mw(struct ib_mw *mw);
struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
shca->ib_device.req_notify_cq = ehca_req_notify_cq;
/* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */
shca->ib_device.get_dma_mr = ehca_get_dma_mr;
- shca->ib_device.reg_phys_mr = ehca_reg_phys_mr;
shca->ib_device.reg_user_mr = ehca_reg_user_mr;
- shca->ib_device.query_mr = ehca_query_mr;
shca->ib_device.dereg_mr = ehca_dereg_mr;
- shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr;
shca->ib_device.alloc_mw = ehca_alloc_mw;
- shca->ib_device.bind_mw = ehca_bind_mw;
shca->ib_device.dealloc_mw = ehca_dealloc_mw;
shca->ib_device.alloc_fmr = ehca_alloc_fmr;
shca->ib_device.map_phys_fmr = ehca_map_phys_fmr;
/*----------------------------------------------------------------------*/
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start)
-{
- struct ib_mr *ib_mr;
- int ret;
- struct ehca_mr *e_mr;
- struct ehca_shca *shca =
- container_of(pd->device, struct ehca_shca, ib_device);
- struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-
- u64 size;
-
- if ((num_phys_buf <= 0) || !phys_buf_array) {
- ehca_err(pd->device, "bad input values: num_phys_buf=%x "
- "phys_buf_array=%p", num_phys_buf, phys_buf_array);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_phys_mr_exit0;
- }
- if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
- ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
- /*
- * Remote Write Access requires Local Write Access
- * Remote Atomic Access requires Local Write Access
- */
- ehca_err(pd->device, "bad input values: mr_access_flags=%x",
- mr_access_flags);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_phys_mr_exit0;
- }
-
- /* check physical buffer list and calculate size */
- ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
- iova_start, &size);
- if (ret) {
- ib_mr = ERR_PTR(ret);
- goto reg_phys_mr_exit0;
- }
- if ((size == 0) ||
- (((u64)iova_start + size) < (u64)iova_start)) {
- ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
- size, iova_start);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_phys_mr_exit0;
- }
-
- e_mr = ehca_mr_new();
- if (!e_mr) {
- ehca_err(pd->device, "out of memory");
- ib_mr = ERR_PTR(-ENOMEM);
- goto reg_phys_mr_exit0;
- }
-
- /* register MR on HCA */
- if (ehca_mr_is_maxmr(size, iova_start)) {
- e_mr->flags |= EHCA_MR_FLAG_MAXMR;
- ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
- e_pd, &e_mr->ib.ib_mr.lkey,
- &e_mr->ib.ib_mr.rkey);
- if (ret) {
- ib_mr = ERR_PTR(ret);
- goto reg_phys_mr_exit1;
- }
- } else {
- struct ehca_mr_pginfo pginfo;
- u32 num_kpages;
- u32 num_hwpages;
- u64 hw_pgsize;
-
- num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
- PAGE_SIZE);
- /* for kernel space we try most possible pgsize */
- hw_pgsize = ehca_get_max_hwpage_size(shca);
- num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
- hw_pgsize);
- memset(&pginfo, 0, sizeof(pginfo));
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_kpages = num_kpages;
- pginfo.hwpage_size = hw_pgsize;
- pginfo.num_hwpages = num_hwpages;
- pginfo.u.phy.num_phys_buf = num_phys_buf;
- pginfo.u.phy.phys_buf_array = phys_buf_array;
- pginfo.next_hwpage =
- ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
-
- ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
- e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
- &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
- if (ret) {
- ib_mr = ERR_PTR(ret);
- goto reg_phys_mr_exit1;
- }
- }
-
- /* successful registration of all pages */
- return &e_mr->ib.ib_mr;
-
-reg_phys_mr_exit1:
- ehca_mr_delete(e_mr);
-reg_phys_mr_exit0:
- if (IS_ERR(ib_mr))
- ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
- "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
- PTR_ERR(ib_mr), pd, phys_buf_array,
- num_phys_buf, mr_access_flags, iova_start);
- return ib_mr;
-} /* end ehca_reg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int mr_access_flags,
struct ib_udata *udata)
/*----------------------------------------------------------------------*/
-int ehca_rereg_phys_mr(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start)
-{
- int ret;
-
- struct ehca_shca *shca =
- container_of(mr->device, struct ehca_shca, ib_device);
- struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
- u64 new_size;
- u64 *new_start;
- u32 new_acl;
- struct ehca_pd *new_pd;
- u32 tmp_lkey, tmp_rkey;
- unsigned long sl_flags;
- u32 num_kpages = 0;
- u32 num_hwpages = 0;
- struct ehca_mr_pginfo pginfo;
-
- if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
- /* TODO not supported, because PHYP rereg hCall needs pages */
- ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
- "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
-
- if (mr_rereg_mask & IB_MR_REREG_PD) {
- if (!pd) {
- ehca_err(mr->device, "rereg with bad pd, pd=%p "
- "mr_rereg_mask=%x", pd, mr_rereg_mask);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- }
-
- if ((mr_rereg_mask &
- ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
- (mr_rereg_mask == 0)) {
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
-
- /* check other parameters */
- if (e_mr == shca->maxmr) {
- /* should be impossible, however reject to be sure */
- ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
- "shca->maxmr=%p mr->lkey=%x",
- mr, shca->maxmr, mr->lkey);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
- if (e_mr->flags & EHCA_MR_FLAG_FMR) {
- ehca_err(mr->device, "not supported for FMR, mr=%p "
- "flags=%x", mr, e_mr->flags);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- if (!phys_buf_array || num_phys_buf <= 0) {
- ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
- " phys_buf_array=%p num_phys_buf=%x",
- mr_rereg_mask, phys_buf_array, num_phys_buf);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- }
- if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */
- (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
- ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
- /*
- * Remote Write Access requires Local Write Access
- * Remote Atomic Access requires Local Write Access
- */
- ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
- "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
-
- /* set requested values dependent on rereg request */
- spin_lock_irqsave(&e_mr->mrlock, sl_flags);
- new_start = e_mr->start;
- new_size = e_mr->size;
- new_acl = e_mr->acl;
- new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
-
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
-
- new_start = iova_start; /* change address */
- /* check physical buffer list and calculate size */
- ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
- num_phys_buf, iova_start,
- &new_size);
- if (ret)
- goto rereg_phys_mr_exit1;
- if ((new_size == 0) ||
- (((u64)iova_start + new_size) < (u64)iova_start)) {
- ehca_err(mr->device, "bad input values: new_size=%llx "
- "iova_start=%p", new_size, iova_start);
- ret = -EINVAL;
- goto rereg_phys_mr_exit1;
- }
- num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
- new_size, PAGE_SIZE);
- num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
- new_size, hw_pgsize);
- memset(&pginfo, 0, sizeof(pginfo));
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_kpages = num_kpages;
- pginfo.hwpage_size = hw_pgsize;
- pginfo.num_hwpages = num_hwpages;
- pginfo.u.phy.num_phys_buf = num_phys_buf;
- pginfo.u.phy.phys_buf_array = phys_buf_array;
- pginfo.next_hwpage =
- ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
- }
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- new_acl = mr_access_flags;
- if (mr_rereg_mask & IB_MR_REREG_PD)
- new_pd = container_of(pd, struct ehca_pd, ib_pd);
-
- ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
- new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
- if (ret)
- goto rereg_phys_mr_exit1;
-
- /* successful reregistration */
- if (mr_rereg_mask & IB_MR_REREG_PD)
- mr->pd = pd;
- mr->lkey = tmp_lkey;
- mr->rkey = tmp_rkey;
-
-rereg_phys_mr_exit1:
- spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-rereg_phys_mr_exit0:
- if (ret)
- ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
- "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
- "iova_start=%p",
- ret, mr, mr_rereg_mask, pd, phys_buf_array,
- num_phys_buf, mr_access_flags, iova_start);
- return ret;
-} /* end ehca_rereg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
- int ret = 0;
- u64 h_ret;
- struct ehca_shca *shca =
- container_of(mr->device, struct ehca_shca, ib_device);
- struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
- unsigned long sl_flags;
- struct ehca_mr_hipzout_parms hipzout;
-
- if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
- ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
- "e_mr->flags=%x", mr, e_mr, e_mr->flags);
- ret = -EINVAL;
- goto query_mr_exit0;
- }
-
- memset(mr_attr, 0, sizeof(struct ib_mr_attr));
- spin_lock_irqsave(&e_mr->mrlock, sl_flags);
-
- h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
- if (h_ret != H_SUCCESS) {
- ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
- "hca_hndl=%llx mr_hndl=%llx lkey=%x",
- h_ret, mr, shca->ipz_hca_handle.handle,
- e_mr->ipz_mr_handle.handle, mr->lkey);
- ret = ehca2ib_return_code(h_ret);
- goto query_mr_exit1;
- }
- mr_attr->pd = mr->pd;
- mr_attr->device_virt_addr = hipzout.vaddr;
- mr_attr->size = hipzout.len;
- mr_attr->lkey = hipzout.lkey;
- mr_attr->rkey = hipzout.rkey;
- ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
-
-query_mr_exit1:
- spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-query_mr_exit0:
- if (ret)
- ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
- ret, mr, mr_attr);
- return ret;
-} /* end ehca_query_mr() */
-
-/*----------------------------------------------------------------------*/
-
int ehca_dereg_mr(struct ib_mr *mr)
{
int ret = 0;
/*----------------------------------------------------------------------*/
-int ehca_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- /* TODO: not supported up to now */
- ehca_gen_err("bind MW currently not supported by HCAD");
-
- return -EPERM;
-} /* end ehca_bind_mw() */
-
-/*----------------------------------------------------------------------*/
-
int ehca_dealloc_mw(struct ib_mw *mw)
{
u64 h_ret;
u64 *iova_start;
u64 size_maxmr;
struct ehca_mr_pginfo pginfo;
- struct ib_phys_buf ib_pbuf;
u32 num_kpages;
u32 num_hwpages;
u64 hw_pgsize;
/* register internal max-MR on HCA */
size_maxmr = ehca_mr_len;
iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
- ib_pbuf.addr = 0;
- ib_pbuf.size = size_maxmr;
num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
PAGE_SIZE);
hw_pgsize = ehca_get_max_hwpage_size(shca);
pginfo.num_kpages = num_kpages;
pginfo.num_hwpages = num_hwpages;
pginfo.hwpage_size = hw_pgsize;
- pginfo.u.phy.num_phys_buf = 1;
- pginfo.u.phy.phys_buf_array = &ib_pbuf;
+ pginfo.u.phy.addr = 0;
+ pginfo.u.phy.size = size_maxmr;
ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
&pginfo, &e_mr->ib.ib_mr.lkey,
e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
e_mr->ib.ib_mr.uobject = NULL;
atomic_inc(&(e_pd->ib_pd.usecnt));
- atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
*e_maxmr = e_mr;
return 0;
/*----------------------------------------------------------------------*/
-/*
- * check physical buffer array of MR verbs for validness and
- * calculates MR size
- */
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- u64 *iova_start,
- u64 *size)
-{
- struct ib_phys_buf *pbuf = phys_buf_array;
- u64 size_count = 0;
- u32 i;
-
- if (num_phys_buf == 0) {
- ehca_gen_err("bad phys buf array len, num_phys_buf=0");
- return -EINVAL;
- }
- /* check first buffer */
- if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
- ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
- "pbuf->addr=%llx pbuf->size=%llx",
- iova_start, pbuf->addr, pbuf->size);
- return -EINVAL;
- }
- if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
- (num_phys_buf > 1)) {
- ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
- "pbuf->size=%llx", pbuf->addr, pbuf->size);
- return -EINVAL;
- }
-
- for (i = 0; i < num_phys_buf; i++) {
- if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
- ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
- "pbuf->size=%llx",
- i, pbuf->addr, pbuf->size);
- return -EINVAL;
- }
- if (((i > 0) && /* not 1st */
- (i < (num_phys_buf - 1)) && /* not last */
- (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
- ehca_gen_err("bad size, i=%x pbuf->size=%llx",
- i, pbuf->size);
- return -EINVAL;
- }
- size_count += pbuf->size;
- pbuf++;
- }
-
- *size = size_count;
- return 0;
-} /* end ehca_mr_chk_buf_and_calc_size() */
-
-/*----------------------------------------------------------------------*/
-
/* check page list of map FMR verb for validness */
int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
u64 *page_list,
u32 number, u64 *kpage)
{
int ret = 0;
- struct ib_phys_buf *pbuf;
+ u64 addr = pginfo->u.phy.addr;
+ u64 size = pginfo->u.phy.size;
u64 num_hw, offs_hw;
u32 i = 0;
- /* loop over desired phys_buf_array entries */
- while (i < number) {
- pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
- num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
- pbuf->size, pginfo->hwpage_size);
- offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
- pginfo->hwpage_size;
- while (pginfo->next_hwpage < offs_hw + num_hw) {
- /* sanity check */
- if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
- (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
- ehca_gen_err("kpage_cnt >= num_kpages, "
- "kpage_cnt=%llx num_kpages=%llx "
- "hwpage_cnt=%llx "
- "num_hwpages=%llx i=%x",
- pginfo->kpage_cnt,
- pginfo->num_kpages,
- pginfo->hwpage_cnt,
- pginfo->num_hwpages, i);
- return -EFAULT;
- }
- *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
- (pginfo->next_hwpage * pginfo->hwpage_size);
- if ( !(*kpage) && pbuf->addr ) {
- ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
- "next_hwpage=%llx", pbuf->addr,
- pbuf->size, pginfo->next_hwpage);
- return -EFAULT;
- }
- (pginfo->hwpage_cnt)++;
- (pginfo->next_hwpage)++;
- if (PAGE_SIZE >= pginfo->hwpage_size) {
- if (pginfo->next_hwpage %
- (PAGE_SIZE / pginfo->hwpage_size) == 0)
- (pginfo->kpage_cnt)++;
- } else
- pginfo->kpage_cnt += pginfo->hwpage_size /
- PAGE_SIZE;
- kpage++;
- i++;
- if (i >= number) break;
+ num_hw = NUM_CHUNKS((addr % pginfo->hwpage_size) + size,
+ pginfo->hwpage_size);
+ offs_hw = (addr & ~(pginfo->hwpage_size - 1)) / pginfo->hwpage_size;
+
+ while (pginfo->next_hwpage < offs_hw + num_hw) {
+ /* sanity check */
+ if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
+ (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
+ ehca_gen_err("kpage_cnt >= num_kpages, "
+ "kpage_cnt=%llx num_kpages=%llx "
+ "hwpage_cnt=%llx "
+ "num_hwpages=%llx i=%x",
+ pginfo->kpage_cnt,
+ pginfo->num_kpages,
+ pginfo->hwpage_cnt,
+ pginfo->num_hwpages, i);
+ return -EFAULT;
}
- if (pginfo->next_hwpage >= offs_hw + num_hw) {
- (pginfo->u.phy.next_buf)++;
- pginfo->next_hwpage = 0;
+ *kpage = (addr & ~(pginfo->hwpage_size - 1)) +
+ (pginfo->next_hwpage * pginfo->hwpage_size);
+ if ( !(*kpage) && addr ) {
+ ehca_gen_err("addr=%llx size=%llx "
+ "next_hwpage=%llx", addr,
+ size, pginfo->next_hwpage);
+ return -EFAULT;
}
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ if (PAGE_SIZE >= pginfo->hwpage_size) {
+ if (pginfo->next_hwpage %
+ (PAGE_SIZE / pginfo->hwpage_size) == 0)
+ (pginfo->kpage_cnt)++;
+ } else
+ pginfo->kpage_cnt += pginfo->hwpage_size /
+ PAGE_SIZE;
+ kpage++;
+ i++;
+ if (i >= number) break;
}
+ if (pginfo->next_hwpage >= offs_hw + num_hw) {
+ pginfo->next_hwpage = 0;
+ }
+
return ret;
}
int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- u64 *iova_start,
- u64 *size);
-
int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
u64 *page_list,
int list_len);
static const u8 ib_wc_opcode[255] = {
[0x01] = IB_WC_RECV+1,
[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
- [0x04] = IB_WC_BIND_MW+1,
[0x08] = IB_WC_FETCH_ADD+1,
[0x10] = IB_WC_COMP_SWAP+1,
[0x20] = IB_WC_RDMA_WRITE+1,
rval = init_mregion(&mr->mr, pd, count);
if (rval)
goto bail;
- /*
- * ib_reg_phys_mr() will initialize mr->ibmr except for
- * lkey and rkey.
- */
+
rval = hfi1_alloc_lkey(&mr->mr, 0);
if (rval)
goto bail_mregion;
goto done;
}
-/**
- * hfi1_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- struct hfi1_mr *mr;
- int n, m, i;
- struct ib_mr *ret;
-
- mr = alloc_mr(num_phys_buf, pd);
- if (IS_ERR(mr)) {
- ret = (struct ib_mr *)mr;
- goto bail;
- }
-
- mr->mr.user_base = *iova_start;
- mr->mr.iova = *iova_start;
- mr->mr.access_flags = acc;
-
- m = 0;
- n = 0;
- for (i = 0; i < num_phys_buf; i++) {
- mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
- mr->mr.map[m]->segs[n].length = buffer_list[i].size;
- mr->mr.length += buffer_list[i].size;
- n++;
- if (n == HFI1_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
/**
* hfi1_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
ibdev->poll_cq = hfi1_poll_cq;
ibdev->req_notify_cq = hfi1_req_notify_cq;
ibdev->get_dma_mr = hfi1_get_dma_mr;
- ibdev->reg_phys_mr = hfi1_reg_phys_mr;
ibdev->reg_user_mr = hfi1_reg_user_mr;
ibdev->dereg_mr = hfi1_dereg_mr;
ibdev->alloc_mr = hfi1_alloc_mr;
struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
-
struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
{
int error;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
error = ipathfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
else
error = PTR_ERR(*dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return error;
}
int ret;
root = dget(sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
dir = lookup_one_len(unit, root, strlen(unit));
ret = simple_rmdir(d_inode(root), dir);
bail:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
return ret;
}
}
mr->mr.mapsz = m;
- /*
- * ib_reg_phys_mr() will initialize mr->ibmr except for
- * lkey and rkey.
- */
if (!ipath_alloc_lkey(lk_table, &mr->mr))
goto bail;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
return mr;
}
-/**
- * ipath_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- struct ipath_mr *mr;
- int n, m, i;
- struct ib_mr *ret;
-
- mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
- if (mr == NULL) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- mr->mr.pd = pd;
- mr->mr.user_base = *iova_start;
- mr->mr.iova = *iova_start;
- mr->mr.length = 0;
- mr->mr.offset = 0;
- mr->mr.access_flags = acc;
- mr->mr.max_segs = num_phys_buf;
- mr->umem = NULL;
-
- m = 0;
- n = 0;
- for (i = 0; i < num_phys_buf; i++) {
- mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
- mr->mr.map[m]->segs[n].length = buffer_list[i].size;
- mr->mr.length += buffer_list[i].size;
- n++;
- if (n == IPATH_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
/**
* ipath_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
dev->poll_cq = ipath_poll_cq;
dev->req_notify_cq = ipath_req_notify_cq;
dev->get_dma_mr = ipath_get_dma_mr;
- dev->reg_phys_mr = ipath_reg_phys_mr;
dev->reg_user_mr = ipath_reg_user_mr;
dev->dereg_mr = ipath_dereg_mr;
dev->alloc_fmr = ipath_alloc_fmr;
struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
-
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
}
bip = bio_integrity_alloc(bio, GFP_NOIO, cmd->t_prot_nents);
- if (!bip) {
+ if (IS_ERR(bip)) {
pr_err("Unable to allocate bio_integrity_payload\n");
- return -ENOMEM;
+ return PTR_ERR(bip);
}
bip->bip_iter.bi_size = (cmd->data_length / dev->dev_attrib.block_size) *
/* Braswell thermal reporting device */
#define PCI_DEVICE_ID_PROC_BSW_THERMAL 0x22DC
+/* Broxton thermal reporting device */
+#define PCI_DEVICE_ID_PROC_BXT0_THERMAL 0x0A8C
+#define PCI_DEVICE_ID_PROC_BXT1_THERMAL 0x1A8C
+#define PCI_DEVICE_ID_PROC_BXTX_THERMAL 0x4A8C
+#define PCI_DEVICE_ID_PROC_BXTP_THERMAL 0x5A8C
+
struct power_config {
u32 index;
u32 min_uw;
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_HSB_THERMAL)},
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_SKL_THERMAL)},
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BSW_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT0_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT1_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTX_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTP_THERMAL)},
{ 0, },
};
/* dev ops for Wildcat Point */
-static struct pch_dev_ops pch_dev_ops_wpt = {
+static const struct pch_dev_ops pch_dev_ops_wpt = {
.hw_init = pch_wpt_init,
.get_temp = pch_wpt_get_temp,
};
#define rcar_has_irq_support(priv) ((priv)->common->base)
#define rcar_id_to_shift(priv) ((priv)->id * 8)
-#ifdef DEBUG
-# define rcar_force_update_temp(priv) 1
-#else
-# define rcar_force_update_temp(priv) 0
-#endif
+static const struct of_device_id rcar_thermal_dt_ids[] = {
+ { .compatible = "renesas,rcar-thermal", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
/*
* basic functions
static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp)
{
struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+ int tmp;
+ int ret;
- if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv))
- rcar_thermal_update_temp(priv);
+ ret = rcar_thermal_update_temp(priv);
+ if (ret < 0)
+ return ret;
mutex_lock(&priv->lock);
- *temp = MCELSIUS((priv->ctemp * 5) - 65);
+ tmp = MCELSIUS((priv->ctemp * 5) - 65);
mutex_unlock(&priv->lock);
+ if ((tmp < MCELSIUS(-45)) || (tmp > MCELSIUS(125))) {
+ struct device *dev = rcar_priv_to_dev(priv);
+
+ dev_err(dev, "it couldn't measure temperature correctly\n");
+ return -EIO;
+ }
+
+ *temp = tmp;
+
return 0;
}
unsigned long flags;
u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */
+ if (!rcar_has_irq_support(priv))
+ return;
+
spin_lock_irqsave(&common->lock, flags);
rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask);
{
struct rcar_thermal_priv *priv;
int cctemp, nctemp;
+ int ret;
priv = container_of(work, struct rcar_thermal_priv, work.work);
rcar_thermal_get_temp(priv->zone, &cctemp);
- rcar_thermal_update_temp(priv);
+ ret = rcar_thermal_update_temp(priv);
+ if (ret < 0)
+ return;
+
rcar_thermal_irq_enable(priv);
rcar_thermal_get_temp(priv->zone, &nctemp);
struct rcar_thermal_priv *priv;
rcar_thermal_for_each_priv(priv, common) {
- if (rcar_has_irq_support(priv))
- rcar_thermal_irq_disable(priv);
+ rcar_thermal_irq_disable(priv);
thermal_zone_device_unregister(priv->zone);
}
mutex_init(&priv->lock);
INIT_LIST_HEAD(&priv->list);
INIT_DELAYED_WORK(&priv->work, rcar_thermal_work);
- rcar_thermal_update_temp(priv);
+ ret = rcar_thermal_update_temp(priv);
+ if (ret < 0)
+ goto error_unregister;
priv->zone = thermal_zone_device_register("rcar_thermal",
1, 0, priv,
goto error_unregister;
}
- if (rcar_has_irq_support(priv))
- rcar_thermal_irq_enable(priv);
+ rcar_thermal_irq_enable(priv);
list_move_tail(&priv->list, &common->head);
return ret;
}
-static const struct of_device_id rcar_thermal_dt_ids[] = {
- { .compatible = "renesas,rcar-thermal", },
- {},
-};
-MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
-
static struct platform_driver rcar_thermal_driver = {
.driver = {
.name = "rcar_thermal",
};
/**
- * the system Temperature Sensors tshut(tshut) polarity
+ * The system Temperature Sensors tshut(tshut) polarity
* the bit 8 is tshut polarity.
* 0: low active, 1: high active
*/
};
/**
-* The conversion table has the adc value and temperature.
-* ADC_DECREMENT is the adc value decremnet.(e.g. v2_code_table)
-* ADC_INCREMNET is the adc value incremnet.(e.g. v3_code_table)
-*/
+ * The conversion table has the adc value and temperature.
+ * ADC_DECREMENT: the adc value is decreasing (e.g. v2_code_table).
+ * ADC_INCREMENT: the adc value is increasing (e.g. v3_code_table).
+ */
enum adc_sort_mode {
ADC_DECREMENT = 0,
ADC_INCREMENT,
*/
#define SOC_MAX_SENSORS 2
+/**
+ * struct chip_tsadc_table: hold information about chip-specific differences
+ * @id: conversion table
+ * @length: size of conversion table
+ * @data_mask: mask to apply on data inputs
+ * @mode: sort mode of this adc variant (incrementing or decrementing)
+ */
struct chip_tsadc_table {
const struct tsadc_table *id;
-
- /* the array table size*/
unsigned int length;
-
- /* that analogic mask data */
u32 data_mask;
-
- /* the sort mode is adc value that increment or decrement in table */
enum adc_sort_mode mode;
};
#define TSADCV2_SHUT_2GPIO_SRC_EN(chn) BIT(4 + (chn))
#define TSADCV2_SHUT_2CRU_SRC_EN(chn) BIT(8 + (chn))
+#define TSADCV1_INT_PD_CLEAR_MASK ~BIT(16)
#define TSADCV2_INT_PD_CLEAR_MASK ~BIT(8)
#define TSADCV2_DATA_MASK 0xfff
int temp;
};
+/**
+ * Note:
+ * Code to Temperature mapping of the Temperature sensor is a piecewise linear
+ * curve. Any temperature or code falling between two given temperatures can
+ * be linearly interpolated.
+ * Code to Temperature mapping should be updated based on silicon results.
+ */
+static const struct tsadc_table v1_code_table[] = {
+ {TSADCV3_DATA_MASK, -40000},
+ {436, -40000},
+ {431, -35000},
+ {426, -30000},
+ {421, -25000},
+ {416, -20000},
+ {411, -15000},
+ {406, -10000},
+ {401, -5000},
+ {395, 0},
+ {390, 5000},
+ {385, 10000},
+ {380, 15000},
+ {375, 20000},
+ {370, 25000},
+ {364, 30000},
+ {359, 35000},
+ {354, 40000},
+ {349, 45000},
+ {343, 50000},
+ {338, 55000},
+ {333, 60000},
+ {328, 65000},
+ {322, 70000},
+ {317, 75000},
+ {312, 80000},
+ {307, 85000},
+ {301, 90000},
+ {296, 95000},
+ {291, 100000},
+ {286, 105000},
+ {280, 110000},
+ {275, 115000},
+ {270, 120000},
+ {264, 125000},
+};
+
static const struct tsadc_table v2_code_table[] = {
{TSADCV2_DATA_MASK, -40000},
{3800, -40000},
{TSADCV3_DATA_MASK, 125000},
};
+/*
+ * Code/temperature pairs referenced by rk3399_tsadc_data. The codes decrease
+ * while the temperature column rises from -40000 to 125000, which matches the
+ * ADC_DECREMENT sort mode that rk3399_tsadc_data declares for this table.
+ * NOTE(review): temperature values are presumably millidegrees Celsius -
+ * confirm against the conversion helpers.
+ */
+static const struct tsadc_table v4_code_table[] = {
+ {TSADCV3_DATA_MASK, -40000},
+ {431, -40000},
+ {426, -35000},
+ {421, -30000},
+ {415, -25000},
+ {410, -20000},
+ {405, -15000},
+ {399, -10000},
+ {394, -5000},
+ {389, 0},
+ {383, 5000},
+ {378, 10000},
+ {373, 15000},
+ {367, 20000},
+ {362, 25000},
+ {357, 30000},
+ {351, 35000},
+ {346, 40000},
+ {340, 45000},
+ {335, 50000},
+ {330, 55000},
+ {324, 60000},
+ {319, 65000},
+ {313, 70000},
+ {308, 75000},
+ {302, 80000},
+ {297, 85000},
+ {291, 90000},
+ {286, 95000},
+ {281, 100000},
+ {275, 105000},
+ {270, 110000},
+ {264, 115000},
+ {259, 120000},
+ {253, 125000},
+};
+
static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,
int temp)
{
regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE);
}
+/*
+ * irq_ack callback used by rk3228_tsadc_data and rk3399_tsadc_data: read
+ * TSADCV2_INT_PD and write the value back masked with
+ * TSADCV1_INT_PD_CLEAR_MASK (~BIT(16)), i.e. with bit 16 cleared.
+ */
+static void rk_tsadcv1_irq_ack(void __iomem *regs)
+{
+ u32 val;
+
+ val = readl_relaxed(regs + TSADCV2_INT_PD);
+ writel_relaxed(val & TSADCV1_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
+}
+
static void rk_tsadcv2_irq_ack(void __iomem *regs)
{
u32 val;
writel_relaxed(val, regs + TSADCV2_INT_EN);
}
+/*
+ * Chip description matched by "rockchip,rk3228-tsadc": a single CPU channel,
+ * v1_code_table as the conversion table, rk_tsadcv1_irq_ack for interrupt
+ * acknowledge, and the rk_tsadcv2_* callbacks for everything else.
+ */
+static const struct rockchip_tsadc_chip rk3228_tsadc_data = {
+ .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+ .chn_num = 1, /* one channel for tsadc */
+
+ .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+ .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+ .tshut_temp = 95000,
+
+ .initialize = rk_tsadcv2_initialize,
+ .irq_ack = rk_tsadcv1_irq_ack,
+ .control = rk_tsadcv2_control,
+ .get_temp = rk_tsadcv2_get_temp,
+ .set_tshut_temp = rk_tsadcv2_tshut_temp,
+ .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+ .table = {
+ .id = v1_code_table,
+ .length = ARRAY_SIZE(v1_code_table),
+ .data_mask = TSADCV3_DATA_MASK,
+ .mode = ADC_DECREMENT,
+ },
+};
+
static const struct rockchip_tsadc_chip rk3288_tsadc_data = {
.chn_id[SENSOR_CPU] = 1, /* cpu sensor is channel 1 */
.chn_id[SENSOR_GPU] = 2, /* gpu sensor is channel 2 */
},
};
+/*
+ * Chip description matched by "rockchip,rk3399-tsadc": CPU and GPU channels,
+ * v4_code_table as the conversion table, rk_tsadcv1_irq_ack for interrupt
+ * acknowledge, and the rk_tsadcv2_* callbacks for everything else.
+ */
+static const struct rockchip_tsadc_chip rk3399_tsadc_data = {
+ .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+ .chn_id[SENSOR_GPU] = 1, /* gpu sensor is channel 1 */
+ .chn_num = 2, /* two channels for tsadc */
+
+ .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+ .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+ .tshut_temp = 95000,
+
+ .initialize = rk_tsadcv2_initialize,
+ .irq_ack = rk_tsadcv1_irq_ack,
+ .control = rk_tsadcv2_control,
+ .get_temp = rk_tsadcv2_get_temp,
+ .set_tshut_temp = rk_tsadcv2_tshut_temp,
+ .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+ .table = {
+ .id = v4_code_table,
+ .length = ARRAY_SIZE(v4_code_table),
+ .data_mask = TSADCV3_DATA_MASK,
+ .mode = ADC_DECREMENT,
+ },
+};
+
static const struct of_device_id of_rockchip_thermal_match[] = {
+ {
+ .compatible = "rockchip,rk3228-tsadc",
+ .data = (void *)&rk3228_tsadc_data,
+ },
{
.compatible = "rockchip,rk3288-tsadc",
.data = (void *)&rk3288_tsadc_data,
.compatible = "rockchip,rk3368-tsadc",
.data = (void *)&rk3368_tsadc_data,
},
+ {
+ .compatible = "rockchip,rk3399-tsadc",
+ .data = (void *)&rk3399_tsadc_data,
+ },
{ /* end */ },
};
MODULE_DEVICE_TABLE(of, of_rockchip_thermal_match);
return 0;
}
-/*
+/**
* Reset TSADC Controller, reset all tsadc registers.
*/
static void rockchip_thermal_reset_controller(struct reset_control *reset)
next_target = instance->target;
dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
+ if (!instance->initialized) {
+ if (throttle) {
+ next_target = (cur_state + 1) >= instance->upper ?
+ instance->upper :
+ ((cur_state + 1) < instance->lower ?
+ instance->lower : (cur_state + 1));
+ } else {
+ next_target = THERMAL_NO_TARGET;
+ }
+
+ return next_target;
+ }
+
switch (trend) {
case THERMAL_TREND_RAISING:
if (throttle) {
dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n",
old_target, (int)instance->target);
- if (old_target == instance->target)
+ if (instance->initialized && old_target == instance->target)
continue;
/* Activate a passive thermal instance */
instance->target == THERMAL_NO_TARGET)
update_passive_instance(tz, trip_type, -1);
-
+ instance->initialized = true;
instance->cdev->updated = false; /* cdev needs update */
}
#include <linux/of.h>
#include <net/netlink.h>
#include <net/genetlink.h>
+#include <linux/suspend.h>
#define CREATE_TRACE_POINTS
#include <trace/events/thermal.h>
static DEFINE_MUTEX(thermal_list_lock);
static DEFINE_MUTEX(thermal_governor_lock);
+static atomic_t in_suspend;
+
static struct thermal_governor *def_governor;
static struct thermal_governor *__find_governor(const char *name)
mutex_unlock(&tz->lock);
trace_thermal_temperature(tz);
- dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
- tz->last_temperature, tz->temperature);
+ if (tz->last_temperature == THERMAL_TEMP_INVALID)
+ dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
+ tz->temperature);
+ else
+ dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
+ tz->last_temperature, tz->temperature);
+}
+
+/*
+ * Drop the cached state of a thermal zone: the stored temperature is set to
+ * THERMAL_TEMP_INVALID, tz->passive is zeroed and every thermal instance
+ * linked on tz->thermal_instances is marked as not initialized.
+ */
+static void thermal_zone_device_reset(struct thermal_zone_device *tz)
+{
+ struct thermal_instance *pos;
+
+ tz->temperature = THERMAL_TEMP_INVALID;
+ tz->passive = 0;
+ list_for_each_entry(pos, &tz->thermal_instances, tz_node)
+ pos->initialized = false;
 }
void thermal_zone_device_update(struct thermal_zone_device *tz)
{
int count;
+ if (atomic_read(&in_suspend))
+ return;
+
if (!tz->ops->get_temp)
return;
return -EINVAL;
ret = tz->ops->set_trip_temp(tz, trip, temperature);
+ if (ret)
+ return ret;
- return ret ? ret : count;
+ thermal_zone_device_update(tz);
+
+ return count;
}
static ssize_t
if (!result) {
list_add_tail(&dev->tz_node, &tz->thermal_instances);
list_add_tail(&dev->cdev_node, &cdev->thermal_instances);
+ atomic_set(&tz->need_update, 1);
}
mutex_unlock(&cdev->lock);
mutex_unlock(&tz->lock);
const struct thermal_cooling_device_ops *ops)
{
struct thermal_cooling_device *cdev;
+ struct thermal_zone_device *pos = NULL;
int result;
if (type && strlen(type) >= THERMAL_NAME_LENGTH)
/* Update binding information for 'this' new cdev */
bind_cdev(cdev);
+ mutex_lock(&thermal_list_lock);
+ list_for_each_entry(pos, &thermal_tz_list, node)
+ if (atomic_cmpxchg(&pos->need_update, 1, 0))
+ thermal_zone_device_update(pos);
+ mutex_unlock(&thermal_list_lock);
+
return cdev;
}
tz->trips = trips;
tz->passive_delay = passive_delay;
tz->polling_delay = polling_delay;
+ /* A new thermal zone needs to be updated anyway. */
+ atomic_set(&tz->need_update, 1);
dev_set_name(&tz->device, "thermal_zone%d", tz->id);
result = device_register(&tz->device);
INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
- thermal_zone_device_update(tz);
+ thermal_zone_device_reset(tz);
+ /* Update the new thermal zone and mark it as already updated. */
+ if (atomic_cmpxchg(&tz->need_update, 1, 0))
+ thermal_zone_device_update(tz);
return tz;
thermal_gov_power_allocator_unregister();
}
+/*
+ * PM notifier callback for the thermal core.
+ *
+ * On the *_PREPARE events in_suspend is set, which makes
+ * thermal_zone_device_update() return early. On the matching POST_* events
+ * in_suspend is cleared and every zone on thermal_tz_list is reset and then
+ * updated again. Any other event is ignored. Always returns 0.
+ *
+ * NOTE(review): thermal_tz_list is walked here without taking
+ * thermal_list_lock, while the cooling-device registration path takes that
+ * mutex around the same walk - confirm this is intentional.
+ */
+static int thermal_pm_notify(struct notifier_block *nb,
+ unsigned long mode, void *_unused)
+{
+ struct thermal_zone_device *tz;
+
+ switch (mode) {
+ case PM_HIBERNATION_PREPARE:
+ case PM_RESTORE_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ atomic_set(&in_suspend, 1);
+ break;
+ case PM_POST_HIBERNATION:
+ case PM_POST_RESTORE:
+ case PM_POST_SUSPEND:
+ atomic_set(&in_suspend, 0);
+ list_for_each_entry(tz, &thermal_tz_list, node) {
+ thermal_zone_device_reset(tz);
+ thermal_zone_device_update(tz);
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static struct notifier_block thermal_pm_nb = {
+ .notifier_call = thermal_pm_notify,
+};
+
static int __init thermal_init(void)
{
int result;
if (result)
goto exit_netlink;
+ result = register_pm_notifier(&thermal_pm_nb);
+ if (result)
+ pr_warn("Thermal: Can not register suspend notifier, return %d\n",
+ result);
+
return 0;
exit_netlink:
static void __exit thermal_exit(void)
{
+ unregister_pm_notifier(&thermal_pm_nb);
of_thermal_destroy_zones();
genetlink_exit();
class_unregister(&thermal_class);
struct thermal_zone_device *tz;
struct thermal_cooling_device *cdev;
int trip;
+ bool initialized;
unsigned long upper; /* Highest cooling state for this trip point */
unsigned long lower; /* Lowest cooling state for this trip point */
unsigned long target; /* expected cooling state */
unsigned long flags;
int tx_list_empty;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
spin_lock_irqsave(&dev->lock, flags);
tx_list_empty = (likely(list_empty(&dev->tx_reqs)));
spin_unlock_irqrestore(&dev->lock, flags);
wait_event_interruptible(dev->tx_flush_wait,
(likely(list_empty(&dev->tx_reqs_active))));
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
spin_unlock_irq (&dev->lock);
/* break link to dcache */
- mutex_lock (&parent->i_mutex);
+ inode_lock(parent);
d_delete (dentry);
dput (dentry);
- mutex_unlock (&parent->i_mutex);
+ inode_unlock(parent);
spin_lock_irq (&dev->lock);
}
if (!access_ok(VERIFY_WRITE, buf, nbytes))
return -EFAULT;
- mutex_lock(&file_inode(file)->i_mutex);
+ inode_lock(file_inode(file));
list_for_each_entry_safe(req, tmp_req, queue, queue) {
len = snprintf(tmpbuf, sizeof(tmpbuf),
"%8p %08x %c%c%c %5d %c%c%c\n",
nbytes -= len;
buf += len;
}
- mutex_unlock(&file_inode(file)->i_mutex);
+ inode_unlock(file_inode(file));
return actual;
}
u32 *data;
int ret = -ENOMEM;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
udc = inode->i_private;
data = kmalloc(inode->i_size, GFP_KERNEL);
if (!data)
ret = 0;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
struct inode *inode = file_inode(file);
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = simple_read_from_buffer(buf, nbytes, ppos,
file->private_data,
file_inode(file)->i_size);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
depends on ARCH_TEGRA
select USB_EHCI_ROOT_HUB_TT
select USB_PHY
+ select USB_ULPI
+ select USB_ULPI_VIEWPORT
help
This driver enables support for the internal USB Host Controllers
found in NVIDIA Tegra SoCs. The controllers are EHCI compliant.
if (!info->fbdefio)
return 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Kill off the delayed work */
cancel_delayed_work_sync(&info->deferred_work);
/* Run it immediately */
schedule_delayed_work(&info->deferred_work, 0);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
if (retval)
return retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
v9fs_blank_wstat(&wstat);
retval = p9_client_wstat(fid, &wstat);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
if (retval)
return retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
retval = p9_client_fsync(fid, datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (inode->i_size != AFFS_I(inode)->mmu_private)
affs_truncate(inode);
affs_free_prealloc(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = write_inode_now(inode, 0);
err = sync_blockdev(inode->i_sb->s_bdev);
if (!ret)
ret = err;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
const struct file_operations affs_file_operations = {
fl->fl_type = F_UNLCK;
- mutex_lock(&vnode->vfs_inode.i_mutex);
+ inode_lock(&vnode->vfs_inode);
/* check local lock records first */
ret = 0;
}
error:
- mutex_unlock(&vnode->vfs_inode.i_mutex);
+ inode_unlock(&vnode->vfs_inode);
_leave(" = %d [%hd]", ret, fl->fl_type);
return ret;
}
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* use a writeback record as a marker in the queue - when this reaches
* the front of the queue, all the outstanding writes are either
afs_put_writeback(wb);
_leave(" = %d", ret);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
struct timespec now;
unsigned int ia_valid = attr->ia_valid;
- WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(inode));
if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
case 3:
/* Delete this handler. */
root = dget(file->f_path.dentry->d_sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
kill_node(e);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
break;
default:
return PTR_ERR(e);
root = dget(sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = lookup_one_len(e->name, root, strlen(e->name));
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
out2:
dput(dentry);
out:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
if (err) {
case 3:
/* Delete all handlers. */
root = dget(file->f_path.dentry->d_sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
while (!list_empty(&entries))
kill_node(list_entry(entries.next, Node, list));
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
break;
default:
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
- if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+ if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
return;
invalidate_bh_lrus();
struct inode *bd_inode = bdev_file_inode(file);
loff_t retval;
- mutex_lock(&bd_inode->i_mutex);
+ inode_lock(bd_inode);
retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
- mutex_unlock(&bd_inode->i_mutex);
+ inode_unlock(bd_inode);
return retval;
}
{
unsigned bsize = bdev_logical_block_size(bdev);
- mutex_lock(&bdev->bd_inode->i_mutex);
+ inode_lock(bdev->bd_inode);
i_size_write(bdev->bd_inode, size);
- mutex_unlock(&bdev->bd_inode->i_mutex);
+ inode_unlock(bdev->bd_inode);
while (bsize < PAGE_CACHE_SIZE) {
if (size & bsize)
break;
struct inode *bd_inode = bdev_file_inode(vma->vm_file);
struct block_device *bdev = I_BDEV(bd_inode);
- mutex_lock(&bd_inode->i_mutex);
+ inode_lock(bd_inode);
bdev->bd_map_count++;
- mutex_unlock(&bd_inode->i_mutex);
+ inode_unlock(bd_inode);
}
static void blkdev_vm_close(struct vm_area_struct *vma)
struct inode *bd_inode = bdev_file_inode(vma->vm_file);
struct block_device *bdev = I_BDEV(bd_inode);
- mutex_lock(&bd_inode->i_mutex);
+ inode_lock(bd_inode);
bdev->bd_map_count--;
- mutex_unlock(&bd_inode->i_mutex);
+ inode_unlock(bd_inode);
}
static const struct vm_operations_struct blkdev_dax_vm_ops = {
struct block_device *bdev = I_BDEV(bd_inode);
file_accessed(file);
- mutex_lock(&bd_inode->i_mutex);
+ inode_lock(bd_inode);
bdev->bd_map_count++;
if (IS_DAX(bd_inode)) {
vma->vm_ops = &blkdev_dax_vm_ops;
} else {
vma->vm_ops = &blkdev_default_vm_ops;
}
- mutex_unlock(&bd_inode->i_mutex);
+ inode_unlock(bd_inode);
return 0;
}
*/
static void __merge_refs(struct list_head *head, int mode)
{
- struct __prelim_ref *ref1;
+ struct __prelim_ref *pos1;
- list_for_each_entry(ref1, head, list) {
- struct __prelim_ref *ref2 = ref1, *tmp;
+ list_for_each_entry(pos1, head, list) {
+ struct __prelim_ref *pos2 = pos1, *tmp;
- list_for_each_entry_safe_continue(ref2, tmp, head, list) {
- struct __prelim_ref *xchg;
+ list_for_each_entry_safe_continue(pos2, tmp, head, list) {
+ struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2;
struct extent_inode_elem *eie;
if (!ref_for_same_block(ref1, ref2))
spinlock_t delayed_iput_lock;
struct list_head delayed_iputs;
- struct rw_semaphore delayed_iput_sem;
+ struct mutex cleaner_delayed_iput_mutex;
/* this protects tree_mod_seq_list */
spinlock_t tree_mod_seq_lock;
int __get_raid_index(u64 flags);
int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
void check_system_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const u64 type);
em = lookup_extent_mapping(em_tree, start, (u64)-1);
if (!em)
break;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++)
if (srcdev == map->stripes[i].dev)
map->stripes[i].dev = tgtdev;
#include <asm/cpufeature.h>
#endif
+/*
+ * Super block flag bits recognized by this code.  Any bit outside this
+ * mask is reported as an unrecognized super flag when the super block
+ * is validated.
+ */
+#define BTRFS_SUPER_FLAG_SUPP					\
+	(BTRFS_HEADER_FLAG_WRITTEN |				\
+	 BTRFS_HEADER_FLAG_RELOC |				\
+	 BTRFS_SUPER_FLAG_ERROR |				\
+	 BTRFS_SUPER_FLAG_SEEDING |				\
+	 BTRFS_SUPER_FLAG_METADUMP)
+
static const struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
ret = get_anon_bdev(&root->anon_dev);
if (ret)
goto free_writers;
+
+ mutex_lock(&root->objectid_mutex);
+ ret = btrfs_find_highest_objectid(root,
+ &root->highest_objectid);
+ if (ret) {
+ mutex_unlock(&root->objectid_mutex);
+ goto free_root_dev;
+ }
+
+ ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+ mutex_unlock(&root->objectid_mutex);
+
return 0;
+free_root_dev:
+ free_anon_bdev(root->anon_dev);
free_writers:
btrfs_free_subvolume_writers(root->subv_writers);
fail:
goto sleep;
}
+ mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
btrfs_run_delayed_iputs(root);
+ mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
+
again = btrfs_clean_one_deleted_snapshot(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
mutex_init(&fs_info->delete_unused_bgs_mutex);
mutex_init(&fs_info->reloc_mutex);
mutex_init(&fs_info->delalloc_root_mutex);
+ mutex_init(&fs_info->cleaner_delayed_iput_mutex);
seqlock_init(&fs_info->profiles_lock);
- init_rwsem(&fs_info->delayed_iput_sem);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
goto fail_alloc;
}
- /*
- * Leafsize and nodesize were always equal, this is only a sanity check.
- */
- if (le32_to_cpu(disk_super->__unused_leafsize) !=
- btrfs_super_nodesize(disk_super)) {
- printk(KERN_ERR "BTRFS: couldn't mount because metadata "
- "blocksizes don't match. node %d leaf %d\n",
- btrfs_super_nodesize(disk_super),
- le32_to_cpu(disk_super->__unused_leafsize));
- err = -EINVAL;
- goto fail_alloc;
- }
- if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) {
- printk(KERN_ERR "BTRFS: couldn't mount because metadata "
- "blocksize (%d) was too large\n",
- btrfs_super_nodesize(disk_super));
- err = -EINVAL;
- goto fail_alloc;
- }
-
features = btrfs_super_incompat_flags(disk_super);
features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
- if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
- printk(KERN_ERR "BTRFS: valid FS not found on %s\n", sb->s_id);
- goto fail_sb_buffer;
- }
-
- if (sectorsize != PAGE_SIZE) {
- printk(KERN_ERR "BTRFS: incompatible sector size (%lu) "
- "found on %s\n", (unsigned long)sectorsize, sb->s_id);
- goto fail_sb_buffer;
- }
-
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_sys_array(tree_root);
mutex_unlock(&fs_info->chunk_mutex);
tree_root->commit_root = btrfs_root_node(tree_root);
btrfs_set_root_refs(&tree_root->root_item, 1);
+ mutex_lock(&tree_root->objectid_mutex);
+ ret = btrfs_find_highest_objectid(tree_root,
+ &tree_root->highest_objectid);
+ if (ret) {
+ mutex_unlock(&tree_root->objectid_mutex);
+ goto recovery_tree_root;
+ }
+
+ ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+ mutex_unlock(&tree_root->objectid_mutex);
+
ret = btrfs_read_roots(fs_info, tree_root);
if (ret)
goto recovery_tree_root;
int read_only)
{
struct btrfs_super_block *sb = fs_info->super_copy;
+ u64 nodesize = btrfs_super_nodesize(sb);
+ u64 sectorsize = btrfs_super_sectorsize(sb);
int ret = 0;
+ if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
+ printk(KERN_ERR "BTRFS: no valid FS found\n");
+ ret = -EINVAL;
+ }
+ if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
+ printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n",
+ btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n",
btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
}
/*
- * The common minimum, we don't know if we can trust the nodesize/sectorsize
- * items yet, they'll be verified later. Issue just a warning.
+ * Check sectorsize and nodesize first; the other checks depend on them.
+ * All possible sector sizes (4K, 8K, 16K, 32K, 64K) are checked here.
*/
- if (!IS_ALIGNED(btrfs_super_root(sb), 4096))
+ if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
+ sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize);
+ ret = -EINVAL;
+ }
+ /* Only a sectorsize equal to PAGE_CACHE_SIZE is supported for now */
+ if (sectorsize != PAGE_CACHE_SIZE) {
+ printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
+ sectorsize, PAGE_CACHE_SIZE);
+ ret = -EINVAL;
+ }
+ if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
+ nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
+ printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize);
+ ret = -EINVAL;
+ }
+ if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
+ printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n",
+ le32_to_cpu(sb->__unused_leafsize),
+ nodesize);
+ ret = -EINVAL;
+ }
+
+ /* Root alignment check */
+ if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
btrfs_super_root(sb));
- if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096))
+ ret = -EINVAL;
+ }
+ if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n",
btrfs_super_chunk_root(sb));
- if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096))
- printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
- btrfs_super_log_root(sb));
-
- /*
- * Check the lower bound, the alignment and other constraints are
- * checked later.
- */
- if (btrfs_super_nodesize(sb) < 4096) {
- printk(KERN_ERR "BTRFS: nodesize too small: %u < 4096\n",
- btrfs_super_nodesize(sb));
ret = -EINVAL;
}
- if (btrfs_super_sectorsize(sb) < 4096) {
- printk(KERN_ERR "BTRFS: sectorsize too small: %u < 4096\n",
- btrfs_super_sectorsize(sb));
+ if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
+ printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
+ btrfs_super_log_root(sb));
ret = -EINVAL;
}
!atomic_read(&root->fs_info->open_ioctl_trans)) {
need_commit--;
- if (need_commit > 0)
+ if (need_commit > 0) {
+ btrfs_start_delalloc_roots(fs_info, 0, -1);
btrfs_wait_ordered_roots(fs_info, -1);
+ }
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
if (ret)
return ret;
/*
- * make sure that all running delayed iput are
- * done
+ * The cleaner kthread might still be doing iput
+ * operations. Wait for it to finish so that
+ * more space is released.
*/
- down_write(&root->fs_info->delayed_iput_sem);
- up_write(&root->fs_info->delayed_iput_sem);
+ mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex);
+ mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex);
goto again;
} else {
btrfs_end_transaction(trans, root);
* more device items and remove one chunk item), but this is done at
* btrfs_remove_chunk() through a call to check_system_chunk().
*/
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
num_items = 3 + map->num_stripes;
free_extent_map(em);
disk_super = fs_info->super_copy;
if (!btrfs_super_root(disk_super))
- return 1;
+ return -EINVAL;
features = btrfs_super_incompat_flags(disk_super);
if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
}
return 1;
}
+
+/*
+ * Action callback handed to wait_on_atomic_t(): give up the CPU once and
+ * return 0.
+ */
+static int wait_snapshoting_atomic_t(atomic_t *a)
+{
+	schedule();
+	return 0;
+}
+
+/*
+ * Keep calling btrfs_start_write_no_snapshoting() until it returns
+ * non-zero.  After each zero return, wait on root->will_be_snapshoted
+ * (TASK_UNINTERRUPTIBLE) before trying again.
+ */
+void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
+{
+	while (!btrfs_start_write_no_snapshoting(root))
+		wait_on_atomic_t(&root->will_be_snapshoted,
+				 wait_snapshoting_atomic_t,
+				 TASK_UNINTERRUPTIBLE);
+}
WARN_ON(extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
- kfree(em->bdev);
+ kfree(em->map_lookup);
kmem_cache_free(extent_map_cache, em);
}
}
u64 block_len;
u64 generation;
unsigned long flags;
- struct block_device *bdev;
+ union {
+ struct block_device *bdev;
+
+ /*
+ * used for chunk mappings
+ * flags & EXTENT_FLAG_FS_MAPPING must be set
+ */
+ struct map_lookup *map_lookup;
+ };
atomic_t refs;
unsigned int compress_type;
struct list_head list;
/* simple helper to fault in pages and copy. This should go away
* and be replaced with calls into generic code.
*/
-static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
- size_t write_bytes,
+static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
struct page **prepared_pages,
struct iov_iter *i)
{
ret = 0;
}
- copied = btrfs_copy_from_user(pos, num_pages,
- write_bytes, pages, i);
+ copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
/*
* if we have trouble faulting in the pages, fall
loff_t pos;
size_t count;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = generic_write_checks(iocb, from);
if (err <= 0) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
current->backing_dev_info = inode_to_bdi(inode);
err = file_remove_privs(file);
if (err) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
* to stop this write operation to ensure FS consistency.
*/
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
err = -EROFS;
goto out;
}
end_pos = round_up(pos + count, root->sectorsize);
err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
if (err) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
}
iocb->ki_pos = pos + num_written;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* We also have to set last_sub_trans to the current log transid,
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
atomic_inc(&root->log_batch);
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
ret = start_ordered_ops(inode, start, end);
}
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
atomic_inc(&root->log_batch);
*/
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
trans->sync = true;
* file again, but that will end up using the synchronization
* inside btrfs_sync_log to keep things safe.
*/
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* If any of the ordered extents had an error, just return it to user
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
ret = find_first_non_hole(inode, &offset, &len);
if (ret < 0)
truncated_page = true;
ret = btrfs_truncate_page(inode, offset, 0, 0);
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
}
ret = btrfs_wait_ordered_range(inode, lockstart,
lockend - lockstart + 1);
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
}
ret = btrfs_end_transaction(trans, root);
}
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret && !err)
err = ret;
return err;
if (ret < 0)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = inode_newsize_ok(inode, alloc_end);
if (ret)
goto out;
* So this is completely used as cleanup.
*/
btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/* Let go of our reservation. */
btrfs_free_reserved_data_space(inode, alloc_start,
alloc_end - alloc_start);
struct inode *inode = file->f_mapping->host;
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case SEEK_END:
case SEEK_CUR:
case SEEK_DATA:
case SEEK_HOLE:
if (offset >= i_size_read(inode)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
ret = find_desired_extent(inode, &offset, whence);
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
}
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return offset;
}
return ret;
}
-static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid)
{
struct btrfs_path *path;
int ret;
int ret;
mutex_lock(&root->objectid_mutex);
- if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) {
- ret = btrfs_find_highest_objectid(root,
- &root->highest_objectid);
- if (ret)
- goto out;
- }
-
if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
ret = -ENOSPC;
goto out;
struct btrfs_trans_handle *trans);
int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid);
+int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid);
#endif
{
struct btrfs_fs_info *fs_info = root->fs_info;
- down_read(&fs_info->delayed_iput_sem);
spin_lock(&fs_info->delayed_iput_lock);
while (!list_empty(&fs_info->delayed_iputs)) {
struct btrfs_inode *inode;
spin_lock(&fs_info->delayed_iput_lock);
}
spin_unlock(&fs_info->delayed_iput_lock);
- up_read(&root->fs_info->delayed_iput_sem);
}
/*
return err;
}
-static int wait_snapshoting_atomic_t(atomic_t *a)
-{
- schedule();
- return 0;
-}
-
-static void wait_for_snapshot_creation(struct btrfs_root *root)
-{
- while (true) {
- int ret;
-
- ret = btrfs_start_write_no_snapshoting(root);
- if (ret)
- break;
- wait_on_atomic_t(&root->will_be_snapshoted,
- wait_snapshoting_atomic_t,
- TASK_UNINTERRUPTIBLE);
- }
-}
-
static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
* truncation, it must capture all writes that happened before
* this truncation.
*/
- wait_for_snapshot_creation(root);
+ btrfs_wait_for_snapshot_creation(root);
ret = btrfs_cont_expand(inode, oldsize, newsize);
if (ret) {
btrfs_end_write_no_snapshoting(root);
* not unlock the i_mutex at this case.
*/
if (offset + count <= inode->i_size) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
relock = true;
}
ret = btrfs_delalloc_reserve_space(inode, offset, count);
if (wakeup)
inode_dio_end(inode);
if (relock)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
return ret;
}
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ip_oldflags = ip->flags;
i_oldflags = inode->i_flags;
}
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(file);
return ret;
}
goto fail;
}
+ mutex_lock(&new_root->objectid_mutex);
+ new_root->highest_objectid = new_dirid;
+ mutex_unlock(&new_root->objectid_mutex);
+
/*
* insert the directory item
*/
out_dput:
dput(dentry);
out_unlock:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return error;
}
ra_index += cluster;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
BTRFS_I(inode)->force_compress = compress_type;
ret = cluster_pages_for_defrag(inode, pages, i, cluster);
if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out_ra;
}
defrag_count += ret;
balance_dirty_pages_ratelimited(inode->i_mapping);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (newer_than) {
if (newer_off == (u64)-1)
out_ra:
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
if (!file)
kfree(ra);
goto out_dput;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Don't allow to delete a subvolume with send in progress. This is
spin_unlock(&dest->root_item_lock);
}
out_unlock_inode:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!err) {
d_invalidate(dentry);
btrfs_invalidate_inodes(dest);
out_dput:
dput(dentry);
out_unlock_dir:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
out_drop_write:
mnt_drop_write_file(file);
out:
static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
{
- mutex_unlock(&inode1->i_mutex);
- mutex_unlock(&inode2->i_mutex);
+ inode_unlock(inode1);
+ inode_unlock(inode2);
}
static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
if (inode1 < inode2)
swap(inode1, inode2);
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(inode1, I_MUTEX_PARENT);
+ inode_lock_nested(inode2, I_MUTEX_CHILD);
}
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
return 0;
if (same_inode) {
- mutex_lock(&src->i_mutex);
+ inode_lock(src);
ret = extent_same_check_offsets(src, loff, &len, olen);
if (ret)
btrfs_cmp_data_free(&cmp);
out_unlock:
if (same_inode)
- mutex_unlock(&src->i_mutex);
+ inode_unlock(src);
else
btrfs_double_inode_unlock(src, dst);
if (!same_inode) {
btrfs_double_inode_lock(src, inode);
} else {
- mutex_lock(&src->i_mutex);
+ inode_lock(src);
}
/* determine range to clone */
if (!same_inode)
btrfs_double_inode_unlock(src, inode);
else
- mutex_unlock(&src->i_mutex);
+ inode_unlock(src);
return ret;
}
return 1;
}
+/*
+ * Map (stripe, index) to a flat position: each stripe occupies
+ * rbio->stripe_npages consecutive slots.
+ */
+static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe,
+				  int index)
+{
+	return index + stripe * rbio->stripe_npages;
+}
+
+/*
+ * Return the page stored in the rbio's own stripe_pages array for the
+ * given stripe and page index.  These are not the pages the FS sent
+ * down to us.
+ */
+static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe,
+				     int index)
+{
+	int slot = rbio_stripe_page_index(rbio, stripe, index);
+
+	return rbio->stripe_pages[slot];
+}
+
/*
* helper to index into the pstripe
*/
static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
{
- index += (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
- return rbio->stripe_pages[index];
+ return rbio_stripe_page(rbio, rbio->nr_data, index);
}
/*
{
if (rbio->nr_data + 1 == rbio->real_stripes)
return NULL;
-
- index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
- PAGE_CACHE_SHIFT;
- return rbio->stripe_pages[index];
+ return rbio_stripe_page(rbio, rbio->nr_data + 1, index);
}
/*
{
struct btrfs_raid_bio *rbio = bio->bi_private;
int err = bio->bi_error;
+ int max_errors;
if (err)
fail_bio_stripe(rbio, bio);
err = 0;
/* OK, we have read all the stripes we need to. */
- if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
+ max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
+ 0 : rbio->bbio->max_errors;
+ if (atomic_read(&rbio->error) > max_errors)
err = -EIO;
rbio_orig_end_io(rbio, err);
*/
static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
{
- unsigned long nr = stripe_len * nr_stripes;
- return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE);
+ return DIV_ROUND_UP(stripe_len, PAGE_CACHE_SIZE) * nr_stripes;
}
/*
void *p;
rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2 +
- DIV_ROUND_UP(stripe_npages, BITS_PER_LONG / 8),
- GFP_NOFS);
+ DIV_ROUND_UP(stripe_npages, BITS_PER_LONG) *
+ sizeof(long), GFP_NOFS);
if (!rbio)
return ERR_PTR(-ENOMEM);
if (!page)
return -ENOMEM;
rbio->stripe_pages[i] = page;
- ClearPageUptodate(page);
}
return 0;
}
-/* allocate pages for just the p/q stripes */
+/* only allocate pages for p/q stripes */
static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
{
int i;
struct page *page;
- i = (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
+ i = rbio_stripe_page_index(rbio, rbio->nr_data, 0);
for (; i < rbio->nr_pages; i++) {
if (rbio->stripe_pages[i])
}
}
-/*
- * these are just the pages from the rbio array, not from anything
- * the FS sent down to us
- */
-static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe, int page)
-{
- int index;
- index = stripe * (rbio->stripe_len >> PAGE_CACHE_SHIFT);
- index += page;
- return rbio->stripe_pages[index];
-}
-
/*
* helper function to walk our bio list and populate the bio_pages array with
* the result. This seems expensive, but it is faster than constantly
{
struct btrfs_bio *bbio = rbio->bbio;
void *pointers[rbio->real_stripes];
- int stripe_len = rbio->stripe_len;
int nr_data = rbio->nr_data;
int stripe;
int pagenr;
int q_stripe = -1;
struct bio_list bio_list;
struct bio *bio;
- int pages_per_stripe = stripe_len >> PAGE_CACHE_SHIFT;
int ret;
bio_list_init(&bio_list);
else
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
- for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *p;
/* first collect one page from each data stripe */
for (stripe = 0; stripe < nr_data; stripe++) {
* everything else.
*/
for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
- for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
if (!bbio->tgtdev_map[stripe])
continue;
- for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
int bios_to_read = 0;
struct bio_list bio_list;
int ret;
- int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
int pagenr;
int stripe;
struct bio *bio;
* stripe
*/
for (stripe = 0; stripe < rbio->nr_data; stripe++) {
- for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;
/*
* we want to find all the pages missing from
int pagenr, stripe;
void **pointers;
int faila = -1, failb = -1;
- int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
struct page *page;
int err;
int i;
index_rbio_pages(rbio);
- for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
/*
* Now we just use bitmap to mark the horizontal stripes in
* which we have data when doing parity scrub.
* other endio functions will fiddle the uptodate bits
*/
if (rbio->operation == BTRFS_RBIO_WRITE) {
- for (i = 0; i < nr_pages; i++) {
+ for (i = 0; i < rbio->stripe_npages; i++) {
if (faila != -1) {
page = rbio_stripe_page(rbio, faila, i);
SetPageUptodate(page);
int bios_to_read = 0;
struct bio_list bio_list;
int ret;
- int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
int pagenr;
int stripe;
struct bio *bio;
continue;
}
- for (pagenr = 0; pagenr < nr_pages; pagenr++) {
+ for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *p;
/*
if (!page)
return -ENOMEM;
rbio->stripe_pages[index] = page;
- ClearPageUptodate(page);
}
}
return 0;
}
-/*
- * end io function used by finish_rmw. When we finally
- * get here, we've written a full stripe
- */
-static void raid_write_parity_end_io(struct bio *bio)
-{
- struct btrfs_raid_bio *rbio = bio->bi_private;
- int err = bio->bi_error;
-
- if (bio->bi_error)
- fail_bio_stripe(rbio, bio);
-
- bio_put(bio);
-
- if (!atomic_dec_and_test(&rbio->stripes_pending))
- return;
-
- err = 0;
-
- if (atomic_read(&rbio->error))
- err = -EIO;
-
- rbio_orig_end_io(rbio, err);
-}
-
static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
int need_check)
{
break;
bio->bi_private = rbio;
- bio->bi_end_io = raid_write_parity_end_io;
+ bio->bi_end_io = raid_write_end_io;
submit_bio(WRITE, bio);
}
return;
int ret = 0;
BUG_ON(cluster->start != cluster->boundary[0]);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = btrfs_check_data_free_space(inode, cluster->start,
cluster->end + 1 - cluster->start);
btrfs_free_reserved_data_space(inode, cluster->start,
cluster->end + 1 - cluster->start);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
static inline int scrub_calc_parity_bitmap_len(int nsectors)
{
- return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
+ return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * sizeof(long);
}
static void scrub_parity_get(struct scrub_parity *sparity)
return ret;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (em->start != chunk_offset)
goto out;
return PTR_ERR(inode);
/* Avoid truncate/dio/punch hole.. */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode_dio_wait(inode);
physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
}
ret = COPY_COMPLETE;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
return ret;
}
int ret = 0;
char *compress_type;
bool compress_force = false;
+ enum btrfs_compression_type saved_compress_type;
+ bool saved_compress_force;
+ int no_compress = 0;
cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE))
/* Fallthrough */
case Opt_compress:
case Opt_compress_type:
+ saved_compress_type = btrfs_test_opt(root, COMPRESS) ?
+ info->compress_type : BTRFS_COMPRESS_NONE;
+ saved_compress_force =
+ btrfs_test_opt(root, FORCE_COMPRESS);
if (token == Opt_compress ||
token == Opt_compress_force ||
strcmp(args[0].from, "zlib") == 0) {
btrfs_set_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM);
+ no_compress = 0;
} else if (strcmp(args[0].from, "lzo") == 0) {
compress_type = "lzo";
info->compress_type = BTRFS_COMPRESS_LZO;
btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM);
btrfs_set_fs_incompat(info, COMPRESS_LZO);
+ no_compress = 0;
} else if (strncmp(args[0].from, "no", 2) == 0) {
compress_type = "no";
btrfs_clear_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
compress_force = false;
+ no_compress++;
} else {
ret = -EINVAL;
goto out;
}
if (compress_force) {
- btrfs_set_and_info(root, FORCE_COMPRESS,
- "force %s compression",
- compress_type);
+ btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
} else {
- if (!btrfs_test_opt(root, COMPRESS))
- btrfs_info(root->fs_info,
- "btrfs: use %s compression",
- compress_type);
/*
* If we remount from compress-force=xxx to
* compress=xxx, we need clear FORCE_COMPRESS
*/
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
}
+ if ((btrfs_test_opt(root, COMPRESS) &&
+ (info->compress_type != saved_compress_type ||
+ compress_force != saved_compress_force)) ||
+ (!btrfs_test_opt(root, COMPRESS) &&
+ no_compress == 1)) {
+ btrfs_info(root->fs_info,
+ "%s %s compression",
+ (compress_force) ? "force" : "use",
+ compress_type);
+ }
+ compress_force = false;
break;
case Opt_ssd:
btrfs_set_and_info(root, SSD,
},
};
-const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
+const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
[BTRFS_RAID_RAID1] = BTRFS_BLOCK_GROUP_RAID1,
[BTRFS_RAID_DUP] = BTRFS_BLOCK_GROUP_DUP,
spin_lock_init(&dev->reada_lock);
atomic_set(&dev->reada_in_flight, 0);
atomic_set(&dev->dev_stats_ccnt, 0);
+ btrfs_device_data_ordered_init(dev);
INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
struct map_lookup *map;
int i;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
u64 end;
free_extent_map(em);
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
lock_chunks(root->fs_info->chunk_root);
check_system_chunk(trans, extent_root, map->type);
unlock_chunks(root->fs_info->chunk_root);
if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
btrfs_warn(fs_info,
- "metatdata profile 0x%llx has lower redundancy than data profile 0x%llx",
+ "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
bctl->meta.target, bctl->data.target);
}
goto error;
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = start;
em->len = num_bytes;
em->block_start = 0;
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
item_size = btrfs_chunk_item_size(map->num_stripes);
stripe_size = em->orig_block_len;
if (!em)
return 1;
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
if (map->stripes[i].dev->missing) {
miss_ndevs++;
return 1;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
ret = map->num_stripes;
else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
len = map->stripe_len * nr_data_stripes(map);
free_extent_map(em);
BUG_ON(!em);
BUG_ON(em->start > logical || em->start + em->len < logical);
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
ret = 1;
free_extent_map(em);
return -EINVAL;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
offset = logical - em->start;
stripe_len = map->stripe_len;
* target drive.
*/
for (i = 0; i < tmp_num_stripes; i++) {
- if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
- /*
- * In case of DUP, in order to keep it
- * simple, only add the mirror with the
- * lowest physical address
- */
- if (found &&
- physical_of_found <=
- tmp_bbio->stripes[i].physical)
- continue;
- index_srcdev = i;
- found = 1;
- physical_of_found =
- tmp_bbio->stripes[i].physical;
- }
+ if (tmp_bbio->stripes[i].dev->devid != srcdev_devid)
+ continue;
+
+ /*
+ * In case of DUP, in order to keep it simple, only add
+ * the mirror with the lowest physical address
+ */
+ if (found &&
+ physical_of_found <= tmp_bbio->stripes[i].physical)
+ continue;
+
+ index_srcdev = i;
+ found = 1;
+ physical_of_found = tmp_bbio->stripes[i].physical;
}
- if (found) {
- mirror_num = index_srcdev + 1;
- patch_the_first_stripe_for_dev_replace = 1;
- physical_to_patch_in_first_stripe = physical_of_found;
- } else {
+ btrfs_put_bbio(tmp_bbio);
+
+ if (!found) {
WARN_ON(1);
ret = -EIO;
- btrfs_put_bbio(tmp_bbio);
goto out;
}
- btrfs_put_bbio(tmp_bbio);
+ mirror_num = index_srcdev + 1;
+ patch_the_first_stripe_for_dev_replace = 1;
+ physical_to_patch_in_first_stripe = physical_of_found;
} else if (mirror_num > map->num_stripes) {
mirror_num = 0;
}
free_extent_map(em);
return -EIO;
}
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
length = em->len;
rmap_len = map->stripe_len;
bbio->fs_info = root->fs_info;
atomic_set(&bbio->stripes_pending, bbio->num_stripes);
- if (bbio->raid_map) {
+ if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
+ ((rw & WRITE) || (mirror_num > 1))) {
/* In this case, map_length has been set to the length of
a single stripe; not the whole write */
if (rw & WRITE) {
struct extent_map *em;
u64 logical;
u64 length;
+ u64 stripe_len;
u64 devid;
u8 uuid[BTRFS_UUID_SIZE];
int num_stripes;
logical = key->offset;
length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ /* Validation check */
+ if (!num_stripes) {
+ btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
+ num_stripes);
+ return -EIO;
+ }
+ if (!IS_ALIGNED(logical, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk logical %llu", logical);
+ return -EIO;
+ }
+ if (!length || !IS_ALIGNED(length, root->sectorsize)) {
+ btrfs_err(root->fs_info,
+ "invalid chunk length %llu", length);
+ return -EIO;
+ }
+ if (!is_power_of_2(stripe_len)) {
+ btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
+ stripe_len);
+ return -EIO;
+ }
+ if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ btrfs_chunk_type(leaf, chunk)) {
+ btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
+ ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+ BTRFS_BLOCK_GROUP_PROFILE_MASK) &
+ btrfs_chunk_type(leaf, chunk));
+ return -EIO;
+ }
read_lock(&map_tree->map_tree.lock);
em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
em = alloc_extent_map();
if (!em)
return -ENOMEM;
- num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
if (!map) {
free_extent_map(em);
}
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
- em->bdev = (struct block_device *)map;
+ em->map_lookup = map;
em->start = logical;
em->len = length;
em->orig_start = 0;
/* In order to kick the device replace finish process */
lock_chunks(root);
list_for_each_entry(em, &transaction->pending_chunks, list) {
- map = (struct map_lookup *)em->bdev;
+ map = em->map_lookup;
for (i = 0; i < map->num_stripes; i++) {
dev = map->stripes[i].dev;
* locks the inode's i_mutex before calling setxattr or removexattr.
*/
if (flags & XATTR_REPLACE) {
- ASSERT(mutex_is_locked(&inode->i_mutex));
+ ASSERT(inode_is_locked(inode));
di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
name, name_len, 0);
if (!di)
return 0;
cachefiles_begin_secure(cache, &saved_cred);
- mutex_lock(&d_inode(object->backer)->i_mutex);
+ inode_lock(d_inode(object->backer));
/* if there's an extension to a partial page at the end of the backing
* file, we need to discard the partial page so that we pick up new
ret = notify_change(object->backer, &newattrs, NULL);
truncate_failed:
- mutex_unlock(&d_inode(object->backer)->i_mutex);
+ inode_unlock(d_inode(object->backer));
cachefiles_end_secure(cache, saved_cred);
if (ret == -EIO) {
cachefiles_mark_object_buried(cache, rep, why);
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
if (ret == -EIO)
cachefiles_io_error(cache, "Unlink failed");
/* directories have to be moved to the graveyard */
_debug("move stale object to graveyard");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
try_again:
/* first step is to make up a grave dentry in the graveyard */
dir = dget_parent(object->dentry);
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
if (test_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->fscache.flags)) {
/* object allocation for the same key preemptively deleted this
* object's file so that it could create its own file */
_debug("object preemptively buried");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = 0;
} else {
/* we need to check that our parent is _still_ our parent - it
/* it got moved, presumably by cachefilesd culling it,
* so it's no longer in the key path and we can ignore
* it */
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = 0;
}
}
/* search the current directory for the element name */
_debug("lookup '%s'", name);
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
start = jiffies;
next = lookup_one_len(name, dir, nlen);
/* process the next component */
if (key) {
_debug("advance");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(dir);
dir = next;
next = NULL;
/* note that we're now using this object */
ret = cachefiles_mark_object_active(cache, object);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(dir);
dir = NULL;
cachefiles_io_error(cache, "Lookup failed");
next = NULL;
error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(next);
error_out2:
dput(dir);
_enter(",,%s", dirname);
/* search the current directory for the element name */
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
start = jiffies;
subdir = lookup_one_len(dirname, dir, strlen(dirname));
d_backing_inode(subdir)->i_ino);
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
/* we need to make sure the subdir is a directory */
ASSERT(d_backing_inode(subdir));
return ERR_PTR(ret);
mkdir_error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(subdir);
pr_err("mkdir %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);
lookup_error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = PTR_ERR(subdir);
pr_err("Lookup %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);
nomem_d_alloc:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
_leave(" = -ENOMEM");
return ERR_PTR(-ENOMEM);
}
// dir, filename);
/* look up the victim */
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
start = jiffies;
victim = lookup_one_len(filename, dir, strlen(filename));
* at the netfs's request whilst the cull was in progress
*/
if (d_is_negative(victim)) {
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(victim);
_leave(" = -ENOENT [absent]");
return ERR_PTR(-ENOENT);
object_in_use:
read_unlock(&cache->active_lock);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(victim);
//_leave(" = -EBUSY [in use]");
return ERR_PTR(-EBUSY);
lookup_error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = PTR_ERR(victim);
if (ret == -ENOENT) {
/* file or dir now absent - probably retired by netfs */
return 0;
error_unlock:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
error:
dput(victim);
if (ret == -ENOENT) {
if (IS_ERR(victim))
return PTR_ERR(victim);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(victim);
//_leave(" = 0");
return 0;
return 0;
/* past end of file? */
- i_size = inode->i_size; /* caller holds i_mutex */
+ i_size = i_size_read(inode);
if (page_off >= i_size ||
(pos_in_page == 0 && (pos+len) >= i_size &&
page = grab_cache_page_write_begin(mapping, index, 0);
if (!page)
return -ENOMEM;
- *pagep = page;
dout("write_begin file %p inode %p page %p %d~%d\n", file,
inode, page, (int)pos, (int)len);
zero_user_segment(page, from+copied, len);
/* did file size increase? */
- /* (no need for i_size_read(); we caller holds i_mutex */
- if (pos+copied > inode->i_size)
+ if (pos+copied > i_size_read(inode))
check_cap = ceph_inode_set_size(inode, pos+copied);
if (!PageUptodate(page))
ret = VM_FAULT_NOPAGE;
if ((off > size) ||
- (page->mapping != inode->i_mapping))
+ (page->mapping != inode->i_mapping)) {
+ unlock_page(page);
goto out;
+ }
ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
- if (ret == 0) {
+ if (ret >= 0) {
/* success. we'll keep the page locked. */
set_page_dirty(page);
ret = VM_FAULT_LOCKED;
ret = VM_FAULT_SIGBUS;
}
out:
- if (ret != VM_FAULT_LOCKED)
- unlock_page(page);
if (ret == VM_FAULT_LOCKED ||
ci->i_inline_version != CEPH_INLINE_NONE) {
int dirty;
memset(&aux, 0, sizeof(aux));
aux.mtime = inode->i_mtime;
- aux.size = inode->i_size;
+ aux.size = i_size_read(inode);
memcpy(buffer, &aux, sizeof(aux));
uint64_t *size)
{
const struct ceph_inode_info* ci = cookie_netfs_data;
- const struct inode* inode = &ci->vfs_inode;
-
- *size = inode->i_size;
+ *size = i_size_read(&ci->vfs_inode);
}
static enum fscache_checkaux ceph_fscache_inode_check_aux(
memset(&aux, 0, sizeof(aux));
aux.mtime = inode->i_mtime;
- aux.size = inode->i_size;
+ aux.size = i_size_read(inode);
if (memcmp(data, &aux, sizeof(aux)) != 0)
return FSCACHE_CHECKAUX_OBSOLETE;
return;
/* Avoid multiple racing open requests */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (ci->fscache)
goto done;
ci, true);
fscache_check_consistency(ci->fscache);
done:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
if (datasync)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
dirty = try_flush_caps(inode, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
ret = wait_event_interruptible(ci->i_cap_wq,
caps_are_flushed(inode, flush_tid));
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
return ret;
loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
loff_t retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = -EINVAL;
switch (whence) {
case SEEK_CUR:
}
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
if (IS_ERR(req))
return PTR_ERR(req);
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
req->r_inode = d_inode(child);
ihold(d_inode(child));
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
if (!err) {
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
}
enum {
- CHECK_EOF = 1,
- READ_INLINE = 2,
+ HAVE_RETRIED = 1,
+ CHECK_EOF = 2,
+ READ_INLINE = 3,
};
/*
static int striped_read(struct inode *inode,
u64 off, u64 len,
struct page **pages, int num_pages,
- int *checkeof, bool o_direct,
- unsigned long buf_align)
+ int *checkeof)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
u64 pos, this_len, left;
- int io_align, page_align;
- int pages_left;
- int read;
+ loff_t i_size;
+ int page_align, pages_left;
+ int read, ret;
struct page **page_pos;
- int ret;
bool hit_stripe, was_short;
/*
page_pos = pages;
pages_left = num_pages;
read = 0;
- io_align = off & ~PAGE_MASK;
more:
- if (o_direct)
- page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
- else
- page_align = pos & ~PAGE_MASK;
+ page_align = pos & ~PAGE_MASK;
this_len = left;
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, pos, &this_len,
dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
+ i_size = i_size_read(inode);
if (ret >= 0) {
int didpages;
- if (was_short && (pos + ret < inode->i_size)) {
- int zlen = min(this_len - ret,
- inode->i_size - pos - ret);
- int zoff = (o_direct ? buf_align : io_align) +
- read + ret;
+ if (was_short && (pos + ret < i_size)) {
+ int zlen = min(this_len - ret, i_size - pos - ret);
+ int zoff = (off & ~PAGE_MASK) + read + ret;
dout(" zero gap %llu to %llu\n",
pos + ret, pos + ret + zlen);
ceph_zero_page_vector_range(zoff, zlen, pages);
pages_left -= didpages;
/* hit stripe and need continue*/
- if (left && hit_stripe && pos < inode->i_size)
+ if (left && hit_stripe && pos < i_size)
goto more;
}
if (read > 0) {
ret = read;
/* did we bounce off eof? */
- if (pos + left > inode->i_size)
+ if (pos + left > i_size)
*checkeof = CHECK_EOF;
}
if (ret < 0)
return ret;
- if (iocb->ki_flags & IOCB_DIRECT) {
- while (iov_iter_count(i)) {
- size_t start;
- ssize_t n;
-
- n = dio_get_pagev_size(i);
- pages = dio_get_pages_alloc(i, n, &start, &num_pages);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
-
- ret = striped_read(inode, off, n,
- pages, num_pages, checkeof,
- 1, start);
-
- ceph_put_page_vector(pages, num_pages, true);
-
- if (ret <= 0)
- break;
- off += ret;
- iov_iter_advance(i, ret);
- if (ret < n)
+ num_pages = calc_pages_for(off, len);
+ pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+ ret = striped_read(inode, off, len, pages,
+ num_pages, checkeof);
+ if (ret > 0) {
+ int l, k = 0;
+ size_t left = ret;
+
+ while (left) {
+ size_t page_off = off & ~PAGE_MASK;
+ size_t copy = min_t(size_t, left,
+ PAGE_SIZE - page_off);
+ l = copy_page_to_iter(pages[k++], page_off, copy, i);
+ off += l;
+ left -= l;
+ if (l < copy)
break;
}
- } else {
- num_pages = calc_pages_for(off, len);
- pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
- ret = striped_read(inode, off, len, pages,
- num_pages, checkeof, 0, 0);
- if (ret > 0) {
- int l, k = 0;
- size_t left = ret;
-
- while (left) {
- size_t page_off = off & ~PAGE_MASK;
- size_t copy = min_t(size_t,
- PAGE_SIZE - page_off, left);
- l = copy_page_to_iter(pages[k++], page_off,
- copy, i);
- off += l;
- left -= l;
- if (l < copy)
- break;
- }
- }
- ceph_release_page_vector(pages, num_pages);
}
+ ceph_release_page_vector(pages, num_pages);
if (off > iocb->ki_pos) {
ret = off - iocb->ki_pos;
return ret;
}
+/*
+ * State shared by all OSD requests issued for one asynchronous direct I/O.
+ *
+ * iocb:         kiocb that is completed through ->ki_complete() once the
+ *               last request has finished
+ * total_len:    sum of the lengths of the queued requests; reported as the
+ *               result when no error was recorded (a single-request short
+ *               read overwrites it with the length actually returned)
+ * write:        non-zero for a write, zero for a read
+ * error:        first negative request result, set once with cmpxchg()
+ * osd_reqs:     requests linked through r_unsafe_item until they are started
+ * num_reqs:     number of requests created for this I/O
+ * pending_reqs: requests not yet completed; the decrement that reaches zero
+ *               finishes the I/O
+ * mtime:        modification time, set for writes and reused when a write
+ *               is rebuilt for retry
+ * prealloc_cf:  preallocated cap flush taken over from the caller for
+ *               writes; handed to __ceph_mark_dirty_caps() and released
+ *               with ceph_free_cap_flush() at completion
+ */
+struct ceph_aio_request {
+ struct kiocb *iocb;
+ size_t total_len;
+ int write;
+ int error;
+ struct list_head osd_reqs;
+ unsigned num_reqs;
+ atomic_t pending_reqs;
+ struct timespec mtime;
+ struct ceph_cap_flush *prealloc_cf;
+};
+
+/*
+ * Work item that carries an OSD request which failed with -EOLDSNAPC from
+ * ceph_aio_complete_req() to ceph_aio_retry_work() for resubmission.
+ */
+struct ceph_aio_work {
+ struct work_struct work;
+ struct ceph_osd_request *req;
+};
+
+static void ceph_aio_retry_work(struct work_struct *work);
+
+/*
+ * Account for one finished OSD request of @aio_req and, when it was the last
+ * outstanding one, finish the whole asynchronous I/O: update the inode for a
+ * successful write, release the cap reference, complete the iocb and free
+ * @aio_req.
+ */
+static void ceph_aio_complete(struct inode *inode,
+ struct ceph_aio_request *aio_req)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int ret;
+
+ /* only the completion of the last pending request goes any further */
+ if (!atomic_dec_and_test(&aio_req->pending_reqs))
+ return;
+
+ /* result is the recorded error, or the total length if there is none */
+ ret = aio_req->error;
+ if (!ret)
+ ret = aio_req->total_len;
+
+ dout("ceph_aio_complete %p rc %d\n", inode, ret);
+
+ if (ret >= 0 && aio_req->write) {
+ int dirty;
+
+ /* grow i_size when the write ended beyond the current size */
+ loff_t endoff = aio_req->iocb->ki_pos + aio_req->total_len;
+ if (endoff > i_size_read(inode)) {
+ if (ceph_inode_set_size(inode, endoff))
+ ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+ }
+
+ /* mark the FILE_WR cap dirty, consuming the preallocated cap flush */
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
+ &aio_req->prealloc_cf);
+ spin_unlock(&ci->i_ceph_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+
+ }
+
+ /* release the cap reference that matches the direction of the I/O */
+ ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR :
+ CEPH_CAP_FILE_RD));
+
+ aio_req->iocb->ki_complete(aio_req->iocb, ret, 0);
+
+ /* aio_req is not used by anyone after this point */
+ ceph_free_cap_flush(aio_req->prealloc_cf);
+ kfree(aio_req);
+}
+
+/*
+ * Completion callback for one OSD request of an asynchronous direct I/O.
+ *
+ * A write that failed with -EOLDSNAPC is handed to ceph_aio_retry_work() for
+ * resubmission; in that case the request, its pages and the pending count
+ * are left alone here.  If the work item cannot be allocated the write fails
+ * with -ENOMEM.  For a read, -ENOENT counts as a zero-length read and the
+ * part of the buffer a short read did not fill is zeroed.  A negative result
+ * is recorded as the error of the whole I/O unless one was recorded already.
+ *
+ * @req: the finished OSD request; r_priv points to the ceph_aio_request
+ * @msg: not used
+ */
+static void ceph_aio_complete_req(struct ceph_osd_request *req,
+				  struct ceph_msg *msg)
+{
+	int rc = req->r_result;
+	struct inode *inode = req->r_inode;
+	struct ceph_aio_request *aio_req = req->r_priv;
+	struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+	int num_pages = calc_pages_for((u64)osd_data->alignment,
+				       osd_data->length);
+
+	dout("ceph_aio_complete_req %p rc %d bytes %llu\n",
+	     inode, rc, osd_data->length);
+
+	if (rc == -EOLDSNAPC) {
+		struct ceph_aio_work *aio_work;
+		BUG_ON(!aio_req->write);
+
+		aio_work = kmalloc(sizeof(*aio_work), GFP_NOFS);
+		if (aio_work) {
+			INIT_WORK(&aio_work->work, ceph_aio_retry_work);
+			aio_work->req = req;
+			queue_work(ceph_inode_to_client(inode)->wb_wq,
+				   &aio_work->work);
+			return;
+		}
+		rc = -ENOMEM;
+	} else if (!aio_req->write) {
+		if (rc == -ENOENT)
+			rc = 0;
+		if (rc >= 0 && osd_data->length > rc) {
+			int zoff = osd_data->alignment + rc;
+			int zlen = osd_data->length - rc;
+			/*
+			 * If read is satisfied by single OSD request,
+			 * it can pass EOF. Otherwise read is within
+			 * i_size.
+			 */
+			if (aio_req->num_reqs == 1) {
+				loff_t i_size = i_size_read(inode);
+				loff_t endoff = aio_req->iocb->ki_pos + rc;
+				if (endoff < i_size)
+					zlen = min_t(size_t, zlen,
+						     i_size - endoff);
+				aio_req->total_len = rc + zlen;
+			}
+
+			if (zlen > 0)
+				ceph_zero_page_vector_range(zoff, zlen,
+							    osd_data->pages);
+		}
+	}
+
+	/*
+	 * A direct read stores data into the caller's pages, so they have to
+	 * be marked dirty when they are released (as the former direct read
+	 * path did); pages that were only the source of a write do not.
+	 */
+	ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write);
+	ceph_osdc_put_request(req);
+
+	/* keep the first error; later ones do not overwrite it */
+	if (rc < 0)
+		cmpxchg(&aio_req->error, 0, rc);
+
+	ceph_aio_complete(inode, aio_req);
+}
+
+/*
+ * Workqueue handler that resubmits an asynchronous direct write which failed
+ * with -EOLDSNAPC.
+ *
+ * A new request is built against the snap context of the most recent pending
+ * cap snap, or the inode's head snap context when there is none.  It reuses
+ * the original request's object location, object id and first (extent) op,
+ * and replaces the original request.  If the new request cannot be
+ * allocated or started, the failure is reported through
+ * ceph_aio_complete_req() so that the I/O still completes.
+ *
+ * @work: embedded in the struct ceph_aio_work queued by
+ *        ceph_aio_complete_req(); the work item is freed here
+ */
+static void ceph_aio_retry_work(struct work_struct *work)
+{
+	struct ceph_aio_work *aio_work =
+		container_of(work, struct ceph_aio_work, work);
+	struct ceph_osd_request *orig_req = aio_work->req;
+	struct ceph_aio_request *aio_req = orig_req->r_priv;
+	struct inode *inode = orig_req->r_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_snap_context *snapc;
+	struct ceph_osd_request *req;
+	int ret;
+
+	/* take a reference on the snap context the retry must be issued with */
+	spin_lock(&ci->i_ceph_lock);
+	if (__ceph_have_pending_cap_snap(ci)) {
+		struct ceph_cap_snap *capsnap =
+			list_last_entry(&ci->i_cap_snaps,
+					struct ceph_cap_snap,
+					ci_item);
+		snapc = ceph_get_snap_context(capsnap->context);
+	} else {
+		BUG_ON(!ci->i_head_snapc);
+		snapc = ceph_get_snap_context(ci->i_head_snapc);
+	}
+	spin_unlock(&ci->i_ceph_lock);
+
+	req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
+			false, GFP_NOFS);
+	if (!req) {
+		/*
+		 * ceph_osdc_alloc_request() reports failure with NULL, not
+		 * an ERR_PTR.  Fail the original request instead.
+		 */
+		ret = -ENOMEM;
+		req = orig_req;
+		goto out;
+	}
+
+	req->r_flags =	CEPH_OSD_FLAG_ORDERSNAP |
+			CEPH_OSD_FLAG_ONDISK |
+			CEPH_OSD_FLAG_WRITE;
+	req->r_base_oloc = orig_req->r_base_oloc;
+	req->r_base_oid = orig_req->r_base_oid;
+
+	req->r_ops[0] = orig_req->r_ops[0];
+	osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+
+	ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
+				snapc, CEPH_NOSNAP, &aio_req->mtime);
+
+	ceph_osdc_put_request(orig_req);
+
+	req->r_callback = ceph_aio_complete_req;
+	req->r_inode = inode;
+	req->r_priv = aio_req;
+
+	ret = ceph_osdc_start_request(req->r_osdc, req, false);
+out:
+	if (ret < 0) {
+		BUG_ON(ret == -EOLDSNAPC);
+		req->r_result = ret;
+		ceph_aio_complete_req(req, NULL);
+	}
+
+	/* drop our snap context reference on every path, errors included */
+	ceph_put_snap_context(snapc);
+	kfree(aio_work);
+}
+
/*
* Write commit request unsafe callback, called to tell us when a
* request is unsafe (that is, in flight--has been handed to the
}
-/*
- * Synchronous write, straight from __user pointer or user pages.
- *
- * If write spans object boundary, just do multiple writes. (For a
- * correct atomic write, we should e.g. take write locks on all
- * objects, rollback on failure, etc.)
- */
static ssize_t
-ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
- struct ceph_snap_context *snapc)
+ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+ struct ceph_snap_context *snapc,
+ struct ceph_cap_flush **pcf)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct ceph_vino vino;
struct ceph_osd_request *req;
struct page **pages;
- int num_pages;
- int written = 0;
+ struct ceph_aio_request *aio_req = NULL;
+ int num_pages = 0;
int flags;
- int check_caps = 0;
int ret;
struct timespec mtime = CURRENT_TIME;
- size_t count = iov_iter_count(from);
+ size_t count = iov_iter_count(iter);
+ loff_t pos = iocb->ki_pos;
+ bool write = iov_iter_rw(iter) == WRITE;
- if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
+ if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
return -EROFS;
- dout("sync_direct_write on file %p %lld~%u\n", file, pos,
- (unsigned)count);
+ dout("sync_direct_read_write (%s) on file %p %lld~%u\n",
+ (write ? "write" : "read"), file, pos, (unsigned)count);
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
if (ret < 0)
return ret;
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- pos >> PAGE_CACHE_SHIFT,
- (pos + count) >> PAGE_CACHE_SHIFT);
- if (ret < 0)
- dout("invalidate_inode_pages2_range returned %d\n", ret);
+ if (write) {
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + count) >> PAGE_CACHE_SHIFT);
+ if (ret < 0)
+ dout("invalidate_inode_pages2_range returned %d\n", ret);
- flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE;
+ flags = CEPH_OSD_FLAG_ORDERSNAP |
+ CEPH_OSD_FLAG_ONDISK |
+ CEPH_OSD_FLAG_WRITE;
+ } else {
+ flags = CEPH_OSD_FLAG_READ;
+ }
- while (iov_iter_count(from) > 0) {
- u64 len = dio_get_pagev_size(from);
- size_t start;
- ssize_t n;
+ while (iov_iter_count(iter) > 0) {
+ u64 size = dio_get_pagev_size(iter);
+ size_t start = 0;
+ ssize_t len;
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
- vino, pos, &len, 0,
- 2,/*include a 'startsync' command*/
- CEPH_OSD_OP_WRITE, flags, snapc,
+ vino, pos, &size, 0,
+ /*include a 'startsync' command*/
+ write ? 2 : 1,
+ write ? CEPH_OSD_OP_WRITE :
+ CEPH_OSD_OP_READ,
+ flags, snapc,
ci->i_truncate_seq,
ci->i_truncate_size,
false);
break;
}
- osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
-
- n = len;
- pages = dio_get_pages_alloc(from, len, &start, &num_pages);
+ len = size;
+ pages = dio_get_pages_alloc(iter, len, &start, &num_pages);
if (IS_ERR(pages)) {
ceph_osdc_put_request(req);
ret = PTR_ERR(pages);
}
/*
- * throw out any page cache pages in this range. this
- * may block.
+ * To simplify error handling, allow AIO when IO within i_size
+ * or IO can be satisfied by single OSD request.
*/
- truncate_inode_pages_range(inode->i_mapping, pos,
- (pos+n) | (PAGE_CACHE_SIZE-1));
- osd_req_op_extent_osd_data_pages(req, 0, pages, n, start,
- false, false);
+ if (pos == iocb->ki_pos && !is_sync_kiocb(iocb) &&
+ (len == count || pos + count <= i_size_read(inode))) {
+ aio_req = kzalloc(sizeof(*aio_req), GFP_KERNEL);
+ if (aio_req) {
+ aio_req->iocb = iocb;
+ aio_req->write = write;
+ INIT_LIST_HEAD(&aio_req->osd_reqs);
+ if (write) {
+ aio_req->mtime = mtime;
+ swap(aio_req->prealloc_cf, *pcf);
+ }
+ }
+ /* ignore error */
+ }
+
+ if (write) {
+ /*
+ * throw out any page cache pages in this range. this
+ * may block.
+ */
+ truncate_inode_pages_range(inode->i_mapping, pos,
+ (pos+len) | (PAGE_CACHE_SIZE - 1));
+
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+ }
+
+
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, start,
+ false, false);
- /* BUG_ON(vino.snap != CEPH_NOSNAP); */
ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
- ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (aio_req) {
+ aio_req->total_len += len;
+ aio_req->num_reqs++;
+ atomic_inc(&aio_req->pending_reqs);
+
+ req->r_callback = ceph_aio_complete_req;
+ req->r_inode = inode;
+ req->r_priv = aio_req;
+ list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs);
+
+ pos += len;
+ iov_iter_advance(iter, len);
+ continue;
+ }
+
+ ret = ceph_osdc_start_request(req->r_osdc, req, false);
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ size = i_size_read(inode);
+ if (!write) {
+ if (ret == -ENOENT)
+ ret = 0;
+ if (ret >= 0 && ret < len && pos + ret < size) {
+ int zlen = min_t(size_t, len - ret,
+ size - pos - ret);
+ ceph_zero_page_vector_range(start + ret, zlen,
+ pages);
+ ret += zlen;
+ }
+ if (ret >= 0)
+ len = ret;
+ }
+
ceph_put_page_vector(pages, num_pages, false);
ceph_osdc_put_request(req);
- if (ret)
+ if (ret < 0)
break;
- pos += n;
- written += n;
- iov_iter_advance(from, n);
- if (pos > i_size_read(inode)) {
- check_caps = ceph_inode_set_size(inode, pos);
- if (check_caps)
+ pos += len;
+ iov_iter_advance(iter, len);
+
+ if (!write && pos >= size)
+ break;
+
+ if (write && pos > size) {
+ if (ceph_inode_set_size(inode, pos))
ceph_check_caps(ceph_inode(inode),
CHECK_CAPS_AUTHONLY,
NULL);
}
}
- if (ret != -EOLDSNAPC && written > 0) {
+ if (aio_req) {
+ if (aio_req->num_reqs == 0) {
+ kfree(aio_req);
+ return ret;
+ }
+
+ ceph_get_cap_refs(ci, write ? CEPH_CAP_FILE_WR :
+ CEPH_CAP_FILE_RD);
+
+ while (!list_empty(&aio_req->osd_reqs)) {
+ req = list_first_entry(&aio_req->osd_reqs,
+ struct ceph_osd_request,
+ r_unsafe_item);
+ list_del_init(&req->r_unsafe_item);
+ if (ret >= 0)
+ ret = ceph_osdc_start_request(req->r_osdc,
+ req, false);
+ if (ret < 0) {
+ BUG_ON(ret == -EOLDSNAPC);
+ req->r_result = ret;
+ ceph_aio_complete_req(req, NULL);
+ }
+ }
+ return -EIOCBQUEUED;
+ }
+
+ if (ret != -EOLDSNAPC && pos > iocb->ki_pos) {
+ ret = pos - iocb->ki_pos;
iocb->ki_pos = pos;
- ret = written;
}
return ret;
}
-
/*
* Synchronous write, straight from __user pointer or user pages.
*
ceph_cap_string(got));
if (ci->i_inline_version == CEPH_INLINE_NONE) {
- /* hmm, this isn't really async... */
- ret = ceph_sync_read(iocb, to, &retry_op);
+ if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
+ ret = ceph_direct_read_write(iocb, to,
+ NULL, NULL);
+ if (ret >= 0 && ret < len)
+ retry_op = CHECK_EOF;
+ } else {
+ ret = ceph_sync_read(iocb, to, &retry_op);
+ }
} else {
retry_op = READ_INLINE;
}
pinned_page = NULL;
}
ceph_put_cap_refs(ci, got);
- if (retry_op && ret >= 0) {
+ if (retry_op > HAVE_RETRIED && ret >= 0) {
int statret;
struct page *page = NULL;
loff_t i_size;
if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
ret < len) {
dout("sync_read hit hole, ppos %lld < size %lld"
- ", reading more\n", iocb->ki_pos,
- inode->i_size);
+ ", reading more\n", iocb->ki_pos, i_size);
read += ret;
len -= ret;
- retry_op = 0;
+ retry_op = HAVE_RETRIED;
goto again;
}
}
if (!prealloc_cf)
return -ENOMEM;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
}
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
- inode, ceph_vinop(inode), pos, count, inode->i_size);
+ inode, ceph_vinop(inode), pos, count, i_size_read(inode));
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
else
(iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
struct ceph_snap_context *snapc;
struct iov_iter data;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
spin_lock(&ci->i_ceph_lock);
if (__ceph_have_pending_cap_snap(ci)) {
/* we might need to revert back to that point */
data = *from;
if (iocb->ki_flags & IOCB_DIRECT)
- written = ceph_sync_direct_write(iocb, &data, pos,
- snapc);
+ written = ceph_direct_read_write(iocb, &data, snapc,
+ &prealloc_cf);
else
written = ceph_sync_write(iocb, &data, pos, snapc);
if (written == -EOLDSNAPC) {
"got EOLDSNAPC, retrying\n",
inode, ceph_vinop(inode),
pos, (unsigned)count);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
goto retry_snap;
}
if (written > 0)
iov_iter_advance(from, written);
ceph_put_snap_context(snapc);
} else {
- loff_t old_size = inode->i_size;
+ loff_t old_size = i_size_read(inode);
/*
* No need to acquire the i_truncate_mutex. Because
* the MDS revokes Fwb caps before sending truncate
written = generic_perform_write(file, from, pos);
if (likely(written >= 0))
iocb->ki_pos = pos + written;
- if (inode->i_size > old_size)
+ if (i_size_read(inode) > old_size)
ceph_fscache_update_objectsize(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
if (written >= 0) {
goto out_unlocked;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out_unlocked:
ceph_free_cap_flush(prealloc_cf);
current->backing_dev_info = NULL;
static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
+ loff_t i_size;
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
}
}
+ i_size = i_size_read(inode);
switch (whence) {
case SEEK_END:
- offset += inode->i_size;
+ offset += i_size;
break;
case SEEK_CUR:
/*
offset += file->f_pos;
break;
case SEEK_DATA:
- if (offset >= inode->i_size) {
+ if (offset >= i_size) {
ret = -ENXIO;
goto out;
}
break;
case SEEK_HOLE:
- if (offset >= inode->i_size) {
+ if (offset >= i_size) {
ret = -ENXIO;
goto out;
}
- offset = inode->i_size;
+ offset = i_size;
break;
}
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return offset;
}
if (!prealloc_cf)
return -ENOMEM;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (ceph_snap(inode) != CEPH_NOSNAP) {
ret = -EROFS;
ceph_put_cap_refs(ci, got);
unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ceph_free_cap_flush(prealloc_cf);
return ret;
}
if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
(truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
dout("size %lld -> %llu\n", inode->i_size, size);
- inode->i_size = size;
+ i_size_write(inode, size);
inode->i_blocks = (size + (1<<9) - 1) >> 9;
ci->i_reported_size = size;
if (truncate_seq != ci->i_truncate_seq) {
spin_unlock(&ci->i_ceph_lock);
err = -EINVAL;
- if (WARN_ON(symlen != inode->i_size))
+ if (WARN_ON(symlen != i_size_read(inode)))
goto out;
err = -ENOMEM;
spin_lock(&ci->i_ceph_lock);
dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
- inode->i_size = size;
+ i_size_write(inode, size);
inode->i_blocks = (size + (1 << 9) - 1) >> 9;
/* tell the MDS if we are approaching max_size */
inode->i_size, attr->ia_size);
if ((issued & CEPH_CAP_FILE_EXCL) &&
attr->ia_size > inode->i_size) {
- inode->i_size = attr->ia_size;
+ i_size_write(inode, attr->ia_size);
inode->i_blocks =
(attr->ia_size + (1 << 9) - 1) >> 9;
inode->i_ctime = attr->ia_ctime;
vaf.fmt = fmt;
vaf.va = &args;
- pr_err("CIFS VFS: %pV", &vaf);
+ pr_err_ratelimited("CIFS VFS: %pV", &vaf);
va_end(args);
}
/* information message: e.g., configuration, major event */
#define cifs_dbg(type, fmt, ...) \
do { \
- if (type == FYI) { \
- if (cifsFYI & CIFS_INFO) { \
- pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__); \
- } \
+ if (type == FYI && cifsFYI & CIFS_INFO) { \
+ pr_debug_ratelimited("%s: " \
+ fmt, __FILE__, ##__VA_ARGS__); \
} else if (type == VFS) { \
cifs_vfs_err(fmt, ##__VA_ARGS__); \
} else if (type == NOISY && type != 0) { \
- pr_debug(fmt, ##__VA_ARGS__); \
+ pr_debug_ratelimited(fmt, ##__VA_ARGS__); \
} \
} while (0)
seq_printf(s, ",rsize=%u", cifs_sb->rsize);
seq_printf(s, ",wsize=%u", cifs_sb->wsize);
+ seq_printf(s, ",echo_interval=%lu",
+ tcon->ses->server->echo_interval / HZ);
/* convert actimeo and display it in seconds */
seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
while (*s && *s != sep)
s++;
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
child = lookup_one_len(p, dentry, s - p);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(dentry);
dentry = child;
} while (!IS_ERR(dentry));
ssize_t rc;
struct inode *inode = file_inode(iocb->ki_filp);
+ if (iocb->ki_filp->f_flags & O_DIRECT)
+ return cifs_user_readv(iocb, iter);
+
rc = cifs_revalidate_mapping(inode);
if (rc)
return rc;
ssize_t written;
int rc;
+ if (iocb->ki_filp->f_flags & O_DIRECT) {
+ written = cifs_user_writev(iocb, from);
+ if (written > 0 && CIFS_CACHE_READ(cinode)) {
+ cifs_zap_mapping(inode);
+ cifs_dbg(FYI,
+ "Set no oplock for inode=%p after a write operation\n",
+ inode);
+ cinode->oplock = 0;
+ }
+ return written;
+ }
+
written = cifs_get_writer(cinode);
if (written)
return written;
#define SERVER_NAME_LENGTH 40
#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1)
-/* SMB echo "timeout" -- FIXME: tunable? */
-#define SMB_ECHO_INTERVAL (60 * HZ)
+/* echo interval in seconds */
+#define SMB_ECHO_INTERVAL_MIN 1
+#define SMB_ECHO_INTERVAL_MAX 600
+#define SMB_ECHO_INTERVAL_DEFAULT 60
#include "cifspdu.h"
void (*print_stats)(struct seq_file *m, struct cifs_tcon *);
void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *);
/* verify the message */
- int (*check_message)(char *, unsigned int);
+ int (*check_message)(char *, unsigned int, struct TCP_Server_Info *);
bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
void (*downgrade_oplock)(struct TCP_Server_Info *,
struct cifsInodeInfo *, bool);
struct sockaddr_storage dstaddr; /* destination address */
struct sockaddr_storage srcaddr; /* allow binding to a local IP */
struct nls_table *local_nls;
+ unsigned int echo_interval; /* echo interval in secs */
};
#define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \
#ifdef CONFIG_CIFS_SMB2
unsigned int max_read;
unsigned int max_write;
+ __u8 preauth_hash[512];
#endif /* CONFIG_CIFS_SMB2 */
+ unsigned long echo_interval;
};
static inline unsigned int
bool need_reconnect:1; /* connection reset, uid now invalid */
#ifdef CONFIG_CIFS_SMB2
__u16 session_flags;
- char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */
+ __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
+ __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
+ __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
+ __u8 preauth_hash[512];
#endif /* CONFIG_CIFS_SMB2 */
};
struct smb_hdr *out_buf,
int *bytes_returned);
extern int cifs_reconnect(struct TCP_Server_Info *server);
-extern int checkSMB(char *buf, unsigned int length);
+extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr);
extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *);
extern bool backup_cred(struct cifs_sb_info *);
extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
extern int calc_seckey(struct cifs_ses *);
-extern int generate_smb3signingkey(struct cifs_ses *);
+extern int generate_smb30signingkey(struct cifs_ses *);
+extern int generate_smb311signingkey(struct cifs_ses *);
#ifdef CONFIG_CIFS_WEAK_PW_HASH
extern int calc_lanman_hash(const char *password, const char *cryptkey,
Opt_cruid, Opt_gid, Opt_file_mode,
Opt_dirmode, Opt_port,
Opt_rsize, Opt_wsize, Opt_actimeo,
+ Opt_echo_interval,
/* Mount options which take string value */
Opt_user, Opt_pass, Opt_ip,
{ Opt_rsize, "rsize=%s" },
{ Opt_wsize, "wsize=%s" },
{ Opt_actimeo, "actimeo=%s" },
+ { Opt_echo_interval, "echo_interval=%s" },
{ Opt_blank_user, "user=" },
{ Opt_blank_user, "username=" },
server->session_key.response = NULL;
server->session_key.len = 0;
server->lstrp = jiffies;
- mutex_unlock(&server->srv_mutex);
/* mark submitted MIDs for retry and issue callback */
INIT_LIST_HEAD(&retry_list);
list_move(&mid_entry->qhead, &retry_list);
}
spin_unlock(&GlobalMid_Lock);
+ mutex_unlock(&server->srv_mutex);
cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
list_for_each_safe(tmp, tmp2, &retry_list) {
int rc;
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, echo.work);
+ unsigned long echo_interval = server->echo_interval;
/*
* We cannot send an echo if it is disabled or until the
*/
if (!server->ops->need_neg || server->ops->need_neg(server) ||
(server->ops->can_echo && !server->ops->can_echo(server)) ||
- time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
+ time_before(jiffies, server->lstrp + echo_interval - HZ))
goto requeue_echo;
rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
server->hostname);
requeue_echo:
- queue_delayed_work(cifsiod_wq, &server->echo, SMB_ECHO_INTERVAL);
+ queue_delayed_work(cifsiod_wq, &server->echo, echo_interval);
}
static bool
* a response in >60s.
*/
if (server->tcpStatus == CifsGood &&
- time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
- cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n",
- server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ);
+ time_after(jiffies, server->lstrp + 2 * server->echo_interval)) {
+ cifs_dbg(VFS, "Server %s has not responded in %lu seconds. Reconnecting...\n",
+ server->hostname, (2 * server->echo_interval) / HZ);
cifs_reconnect(server);
wake_up(&server->response_q);
return true;
* 48 bytes is enough to display the header and a little bit
* into the payload for debugging purposes.
*/
- length = server->ops->check_message(buf, server->total_read);
+ length = server->ops->check_message(buf, server->total_read, server);
if (length != 0)
cifs_dump_mem("Bad SMB: ", buf,
min_t(unsigned int, server->total_read, 48));
goto cifs_parse_mount_err;
}
break;
+ case Opt_echo_interval:
+ if (get_option_ul(args, &option)) {
+ cifs_dbg(VFS, "%s: Invalid echo interval value\n",
+ __func__);
+ goto cifs_parse_mount_err;
+ }
+ vol->echo_interval = option;
+ break;
/* String Arguments */
if (!match_security(server, vol))
return 0;
+ /*
+ * server->echo_interval is kept in jiffies and falls back to
+ * SMB_ECHO_INTERVAL_DEFAULT when the mount option is out of range,
+ * whereas vol->echo_interval is the raw mount option in seconds.
+ * Normalise the mount option the same way before comparing, otherwise
+ * an existing connection would never be considered a match.
+ */
+ if (server->echo_interval !=
+ ((vol->echo_interval >= SMB_ECHO_INTERVAL_MIN &&
+ vol->echo_interval <= SMB_ECHO_INTERVAL_MAX) ?
+ vol->echo_interval : SMB_ECHO_INTERVAL_DEFAULT) * HZ)
+ return 0;
+
return 1;
}
tcp_ses->tcpStatus = CifsNew;
++tcp_ses->srv_count;
+ if (volume_info->echo_interval >= SMB_ECHO_INTERVAL_MIN &&
+ volume_info->echo_interval <= SMB_ECHO_INTERVAL_MAX)
+ tcp_ses->echo_interval = volume_info->echo_interval * HZ;
+ else
+ tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ;
+
rc = ip_connect(tcp_ses);
if (rc < 0) {
cifs_dbg(VFS, "Error connecting to socket. Aborting operation.\n");
cifs_fscache_get_client_cookie(tcp_ses);
/* queue echo request delayed work */
- queue_delayed_work(cifsiod_wq, &tcp_ses->echo, SMB_ECHO_INTERVAL);
+ queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
return tcp_ses;
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (rc)
return rc;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
xid = get_xid();
}
free_xid(xid);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (rc)
return rc;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
xid = get_xid();
}
free_xid(xid);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
* with a brlock that prevents writing.
*/
down_read(&cinode->lock_sem);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
rc = generic_write_checks(iocb, from);
if (rc <= 0)
else
rc = -EACCES;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (rc > 0) {
ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
}
} else
fattr.cf_uniqueid = iunique(sb, ROOT_I);
- } else
- fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+ } else {
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
+ validinum == false && server->ops->get_srv_inum) {
+ /*
+ * Pass a NULL tcon to ensure we don't make a round
+ * trip to the server. This only works for SMB2+.
+ */
+ tmprc = server->ops->get_srv_inum(xid,
+ NULL, cifs_sb, full_path,
+ &fattr.cf_uniqueid, data);
+ if (tmprc)
+ fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+ } else
+ fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+ }
/* query for SFU type info if supported and needed */
if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
} else {
/* we already have inode, update it */
+ /* if uniqueid is different, return error */
+ if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
+ CIFS_I(*inode)->uniqueid != fattr.cf_uniqueid)) {
+ rc = -ESTALE;
+ goto cgii_exit;
+ }
+
/* if filetype is different, return error */
if (unlikely(((*inode)->i_mode & S_IFMT) !=
(fattr.cf_mode & S_IFMT))) {
}
int
-checkSMB(char *buf, unsigned int total_read)
+checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
{
struct smb_hdr *smb = (struct smb_hdr *)buf;
__u32 rfclen = be32_to_cpu(smb->smb_buf_length);
* if buggy server returns . and .. late do we want to
* check for that here?
*/
+ *tmp_buf = 0;
rc = cifs_filldir(current_entry, file, ctx,
tmp_buf, max_len);
if (rc) {
* Make sure that this really is an SMB, that it is a response,
* and that the message ids match.
*/
- if ((*(__le32 *)hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
+ if ((hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
(mid == wire_mid)) {
if (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
return 0;
cifs_dbg(VFS, "Received Request not response\n");
}
} else { /* bad signature or mid */
- if (*(__le32 *)hdr->ProtocolId != SMB2_PROTO_NUMBER)
+ if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
cifs_dbg(VFS, "Bad protocol string signature header %x\n",
- *(unsigned int *) hdr->ProtocolId);
+ le32_to_cpu(hdr->ProtocolId));
if (mid != wire_mid)
cifs_dbg(VFS, "Mids do not match: %llu and %llu\n",
mid, wire_mid);
};
int
-smb2_check_message(char *buf, unsigned int length)
+smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
{
struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
- __u64 mid = le64_to_cpu(hdr->MessageId);
+ __u64 mid;
__u32 len = get_rfc1002_length(buf);
__u32 clc_len; /* calculated length */
int command;
* ie Validate the wct via smb2_struct_sizes table above
*/
+ if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+ struct smb2_transform_hdr *thdr =
+ (struct smb2_transform_hdr *)buf;
+ struct cifs_ses *ses = NULL;
+ struct list_head *tmp;
+
+ /* decrypt frame now that it is completely read in */
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each(tmp, &srvr->smb_ses_list) {
+ ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+ if (ses->Suid == thdr->SessionId)
+ break;
+
+ ses = NULL;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+ if (ses == NULL) {
+ cifs_dbg(VFS, "no decryption - session id not found\n");
+ return 1;
+ }
+ }
+
+
+ mid = le64_to_cpu(hdr->MessageId);
if (length < sizeof(struct smb2_pdu)) {
if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) {
pdu->StructureSize2 = 0;
/* return pointer to beginning of data area, ie offset from SMB start */
if ((*off != 0) && (*len != 0))
- return (char *)(&hdr->ProtocolId[0]) + *off;
+ return (char *)(&hdr->ProtocolId) + *off;
else
return NULL;
}
struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
__u64 wire_mid = le64_to_cpu(hdr->MessageId);
+ /* frames carrying the transform protocol id cannot be parsed yet */
+ if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+ cifs_dbg(VFS, "encrypted frame parsing not supported yet\n");
+ return NULL;
+ }
+
spin_lock(&GlobalMid_Lock);
list_for_each_entry(mid, &server->pending_mid_q, qhead) {
if ((mid->mid == wire_mid) &&
.get_lease_key = smb2_get_lease_key,
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
- .generate_signingkey = generate_smb3signingkey,
+ .generate_signingkey = generate_smb30signingkey,
.calc_signature = smb3_calc_signature,
.set_integrity = smb3_set_integrity,
.is_read_op = smb21_is_read_op,
.get_lease_key = smb2_get_lease_key,
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
- .generate_signingkey = generate_smb3signingkey,
+ .generate_signingkey = generate_smb311signingkey,
.calc_signature = smb3_calc_signature,
.set_integrity = smb3_set_integrity,
.is_read_op = smb21_is_read_op,
struct smb_version_values smb30_values = {
.version_string = SMB30_VERSION_STRING,
.protocol_id = SMB30_PROT_ID,
- .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
struct smb_version_values smb302_values = {
.version_string = SMB302_VERSION_STRING,
.protocol_id = SMB302_PROT_ID,
- .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
hdr->smb2_buf_length = cpu_to_be32(parmsize + sizeof(struct smb2_hdr)
- 4 /* RFC 1001 length field itself not counted */);
- hdr->ProtocolId[0] = 0xFE;
- hdr->ProtocolId[1] = 'S';
- hdr->ProtocolId[2] = 'M';
- hdr->ProtocolId[3] = 'B';
+ hdr->ProtocolId = SMB2_PROTO_NUMBER;
hdr->StructureSize = cpu_to_le16(64);
hdr->Command = smb2_cmd;
hdr->CreditRequest = cpu_to_le16(2); /* BB make this dynamic */
goto ioctl_exit;
}
- memcpy(*out_data, rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
+ memcpy(*out_data,
+ (char *)&rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
*plen);
ioctl_exit:
free_rsp_buf(resp_buftype, rsp);
}
if (*buf) {
- memcpy(*buf, (char *)rsp->hdr.ProtocolId + rsp->DataOffset,
+ memcpy(*buf, (char *)&rsp->hdr.ProtocolId + rsp->DataOffset,
*nbytes);
free_rsp_buf(resp_buftype, iov[0].iov_base);
} else if (resp_buftype != CIFS_NO_BUFFER) {
#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
+#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
/*
* SMB2 Header Definition
__be32 smb2_buf_length; /* big endian on wire */
/* length is only two or three bytes - with
one or two byte type preceding it that MBZ */
- __u8 ProtocolId[4]; /* 0xFE 'S' 'M' 'B' */
+ __le32 ProtocolId; /* 0xFE 'S' 'M' 'B' */
__le16 StructureSize; /* 64 */
__le16 CreditCharge; /* MBZ */
__le32 Status; /* Error from server */
one or two byte type preceding it that MBZ */
__u8 ProtocolId[4]; /* 0xFD 'S' 'M' 'B' */
__u8 Signature[16];
- __u8 Nonce[11];
- __u8 Reserved[5];
+ __u8 Nonce[16];
__le32 OriginalMessageSize;
__u16 Reserved1;
- __le16 EncryptionAlgorithm;
+ __le16 Flags; /* EncryptionAlgorithm */
__u64 SessionId;
} __packed;
*****************************************************************
*/
extern int map_smb2_to_linux_error(char *buf, bool log_err);
-extern int smb2_check_message(char *buf, unsigned int length);
+extern int smb2_check_message(char *buf, unsigned int length,
+ struct TCP_Server_Info *server);
extern unsigned int smb2_calc_size(void *buf);
extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
extern __le16 *cifs_convert_path_to_utf16(const char *from,
return rc;
}
-int
-generate_smb3signingkey(struct cifs_ses *ses)
+static int generate_key(struct cifs_ses *ses, struct kvec label,
+ struct kvec context, __u8 *key, unsigned int key_size)
{
unsigned char zero = 0x0;
__u8 i[4] = {0, 0, 0, 1};
unsigned char *hashptr = prfhash;
memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
- memset(ses->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE);
+ memset(key, 0x0, key_size);
rc = smb3_crypto_shash_allocate(ses->server);
if (rc) {
}
rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
- "SMB2AESCMAC", 12);
+ label.iov_base, label.iov_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with label\n", __func__);
goto smb3signkey_ret;
}
rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
- "SmbSign", 8);
+ context.iov_base, context.iov_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with context\n", __func__);
goto smb3signkey_ret;
goto smb3signkey_ret;
}
- memcpy(ses->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE);
+ memcpy(key, hashptr, key_size);
smb3signkey_ret:
return rc;
}
+/* A label/context pair passed to generate_key() to derive one session key. */
+struct derivation {
+ struct kvec label;
+ struct kvec context;
+};
+
+/*
+ * The three label/context pairs consumed by generate_smb3signingkey(): one
+ * each for the signing, encryption and decryption key of a session.
+ */
+struct derivation_triplet {
+ struct derivation signing;
+ struct derivation encryption;
+ struct derivation decryption;
+};
+
+/*
+ * Fill ses->smb3signingkey, ses->smb3encryptionkey and
+ * ses->smb3decryptionkey (in that order) by calling generate_key() with the
+ * label/context pairs from @ptriplet. Every key is SMB3_SIGN_KEY_SIZE bytes.
+ *
+ * Derivation stops at the first generate_key() failure and that error code is
+ * returned; otherwise the result of the last derivation is returned.
+ */
+static int
+generate_smb3signingkey(struct cifs_ses *ses,
+			const struct derivation_triplet *ptriplet)
+{
+	const struct derivation *sign = &ptriplet->signing;
+	const struct derivation *enc = &ptriplet->encryption;
+	const struct derivation *dec = &ptriplet->decryption;
+	int rc;
+
+	rc = generate_key(ses, sign->label, sign->context,
+			  ses->smb3signingkey, SMB3_SIGN_KEY_SIZE);
+	if (!rc)
+		rc = generate_key(ses, enc->label, enc->context,
+				  ses->smb3encryptionkey, SMB3_SIGN_KEY_SIZE);
+	if (!rc)
+		rc = generate_key(ses, dec->label, dec->context,
+				  ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
+
+	return rc;
+}
+
+/*
+ * Derive the signing, encryption and decryption keys of @ses using the
+ * label/context strings below. Each iov_len counts the string's terminating
+ * NUL byte as part of the derivation input.
+ *
+ * Returns the result of generate_smb3signingkey().
+ */
+int
+generate_smb30signingkey(struct cifs_ses *ses)
+{
+	const struct derivation_triplet triplet = {
+		.signing = {
+			.label   = { .iov_base = "SMB2AESCMAC", .iov_len = 12 },
+			.context = { .iov_base = "SmbSign", .iov_len = 8 },
+		},
+		.encryption = {
+			.label   = { .iov_base = "SMB2AESCCM", .iov_len = 11 },
+			.context = { .iov_base = "ServerIn ", .iov_len = 10 },
+		},
+		.decryption = {
+			.label   = { .iov_base = "SMB2AESCCM", .iov_len = 11 },
+			.context = { .iov_base = "ServerOut", .iov_len = 10 },
+		},
+	};
+
+	return generate_smb3signingkey(ses, &triplet);
+}
+
+/*
+ * Derive the signing, encryption and decryption keys of @ses for the ops
+ * table that selects this helper. The label/context strings (with the
+ * terminating NUL counted in each iov_len) are currently the same ones
+ * generate_smb30signingkey() uses.
+ *
+ * Returns the result of generate_smb3signingkey().
+ */
+int
+generate_smb311signingkey(struct cifs_ses *ses)
+{
+	const struct derivation_triplet triplet = {
+		.signing = {
+			.label   = { .iov_base = "SMB2AESCMAC", .iov_len = 12 },
+			.context = { .iov_base = "SmbSign", .iov_len = 8 },
+		},
+		.encryption = {
+			.label   = { .iov_base = "SMB2AESCCM", .iov_len = 11 },
+			.context = { .iov_base = "ServerIn ", .iov_len = 10 },
+		},
+		.decryption = {
+			.label   = { .iov_base = "SMB2AESCCM", .iov_len = 11 },
+			.context = { .iov_base = "ServerOut", .iov_len = 10 },
+		},
+	};
+
+	return generate_smb3signingkey(ses, &triplet);
+}
+
int
smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
{
cifs_in_send_dec(server);
cifs_save_when_sent(mid);
- if (rc < 0)
+ if (rc < 0) {
server->sequence_number -= 2;
+ cifs_delete_mid(mid);
+ }
+
mutex_unlock(&server->srv_mutex);
if (rc == 0)
return 0;
- cifs_delete_mid(mid);
add_credits_and_wake_if(server, credits, optype);
return rc;
}
} while (0)
-#define CODA_FREE(ptr,size) \
- do { if (size < PAGE_SIZE) kfree((ptr)); else vfree((ptr)); } while (0)
+#define CODA_FREE(ptr, size) kvfree((ptr))
/* inode to cnode access functions */
if (host_file->f_op->iterate) {
struct inode *host_inode = file_inode(host_file);
- mutex_lock(&host_inode->i_mutex);
+ inode_lock(host_inode);
ret = -ENOENT;
if (!IS_DEADDIR(host_inode)) {
ret = host_file->f_op->iterate(host_file, ctx);
file_accessed(host_file);
}
- mutex_unlock(&host_inode->i_mutex);
+ inode_unlock(host_inode);
return ret;
}
/* Venus: we must read Venus dirents from a file */
host_file = cfi->cfi_container;
file_start_write(host_file);
- mutex_lock(&coda_inode->i_mutex);
+ inode_lock(coda_inode);
ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
coda_inode->i_size = file_inode(host_file)->i_size;
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
- mutex_unlock(&coda_inode->i_mutex);
+ inode_unlock(coda_inode);
file_end_write(host_file);
return ret;
}
err = filemap_write_and_wait_range(coda_inode->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&coda_inode->i_mutex);
+ inode_lock(coda_inode);
cfi = CODA_FTOC(coda_file);
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
err = vfs_fsync(host_file, datasync);
if (!err && !datasync)
err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
- mutex_unlock(&coda_inode->i_mutex);
+ inode_unlock(coda_inode);
return err;
}
child = sd->s_dentry;
- mutex_lock(&d_inode(child)->i_mutex);
+ inode_lock(d_inode(child));
configfs_detach_group(sd->s_element);
d_inode(child)->i_flags |= S_DEAD;
dont_mount(child);
- mutex_unlock(&d_inode(child)->i_mutex);
+ inode_unlock(d_inode(child));
d_delete(child);
dput(child);
* the VFS may already have hit and used them. Thus,
* we must lock them as rmdir() would.
*/
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
configfs_remove_dir(item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
d_delete(dentry);
}
}
* We must also lock the inode to remove it safely in case of
* error, as rmdir() would.
*/
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
configfs_adjust_dir_dirent_depth_before_populate(sd);
ret = populate_groups(to_config_group(item));
if (ret) {
dont_mount(dentry);
}
configfs_adjust_dir_dirent_depth_after_populate(sd);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
if (ret)
d_delete(dentry);
}
* subsystem is really registered, and so we need to lock out
* configfs_[un]register_subsystem().
*/
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
subsys_sd = configfs_find_subsys_dentry(root->d_fsdata, s_item);
if (!subsys_sd) {
ret = configfs_do_depend_item(subsys_sd->s_dentry, target);
out_unlock_fs:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
/*
* If we succeeded, the fs is pinned via other methods. If not,
* additional locking to prevent other subsystem from being
* unregistered
*/
- mutex_lock(&d_inode(root->cg_item.ci_dentry)->i_mutex);
+ inode_lock(d_inode(root->cg_item.ci_dentry));
/*
* As we are trying to depend item from other subsystem
* We were called from subsystem other than our target so we
* took some locks so now it's time to release them
*/
- mutex_unlock(&d_inode(root->cg_item.ci_dentry)->i_mutex);
+ inode_unlock(d_inode(root->cg_item.ci_dentry));
return ret;
}
down_write(&configfs_rename_sem);
parent = item->parent->dentry;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
if (!IS_ERR(new_dentry)) {
error = -EEXIST;
dput(new_dentry);
}
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
up_write(&configfs_rename_sem);
return error;
struct configfs_dirent * parent_sd = dentry->d_fsdata;
int err;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
/*
* Fake invisibility if dir belongs to a group/default groups hierarchy
* being attached
else
err = 0;
}
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return err;
}
struct dentry * dentry = file->f_path.dentry;
struct configfs_dirent * cursor = file->private_data;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
spin_lock(&configfs_dirent_lock);
list_del_init(&cursor->s_sibling);
spin_unlock(&configfs_dirent_lock);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
release_configfs_dirent(cursor);
{
struct dentry * dentry = file->f_path.dentry;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
switch (whence) {
case 1:
offset += file->f_pos;
if (offset >= 0)
break;
default:
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return -EINVAL;
}
if (offset != file->f_pos) {
spin_unlock(&configfs_dirent_lock);
}
}
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return offset;
}
parent = parent_group->cg_item.ci_dentry;
- mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
ret = create_default_group(parent_group, group);
if (!ret) {
spin_lock(&configfs_dirent_lock);
configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
spin_unlock(&configfs_dirent_lock);
}
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return ret;
}
EXPORT_SYMBOL(configfs_register_group);
struct dentry *dentry = group->cg_item.ci_dentry;
struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
- mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
spin_lock(&configfs_dirent_lock);
configfs_detach_prep(dentry, NULL);
spin_unlock(&configfs_dirent_lock);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
d_delete(dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
dput(dentry);
sd = root->d_fsdata;
link_group(to_config_group(sd->s_element), group);
- mutex_lock_nested(&d_inode(root)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(root), I_MUTEX_PARENT);
err = -ENOMEM;
dentry = d_alloc_name(root, group->cg_item.ci_name);
}
}
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
if (err) {
unlink_group(group);
return;
}
- mutex_lock_nested(&d_inode(root)->i_mutex,
+ inode_lock_nested(d_inode(root),
I_MUTEX_PARENT);
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
mutex_lock(&configfs_symlink_mutex);
spin_lock(&configfs_dirent_lock);
if (configfs_detach_prep(dentry, NULL)) {
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
d_delete(dentry);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(dentry);
umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
int error = 0;
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_NORMAL);
+ inode_lock_nested(d_inode(dir), I_MUTEX_NORMAL);
error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode,
CONFIGFS_ITEM_ATTR);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
return error;
}
umode_t mode = (bin_attr->cb_attr.ca_mode & S_IALLUGO) | S_IFREG;
int error = 0;
- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL);
+ inode_lock_nested(dir->d_inode, I_MUTEX_NORMAL);
error = configfs_make_dirent(parent_sd, NULL, (void *) bin_attr, mode,
CONFIGFS_ITEM_BIN_ATTR);
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
return error;
}
/* no inode means this hasn't been made visible yet */
return;
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
if (!sd->s_element)
continue;
break;
}
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
}
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/pagevec.h>
#include <linux/pmem.h>
#include <linux/sched.h>
#include <linux/uio.h>
loff_t end = pos + iov_iter_count(iter);
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
struct address_space *mapping = inode->i_mapping;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = filemap_write_and_wait_range(mapping, pos, end - 1);
if (retval) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
}
retval = dax_io(inode, iter, pos, end, get_block, &bh);
if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if ((retval > 0) && end_io)
end_io(iocb, pos, retval, bh.b_private);
return 0;
}
+/* Sector value for which dax_radix_entry() inserts no new radix tree entry. */
+#define NO_SECTOR (-1)
+/*
+ * Mask a page index with PMD_MASK >> PAGE_CACHE_SHIFT; the result is the
+ * index at which a PMD entry covering page_index is looked up. The argument
+ * is parenthesized so that expressions can be passed safely.
+ */
+#define DAX_PMD_INDEX(page_index) ((page_index) & (PMD_MASK >> PAGE_CACHE_SHIFT))
+
+/*
+ * Record a DAX entry for @index in @mapping's radix tree and, when @dirty is
+ * set, tag it PAGECACHE_TAG_DIRTY.
+ *
+ * - If a PMD entry already exists at the PMD-masked index, that entry is
+ * reused and only (re)tagged.
+ * - If an entry already exists at @index it is reused, unless @pmd_entry is
+ * set and the existing entry is a PTE entry; in that case the PTE entry is
+ * deleted so a PMD entry can be inserted in its place.
+ * - With @sector == NO_SECTOR nothing is inserted.
+ *
+ * The inode is marked I_DIRTY_PAGES unconditionally. All tree accesses are
+ * made with mapping->tree_lock held.
+ *
+ * Returns 0 on success, -EIO if an existing entry has an unexpected type, or
+ * the error returned by radix_tree_insert().
+ */
+static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
+ sector_t sector, bool pmd_entry, bool dirty)
+{
+ struct radix_tree_root *page_tree = &mapping->page_tree;
+ pgoff_t pmd_index = DAX_PMD_INDEX(index);
+ int type, error = 0;
+ void *entry;
+
+ WARN_ON_ONCE(pmd_entry && !dirty);
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+ spin_lock_irq(&mapping->tree_lock);
+
+ /* an existing PMD entry covers @index: operate on it instead */
+ entry = radix_tree_lookup(page_tree, pmd_index);
+ if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
+ index = pmd_index;
+ goto dirty;
+ }
+
+ entry = radix_tree_lookup(page_tree, index);
+ if (entry) {
+ type = RADIX_DAX_TYPE(entry);
+ if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
+ type != RADIX_DAX_PMD)) {
+ error = -EIO;
+ goto unlock;
+ }
+
+ /* existing entry is already adequate: only tag it */
+ if (!pmd_entry || type == RADIX_DAX_PMD)
+ goto dirty;
+
+ /*
+ * We only insert dirty PMD entries into the radix tree. This
+ * means we don't need to worry about removing a dirty PTE
+ * entry and inserting a clean PMD entry, thus reducing the
+ * range we would flush with a follow-up fsync/msync call.
+ */
+ radix_tree_delete(&mapping->page_tree, index);
+ mapping->nrexceptional--;
+ }
+
+ if (sector == NO_SECTOR) {
+ /*
+ * This can happen during correct operation if our pfn_mkwrite
+ * fault raced against a hole punch operation. If this
+ * happens the pte that was hole punched will have been
+ * unmapped and the radix tree entry will have been removed by
+ * the time we are called, but the call will still happen. We
+ * will return all the way up to wp_pfn_shared(), where the
+ * pte_same() check will fail, eventually causing page fault
+ * to be retried by the CPU.
+ */
+ goto unlock;
+ }
+
+ error = radix_tree_insert(page_tree, index,
+ RADIX_DAX_ENTRY(sector, pmd_entry));
+ if (error)
+ goto unlock;
+
+ /* account for the entry that was just inserted */
+ mapping->nrexceptional++;
+ dirty:
+ if (dirty)
+ radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+ unlock:
+ spin_unlock_irq(&mapping->tree_lock);
+ return error;
+}
+
+/*
+ * Write back the single radix tree entry @entry that the caller found at
+ * @index in @mapping.
+ *
+ * @entry was read without tree_lock held, so it is re-validated under the
+ * lock. The call does nothing and returns 0 if the slot no longer exists,
+ * holds a different entry, or no longer carries PAGECACHE_TAG_TOWRITE.
+ * Otherwise the PMD_SIZE or PAGE_SIZE range starting at
+ * RADIX_DAX_SECTOR(entry) is mapped with dax_map_atomic(), passed to
+ * wb_cache_pmem(), and the TOWRITE tag is cleared.
+ *
+ * Returns a negative errno on failure (-EIO for an unexpected entry type or a
+ * mapping shorter than the entry, or the dax_map_atomic() error).
+ * NOTE(review): on the successful flush path the non-negative
+ * dax_map_atomic() result is returned unchanged, so callers must only test
+ * for ret < 0.
+ */
+static int dax_writeback_one(struct block_device *bdev,
+ struct address_space *mapping, pgoff_t index, void *entry)
+{
+ struct radix_tree_root *page_tree = &mapping->page_tree;
+ int type = RADIX_DAX_TYPE(entry);
+ struct radix_tree_node *node;
+ struct blk_dax_ctl dax;
+ void **slot;
+ int ret = 0;
+
+ spin_lock_irq(&mapping->tree_lock);
+ /*
+ * Regular page slots are stabilized by the page lock even
+ * without the tree itself locked. These unlocked entries
+ * need verification under the tree lock.
+ */
+ if (!__radix_tree_lookup(page_tree, index, &node, &slot))
+ goto unlock;
+ if (*slot != entry)
+ goto unlock;
+
+ /* another fsync thread may have already written back this entry */
+ if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+ goto unlock;
+
+ if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
+ ret = -EIO;
+ goto unlock;
+ }
+
+ /* capture what to flush while the entry is known to be valid */
+ dax.sector = RADIX_DAX_SECTOR(entry);
+ dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
+ spin_unlock_irq(&mapping->tree_lock);
+
+ /*
+ * We cannot hold tree_lock while calling dax_map_atomic() because it
+ * eventually calls cond_resched().
+ */
+ ret = dax_map_atomic(bdev, &dax);
+ if (ret < 0)
+ return ret;
+
+ /* the mapping must cover the whole entry */
+ if (WARN_ON_ONCE(ret < dax.size)) {
+ ret = -EIO;
+ goto unmap;
+ }
+
+ wb_cache_pmem(dax.addr, dax.size);
+
+ /* the tag is cleared only after the range has been written back */
+ spin_lock_irq(&mapping->tree_lock);
+ radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+ spin_unlock_irq(&mapping->tree_lock);
+ unmap:
+ dax_unmap_atomic(bdev, &dax);
+ return ret;
+
+ unlock:
+ spin_unlock_irq(&mapping->tree_lock);
+ return ret;
+}
+
+/*
+ * Flush the mapping to the persistent domain within the byte range of [start,
+ * end]. This is required by data integrity operations to ensure file data is
+ * on persistent storage prior to completion of the operation.
+ *
+ * Returns 0 on success, -EIO if the inode's block size differs from the page
+ * size, or the first negative error returned by dax_writeback_one().
+ */
+int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
+ loff_t end)
+{
+ struct inode *inode = mapping->host;
+ struct block_device *bdev = inode->i_sb->s_bdev;
+ pgoff_t start_index, end_index, pmd_index;
+ pgoff_t indices[PAGEVEC_SIZE];
+ struct pagevec pvec;
+ bool done = false;
+ int i, ret = 0;
+ void *entry;
+
+ /* only block size == page size is handled here */
+ if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
+ return -EIO;
+
+ start_index = start >> PAGE_CACHE_SHIFT;
+ end_index = end >> PAGE_CACHE_SHIFT;
+ pmd_index = DAX_PMD_INDEX(start_index);
+
+ rcu_read_lock();
+ entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
+ rcu_read_unlock();
+
+ /* see if the start of our range is covered by a PMD entry */
+ if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+ start_index = pmd_index;
+
+ /*
+ * NOTE(review): presumably this tags the dirty entries in the range
+ * with PAGECACHE_TAG_TOWRITE, which is the tag searched for below -
+ * confirm against tag_pages_for_writeback().
+ */
+ tag_pages_for_writeback(mapping, start_index, end_index);
+
+ pagevec_init(&pvec, 0);
+ /*
+ * Each lookup restarts at start_index, which is never advanced;
+ * progress relies on dax_writeback_one() clearing the TOWRITE tag of
+ * the entries it has written back.
+ */
+ while (!done) {
+ pvec.nr = find_get_entries_tag(mapping, start_index,
+ PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
+ pvec.pages, indices);
+
+ if (pvec.nr == 0)
+ break;
+
+ for (i = 0; i < pvec.nr; i++) {
+ /* tagged entries past the requested range are left alone */
+ if (indices[i] > end_index) {
+ done = true;
+ break;
+ }
+
+ /* positive return values are not errors; only ret < 0 aborts */
+ ret = dax_writeback_one(bdev, mapping, indices[i],
+ pvec.pages[i]);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ wmb_pmem();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
+
static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
}
dax_unmap_atomic(bdev, &dax);
+ error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
+ vmf->flags & FAULT_FLAG_WRITE);
+ if (error)
+ goto out;
+
error = vm_insert_mixed(vma, vaddr, dax.pfn);
out:
memset(&bh, 0, sizeof(bh));
block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
+ bh.b_bdev = inode->i_sb->s_bdev;
bh.b_size = PAGE_SIZE;
repeat:
delete_from_page_cache(page);
unlock_page(page);
page_cache_release(page);
+ page = NULL;
}
/*
struct block_device *bdev;
pgoff_t size, pgoff;
sector_t block;
- int result = 0;
+ int error, result = 0;
+ bool alloc = false;
/* dax pmd mappings require pfn_t_devmap() */
if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
}
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
bh.b_size = PMD_SIZE;
- if (get_block(inode, block, &bh, write) != 0)
+
+ if (get_block(inode, block, &bh, 0) != 0)
return VM_FAULT_SIGBUS;
+
+ if (!buffer_mapped(&bh) && write) {
+ if (get_block(inode, block, &bh, 1) != 0)
+ return VM_FAULT_SIGBUS;
+ alloc = true;
+ }
+
bdev = bh.b_bdev;
- i_mmap_lock_read(mapping);
/*
* If the filesystem isn't willing to tell us the length of a hole,
*/
if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
dax_pmd_dbg(&bh, address, "allocated block too small");
- goto fallback;
+ return VM_FAULT_FALLBACK;
}
/*
* If we allocated new storage, make sure no process has any
* zero pages covering this hole
*/
- if (buffer_new(&bh)) {
- i_mmap_unlock_read(mapping);
- unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
- i_mmap_lock_read(mapping);
+ if (alloc) {
+ loff_t lstart = pgoff << PAGE_SHIFT;
+ loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
+
+ truncate_pagecache_range(inode, lstart, lend);
}
+ i_mmap_lock_read(mapping);
+
/*
* If a truncate happened while we were allocating blocks, we may
* leave blocks allocated to the file that are beyond EOF. We can't
goto out;
}
if ((pgoff | PG_PMD_COLOUR) >= size) {
- dax_pmd_dbg(&bh, address, "pgoff unaligned");
+ dax_pmd_dbg(&bh, address,
+ "offset + huge page size > file size");
goto fallback;
}
}
dax_unmap_atomic(bdev, &dax);
+ /*
+ * For PTE faults we insert a radix tree entry for reads, and
+ * leave it clean. Then on the first write we dirty the radix
+ * tree entry via the dax_pfn_mkwrite() path. This sequence
+ * allows the dax_pfn_mkwrite() call to be simpler and avoid a
+ * call into get_block() to translate the pgoff to a sector in
+ * order to be able to create a new radix tree entry.
+ *
+ * The PMD path doesn't have an equivalent to
+ * dax_pfn_mkwrite(), though, so for a read followed by a
+ * write we traverse all the way through __dax_pmd_fault()
+ * twice. This means we can just skip inserting a radix tree
+ * entry completely on the initial read and just wait until
+ * the write to insert a dirty entry.
+ */
+ if (write) {
+ error = dax_radix_entry(mapping, pgoff, dax.sector,
+ true, true);
+ if (error) {
+ dax_pmd_dbg(&bh, address,
+ "PMD radix insertion failed");
+ goto fallback;
+ }
+ }
+
dev_dbg(part_to_dev(bdev->bd_part),
"%s: %s addr: %lx pfn: %lx sect: %llx\n",
__func__, current->comm, address,
* dax_pfn_mkwrite - handle first write to DAX page
* @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
- *
*/
int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+ struct file *file = vma->vm_file;
- sb_start_pagefault(sb);
- file_update_time(vma->vm_file);
- sb_end_pagefault(sb);
+ /*
+ * We pass NO_SECTOR to dax_radix_entry() because we expect that a
+ * RADIX_DAX_PTE entry already exists in the radix tree from a
+ * previous call to __dax_fault(). We just want to look up that PTE
+ * entry using vmf->pgoff and make sure the dirty tag is set. This
+ * saves us from having to make a call to get_block() here to look
+ * up the sector.
+ *
+ * This function itself does not call sb_start_pagefault() or
+ * file_update_time(); presumably callers are expected to do that
+ * themselves before calling in - confirm for every caller.
+ *
+ * NOTE(review): the return value of dax_radix_entry() is not checked
+ * and VM_FAULT_NOPAGE is returned unconditionally; confirm that a
+ * failure to dirty the entry can safely be ignored here.
+ */
+ dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false, true);
return VM_FAULT_NOPAGE;
}
EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
BUG_ON((offset + length) > PAGE_CACHE_SIZE);
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
bh.b_size = PAGE_CACHE_SIZE;
err = get_block(inode, index, &bh, 0);
if (err < 0)
*/
void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
{
- BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
+ BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
spin_lock(&dentry->d_lock);
if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
goto out_err;
m1 = &dentry->d_sb->s_vfs_rename_mutex;
- if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+ if (!inode_trylock(alias->d_parent->d_inode))
goto out_err;
m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
if (!parent)
parent = debugfs_mount->mnt_root;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
dput(dentry);
}
if (IS_ERR(dentry)) {
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
static struct dentry *failed_creating(struct dentry *dentry)
{
- mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+ inode_unlock(d_inode(dentry->d_parent));
dput(dentry);
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
return NULL;
static struct dentry *end_creating(struct dentry *dentry)
{
- mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+ inode_unlock(d_inode(dentry->d_parent)); /* unlock the parent directory's inode, then hand @dentry back unchanged; presumably pairs with a lock taken when creation started - confirm */
return dentry;
}
if (!parent || d_really_is_negative(parent))
return;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
ret = __debugfs_remove(dentry, parent);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
if (!ret)
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
parent = dentry;
down:
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
loop:
/*
* The parent->d_subdirs is protected by the d_lock. Outside that
/* perhaps simple_empty(child) makes more sense */
if (!list_empty(&child->d_subdirs)) {
spin_unlock(&parent->d_lock);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
parent = child;
goto down;
}
}
spin_unlock(&parent->d_lock);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
child = parent;
parent = parent->d_parent;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
if (child != dentry)
/* go up */
if (!__debugfs_remove(child, parent))
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
}
EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
if (!uid_valid(root_uid) || !gid_valid(root_gid))
return -EINVAL;
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
/* If we have already created ptmx node, return */
if (fsi->ptmx_dentry) {
fsi->ptmx_dentry = dentry;
rc = 0;
out:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return rc;
}
sprintf(s, "%d", index);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_alloc_name(root, s);
if (dentry) {
inode = ERR_PTR(-ENOMEM);
}
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return inode;
}
BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_find_alias(inode);
dput(dentry); /* d_alloc_name() in devpts_pty_new() */
dput(dentry); /* d_find_alias above */
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
}
static int __init init_devpts_fs(void)
iocb->ki_filp->f_mapping;
/* will be released by direct_io_worker */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = filemap_write_and_wait_range(mapping, offset,
end - 1);
if (retval) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kmem_cache_free(dio_cache, dio);
goto out;
}
dio->i_size = i_size_read(inode);
if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
if (dio->flags & DIO_LOCKING)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kmem_cache_free(dio_cache, dio);
retval = 0;
goto out;
* of protecting us from looking up uninitialized blocks.
*/
if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
- mutex_unlock(&dio->inode->i_mutex);
+ inode_unlock(dio->inode);
/*
* The only time we want to leave bios in flight is when a successful
return -EINVAL;
kbuf = memdup_user_nul(buf, count);
- if (!IS_ERR(kbuf))
+ if (IS_ERR(kbuf))
return PTR_ERR(kbuf);
if (check_version(kbuf)) {
struct dentry *dir;
dir = dget_parent(dentry);
- mutex_lock_nested(&(d_inode(dir)->i_mutex), I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
return dir;
}
static void unlock_dir(struct dentry *dir)
{
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir)); /* release the inode lock on @dir first; dput() below then drops the dentry reference */
dput(dir);
}
int rc = 0;
lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
- mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dir_dentry));
lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
lower_dir_dentry,
ecryptfs_dentry->d_name.len);
- mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dir_dentry));
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
"filename; rc = [%d]\n", __func__, rc);
goto out;
}
- mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dir_dentry));
lower_dentry = lookup_one_len(encrypted_and_encoded_name,
lower_dir_dentry,
encrypted_and_encoded_name_size);
- mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dir_dentry));
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = notify_change(lower_dentry, &lower_ia, NULL);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
}
return rc;
}
if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
lower_ia.ia_valid &= ~ATTR_MODE;
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = notify_change(lower_dentry, &lower_ia, NULL);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
fsstack_copy_attr_all(inode, lower_inode);
return rc;
rc = -EOPNOTSUPP;
goto out;
}
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = d_inode(lower_dentry)->i_op->getxattr(lower_dentry, name, value,
size);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
return rc;
}
rc = -EOPNOTSUPP;
goto out;
}
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = d_inode(lower_dentry)->i_op->listxattr(lower_dentry, list, size);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
return rc;
}
rc = -EOPNOTSUPP;
goto out;
}
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = d_inode(lower_dentry)->i_op->removexattr(lower_dentry, name);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
return rc;
}
rc = -ENOMEM;
goto out;
}
- mutex_lock(&lower_inode->i_mutex);
+ inode_lock(lower_inode);
size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
xattr_virt, PAGE_CACHE_SIZE);
if (size < 0)
put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
xattr_virt, size, 0);
- mutex_unlock(&lower_inode->i_mutex);
+ inode_unlock(lower_inode);
if (rc)
printk(KERN_ERR "Error whilst attempting to write inode size "
"to lower file xattr; rc = [%d]\n", rc);
d_delete(file->f_path.dentry);
dput(file->f_path.dentry);
} else {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_size_write(inode, datasize + sizeof(attributes));
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
bytes = count;
efivar_entry_size(entry, &size);
efivar_entry_add(entry, &efivarfs_list);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode->i_private = entry;
i_size_write(inode, size + sizeof(entry->var.Attributes));
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
d_add(dentry, inode);
return 0;
return;
/* Be careful if suid/sgid is set */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* reload atomically mode/uid/gid now that lock held */
mode = inode->i_mode;
uid = inode->i_uid;
gid = inode->i_gid;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/* We ignore suid/sgid if there are no mappings for them in the ns */
if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = sync_inode_metadata(filp->f_mapping->host, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
int err;
parent = ERR_PTR(-EACCES);
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
if (mnt->mnt_sb->s_export_op->get_parent)
parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
if (IS_ERR(parent)) {
dprintk("%s: get_parent of %ld failed, err %d\n",
if (err)
goto out_err;
dprintk("%s: found name: %s\n", __func__, nbuf);
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
if (IS_ERR(tmp)) {
dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
goto out_err;
*/
err = exportfs_get_name(mnt, target_dir, nbuf, result);
if (!err) {
- mutex_lock(&target_dir->d_inode->i_mutex);
+ inode_lock(target_dir->d_inode);
nresult = lookup_one_len(nbuf, target_dir,
strlen(nbuf));
- mutex_unlock(&target_dir->d_inode->i_mutex);
+ inode_unlock(target_dir->d_inode);
if (!IS_ERR(nresult)) {
if (nresult->d_inode) {
dput(result);
{
struct inode *inode = file_inode(vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
- int ret = VM_FAULT_NOPAGE;
loff_t size;
+ int ret;
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
+ else
+ ret = dax_pfn_mkwrite(vma, vmf);
up_read(&ei->dax_sem);
sb_end_pagefault(inode->i_sb);
flags = ext2_mask_flags(inode->i_mode, flags);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = -EPERM;
goto setflags_out;
}
*/
if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = -EPERM;
goto setflags_out;
}
ext2_set_inode_flags(inode);
inode->i_ctime = CURRENT_TIME_SEC;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mark_inode_dirty(inode);
setflags_out:
goto setversion_out;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_generation = generation;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mark_inode_dirty(inode);
setversion_out:
EXT4_DECRYPT, page->index, page, page);
}
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len)
{
struct ext4_crypto_ctx *ctx;
struct page *ciphertext_page = NULL;
struct bio *bio;
- ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t pblk = ext4_ext_pblock(ex);
- unsigned int len = ext4_ext_get_actual_len(ex);
int ret, err = 0;
#if 0
res = -ENOKEY;
goto out;
}
+ down_read(&keyring_key->sem);
ukp = user_key_payload(keyring_key);
if (ukp->datalen != sizeof(struct ext4_encryption_key)) {
res = -EINVAL;
+ up_read(&keyring_key->sem);
goto out;
}
master_key = (struct ext4_encryption_key *)ukp->data;
"ext4: key size incorrect: %d\n",
master_key->size);
res = -ENOKEY;
+ up_read(&keyring_key->sem);
goto out;
}
res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
raw_key);
+ up_read(&keyring_key->sem);
if (res)
goto out;
got_key:
#define EXT4_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-#define EXT4_FL_USER_VISIBLE 0x004BDFFF /* User visible flags */
-#define EXT4_FL_USER_MODIFIABLE 0x004380FF /* User modifiable flags */
+#define EXT4_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE 0x204380FF /* User modifiable flags */
+
+#define EXT4_FL_XFLAG_VISIBLE (EXT4_SYNC_FL | \
+ EXT4_IMMUTABLE_FL | \
+ EXT4_APPEND_FL | \
+ EXT4_NODUMP_FL | \
+ EXT4_NOATIME_FL | \
+ EXT4_PROJINHERIT_FL)
/* Flags that should be inherited by new inodes from their parent. */
#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
EXT4_SYNC_FL | EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
- EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
+ EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL |\
+ EXT4_PROJINHERIT_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
/* Request will not result in inode size update (user for fallocate) */
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
- /* Do not take i_data_sem locking in ext4_map_blocks */
-#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
/* Convert written extents to unwritten */
-#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0200
+#define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0100
+ /* Write zeros to newly created written extents */
+#define EXT4_GET_BLOCKS_ZERO 0x0200
+#define EXT4_GET_BLOCKS_CREATE_ZERO (EXT4_GET_BLOCKS_CREATE |\
+ EXT4_GET_BLOCKS_ZERO)
/*
* The bit position of these flags must not overlap with any of the
#define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
#define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy)
+#ifndef FS_IOC_FSGETXATTR
+/* Until the uapi changes get merged for project quota... */
+
+#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
+
+/*
+ * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+ __u32 fsx_xflags; /* xflags field value (get/set) */
+ __u32 fsx_extsize; /* extsize field value (get/set)*/
+ __u32 fsx_nextents; /* nextents field value (get) */
+ __u32 fsx_projid; /* project identifier (get/set) */
+ unsigned char fsx_pad[12]; /* presumably reserved padding; must stay in sync with the uapi definition - confirm */
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
+#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
+#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
+#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
+#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
+#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
+#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
+#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
+#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
+#endif /* !defined(FS_IOC_FSGETXATTR) */
+
+#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
+#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
+
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
* ioctl commands in 32 bit emulation
* by other means, so we have i_data_sem.
*/
struct rw_semaphore i_data_sem;
+ /*
+ * i_mmap_sem is for serializing page faults with truncate / punch hole
+ * operations. We have to make sure that a new page cannot be faulted
+ * into a section of the inode that is being punched. We cannot easily use
+ * i_data_sem for this since we need protection for the whole punch
+ * operation and i_data_sem ranks below transaction start so we have
+ * to occasionally drop it.
+ */
+ struct rw_semaphore i_mmap_sem;
struct inode vfs_inode;
struct jbd2_inode *jinode;
/* Encryption params */
struct ext4_crypt_info *i_crypt_info;
#endif
+ kprojid_t i_projid;
};
/*
#endif
/* Number of quota types we support */
-#define EXT4_MAXQUOTAS 2
+#define EXT4_MAXQUOTAS 3
/*
* fourth extended-fs super-block data in memory
EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM|\
- EXT4_FEATURE_RO_COMPAT_QUOTA)
+ EXT4_FEATURE_RO_COMPAT_QUOTA |\
+ EXT4_FEATURE_RO_COMPAT_PROJECT)
#define EXTN_FEATURE_FUNCS(ver) \
static inline bool ext4_has_unknown_ext##ver##_compat_features(struct super_block *sb) \
#define EXT4_DEF_RESUID 0
#define EXT4_DEF_RESGID 0
+/*
+ * Default project ID
+ */
+#define EXT4_DEF_PROJID 0
+
#define EXT4_DEF_INODE_READAHEAD_BLKS 32
/*
struct page *ext4_encrypt(struct inode *inode,
struct page *plaintext_page);
int ext4_decrypt(struct page *page);
-int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
int ext4_init_crypto(void);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
int ext4_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t lend);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
+extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
+extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, ext4_lblk_t len);
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
{
WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
- !mutex_is_locked(&inode->i_mutex));
+ !inode_is_locked(inode));
down_write(&EXT4_I(inode)->i_data_sem);
if (newsize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = newsize;
return changed;
}
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+ loff_t len);
+
struct ext4_group_info {
unsigned long bb_state;
struct rb_root bb_free_root;
struct page *page);
extern int ext4_try_add_inline_entry(handle_t *handle,
struct ext4_filename *fname,
- struct dentry *dentry,
- struct inode *inode);
+ struct inode *dir, struct inode *inode);
extern int ext4_try_create_inline_dir(handle_t *handle,
struct inode *parent,
struct inode *inode);
{
ext4_fsblk_t ee_pblock;
unsigned int ee_len;
- int ret;
ee_len = ext4_ext_get_actual_len(ex);
ee_pblock = ext4_ext_pblock(ex);
-
- if (ext4_encrypted_inode(inode))
- return ext4_encrypted_zeroout(inode, ex);
-
- ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
- if (ret > 0)
- ret = 0;
-
- return ret;
+ return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
+ ee_len);
}
/*
}
/* IO end_io complete, convert the filled extent to written */
if (flags & EXT4_GET_BLOCKS_CONVERT) {
+ if (flags & EXT4_GET_BLOCKS_ZERO) {
+ if (allocated > map->m_len)
+ allocated = map->m_len;
+ err = ext4_issue_zeroout(inode, map->m_lblk, newblock,
+ allocated);
+ if (err < 0)
+ goto out2;
+ }
ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
ppath);
if (ret >= 0) {
if (len <= EXT_UNWRITTEN_MAX_LEN)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
-
/*
* credits to insert 1 extent into extent tree
*/
goto retry;
}
- ext4_inode_resume_unlocked_dio(inode);
-
return ret > 0 ? ret2 : ret;
}
int partial_begin, partial_end;
loff_t start, end;
ext4_lblk_t lblk;
- struct address_space *mapping = inode->i_mapping;
unsigned int blkbits = inode->i_blkbits;
trace_ext4_zero_range(inode, offset, len, mode);
return ret;
}
- /*
- * Write out all dirty pages to avoid race conditions
- * Then release them.
- */
- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
- ret = filemap_write_and_wait_range(mapping, offset,
- offset + len - 1);
- if (ret)
- return ret;
- }
-
/*
* Round up offset. This is not fallocate, we neet to zero out
* blocks, so convert interior block aligned part of the range to
else
max_blocks -= lblk;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Indirect files do not support unwritten extnets
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
+ /* Wait for all existing dio workers; newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
/* Preallocate the range including the unaligned edges */
if (partial_begin || partial_end) {
ret = ext4_alloc_file_blocks(file,
round_down(offset, 1 << blkbits)) >> blkbits,
new_size, flags, mode);
if (ret)
- goto out_mutex;
+ goto out_dio;
}
flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
EXT4_EX_NOCACHE);
- /* Now release the pages and zero block aligned part of pages*/
+ /*
+ * Prevent page faults from reinstantiating pages we have
+ * released from page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ ret = ext4_update_disksize_before_punch(inode, offset, len);
+ if (ret) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ goto out_dio;
+ }
+ /* Now release the pages and zero block aligned part of pages */
truncate_pagecache_range(inode, start, end - 1);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
-
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
if (ret)
goto out_dio;
}
out_dio:
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* We only support preallocation for extent-based files only
goto out;
}
+ /* Wait for all existing dio workers; newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode);
+ ext4_inode_resume_unlocked_dio(inode);
if (ret)
goto out;
EXT4_I(inode)->i_sync_tid);
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
return ret;
}
return ret;
}
- /*
- * Need to round down offset to be aligned with page size boundary
- * for page size > block size.
- */
- ioffset = round_down(offset, PAGE_SIZE);
-
- /* Write out all dirty pages */
- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
- LLONG_MAX);
- if (ret)
- return ret;
-
- /* Take mutex lock */
- mutex_lock(&inode->i_mutex);
-
+ inode_lock(inode);
/*
* There is no need to overlap collapse range with EOF, in which case
* it is effectively a truncate operation
goto out_mutex;
}
- truncate_pagecache(inode, ioffset);
-
/* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ /*
+ * Need to round down offset to be aligned with page size boundary
+ * for page size > block size.
+ */
+ ioffset = round_down(offset, PAGE_SIZE);
+ /*
+ * Write tail of the last page before removed range since it will get
+ * removed from the page cache below.
+ */
+ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+ if (ret)
+ goto out_mmap;
+ /*
+ * Write data that will be shifted to preserve them when discarding
+ * page cache below. We are also protected from pages becoming dirty
+ * by i_mmap_sem.
+ */
+ ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+ LLONG_MAX);
+ if (ret)
+ goto out_mmap;
+ truncate_pagecache(inode, ioffset);
+
credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- goto out_dio;
+ goto out_mmap;
}
down_write(&EXT4_I(inode)->i_data_sem);
out_stop:
ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
return ret;
}
- /*
- * Need to round down to align start offset to page size boundary
- * for page size > block size.
- */
- ioffset = round_down(offset, PAGE_SIZE);
-
- /* Write out all dirty pages */
- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
- LLONG_MAX);
- if (ret)
- return ret;
-
- /* Take mutex lock */
- mutex_lock(&inode->i_mutex);
-
+ inode_lock(inode);
/* Currently just for extent based files */
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
ret = -EOPNOTSUPP;
goto out_mutex;
}
- truncate_pagecache(inode, ioffset);
-
/* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ /*
+ * Need to round down to align start offset to page size boundary
+ * for page size > block size.
+ */
+ ioffset = round_down(offset, PAGE_SIZE);
+ /* Write out all dirty pages */
+ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+ LLONG_MAX);
+ if (ret)
+ goto out_mmap;
+ truncate_pagecache(inode, ioffset);
+
credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- goto out_dio;
+ goto out_mmap;
}
/* Expand file to avoid data loss if there is error while shifting */
out_stop:
ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
- BUG_ON(!mutex_is_locked(&inode1->i_mutex));
- BUG_ON(!mutex_is_locked(&inode2->i_mutex));
+ BUG_ON(!inode_is_locked(inode1));
+ BUG_ON(!inode_is_locked(inode2));
*erp = ext4_es_remove_extent(inode1, lblk1, count);
if (unlikely(*erp))
ext4_unwritten_wait(inode);
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret <= 0)
goto out;
}
ret = __generic_file_write_iter(iocb, from);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret > 0) {
ssize_t err;
return ret;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (aio_mutex)
mutex_unlock(aio_mutex);
return ret;
}
#ifdef CONFIG_FS_DAX
-static void ext4_end_io_unwritten(struct buffer_head *bh, int uptodate)
-{
- struct inode *inode = bh->b_assoc_map->host;
- /* XXX: breaks on 32-bit > 16TB. Is that even supported? */
- loff_t offset = (loff_t)(uintptr_t)bh->b_private << inode->i_blkbits;
- int err;
- if (!uptodate)
- return;
- WARN_ON(!buffer_unwritten(bh));
- err = ext4_convert_unwritten_extents(NULL, inode, offset, bh->b_size);
-}
-
+/*
+ * ->fault handler of ext4_dax_vm_ops.  The fault itself is serviced by
+ * __dax_fault() with ext4_dax_mmap_get_block as the get_block callback, while
+ * EXT4_I(inode)->i_mmap_sem is held for reading.  For write faults a journal
+ * handle is started before and stopped after that call.  Returns
+ * VM_FAULT_SIGBUS when the handle cannot be started, otherwise the result of
+ * __dax_fault().
+ */
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
int result;
handle_t *handle = NULL;
- struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+ struct inode *inode = file_inode(vma->vm_file);
+ struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE;
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
+ /* i_mmap_sem is taken before the transaction is started */
+ down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
EXT4_DATA_TRANS_BLOCKS(sb));
- }
+ } else
+ down_read(&EXT4_I(inode)->i_mmap_sem);
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else
- result = __dax_fault(vma, vmf, ext4_get_block_dax,
- ext4_end_io_unwritten);
+ result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);
if (write) {
if (!IS_ERR(handle))
ext4_journal_stop(handle);
+ /* Released in reverse order: handle, i_mmap_sem, pagefault */
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
- }
+ } else
+ up_read(&EXT4_I(inode)->i_mmap_sem);
return result;
}
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
ext4_chunk_trans_blocks(inode,
PMD_SIZE / PAGE_SIZE));
- }
+ } else
+ down_read(&EXT4_I(inode)->i_mmap_sem);
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
else
result = __dax_pmd_fault(vma, addr, pmd, flags,
- ext4_get_block_dax, ext4_end_io_unwritten);
+ ext4_dax_mmap_get_block, NULL);
if (write) {
if (!IS_ERR(handle))
ext4_journal_stop(handle);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
- }
+ } else
+ up_read(&EXT4_I(inode)->i_mmap_sem);
return result;
}
+/*
+ * ->page_mkwrite handler of ext4_dax_vm_ops.  Wraps __dax_mkwrite() (with
+ * ext4_dax_mmap_get_block as the get_block callback) in
+ * sb_start_pagefault()/sb_end_pagefault() and a read hold on
+ * EXT4_I(inode)->i_mmap_sem, and returns whatever __dax_mkwrite() returns.
+ */
static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_mkwrite(vma, vmf, ext4_get_block_dax,
- ext4_end_io_unwritten);
+ int err;
+ struct inode *inode = file_inode(vma->vm_file);
+
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
+ err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
+ sb_end_pagefault(inode->i_sb);
+
+ return err;
+}
+
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * handler we check for races against truncate. Note that since we cycle
+ * through i_mmap_sem, we are sure that also any hole punching that began
+ * before we were called is finished by now and so if it included part of the
+ * file we are working on, our pte will get unmapped and the check for
+ * pte_same() in wp_pfn_shared() fails. Thus fault gets retried and things work
+ * out as desired.
+ *
+ * Returns VM_FAULT_SIGBUS when vmf->pgoff lies at or beyond the number of
+ * pages needed to cover i_size, otherwise the result of dax_pfn_mkwrite().
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ struct inode *inode = file_inode(vma->vm_file);
+ struct super_block *sb = inode->i_sb;
+ loff_t size;
+ int ret;
+
+ sb_start_pagefault(sb);
+ file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
+ /* i_size expressed in pages, rounded up; sampled under i_mmap_sem */
+ size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (vmf->pgoff >= size)
+ ret = VM_FAULT_SIGBUS;
+ else
+ ret = dax_pfn_mkwrite(vma, vmf);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
+ sb_end_pagefault(sb);
+
+ return ret;
}
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
.pmd_fault = ext4_dax_pmd_fault,
.page_mkwrite = ext4_dax_mkwrite,
- .pfn_mkwrite = dax_pfn_mkwrite,
+ .pfn_mkwrite = ext4_dax_pfn_mkwrite,
};
#else
#define ext4_dax_vm_ops ext4_file_vm_ops
#endif
static const struct vm_operations_struct ext4_file_vm_ops = {
- .fault = filemap_fault,
+ .fault = ext4_filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = ext4_page_mkwrite,
};
int blkbits;
int ret = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (offset >= isize) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
dataoff = (loff_t)last << blkbits;
} while (last <= end);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (dataoff > isize)
return -ENXIO;
int blkbits;
int ret = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (offset >= isize) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
break;
} while (last <= end);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (holeoff > isize)
holeoff = isize;
inode->i_gid = dir->i_gid;
} else
inode_init_owner(inode, dir, mode);
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
+ ei->i_projid = EXT4_I(dir)->i_projid;
+ else
+ ei->i_projid = make_kprojid(&init_user_ns, EXT4_DEF_PROJID);
+
err = dquot_initialize(inode);
if (err)
goto out;
*/
static int ext4_add_dirent_to_inline(handle_t *handle,
struct ext4_filename *fname,
- struct dentry *dentry,
+ struct inode *dir,
struct inode *inode,
struct ext4_iloc *iloc,
void *inline_start, int inline_size)
{
- struct inode *dir = d_inode(dentry->d_parent);
int err;
struct ext4_dir_entry_2 *de;
* the new created block.
*/
int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
int ret, inline_size;
void *inline_start;
struct ext4_iloc iloc;
- struct inode *dir = d_inode(dentry->d_parent);
ret = ext4_get_inode_loc(dir, &iloc);
if (ret)
EXT4_INLINE_DOTDOT_SIZE;
inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE;
- ret = ext4_add_dirent_to_inline(handle, fname, dentry, inode, &iloc,
+ ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc,
inline_start, inline_size);
if (ret != -ENOSPC)
goto out;
if (inline_size) {
inline_start = ext4_get_inline_xattr_pos(dir, &iloc);
- ret = ext4_add_dirent_to_inline(handle, fname, dentry,
+ ret = ext4_add_dirent_to_inline(handle, fname, dir,
inode, &iloc, inline_start,
inline_size);
return 0;
}
+/*
+ * Zero @len blocks of @inode starting at logical block @lblk / physical block
+ * @pblk.  Encrypted inodes are handed to ext4_encrypted_zeroout(); everything
+ * else goes through sb_issue_zeroout(), whose positive return values are
+ * folded to 0.  Returns 0 or a negative errno.
+ */
+int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
+		       ext4_lblk_t len)
+{
+	int err;
+
+	if (!ext4_encrypted_inode(inode)) {
+		err = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
+		return err > 0 ? 0 : err;
+	}
+
+	return ext4_encrypted_zeroout(inode, lblk, pblk, len);
+}
+
#define check_block_validity(inode, map) \
__check_block_validity((inode), __func__, __LINE__, (map))
* out taking i_data_sem. So at the time the unwritten extent
* could be converted.
*/
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- down_read(&EXT4_I(inode)->i_data_sem);
+ down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
retval = ext4_ind_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
}
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- up_read((&EXT4_I(inode)->i_data_sem));
+ up_read((&EXT4_I(inode)->i_data_sem));
/*
* We don't check m_len because extent will be collpased in status
* Try to see if we can get the block without requesting a new
* file system block.
*/
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- down_read(&EXT4_I(inode)->i_data_sem);
+ down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE);
if (ret < 0)
retval = ret;
}
- if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
- up_read((&EXT4_I(inode)->i_data_sem));
+ up_read((&EXT4_I(inode)->i_data_sem));
found:
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
WARN_ON(1);
}
+ /*
+ * We have to zeroout blocks before inserting them into extent
+ * status tree. Otherwise someone could look them up there and
+ * use them before they are really zeroed.
+ */
+ if (flags & EXT4_GET_BLOCKS_ZERO &&
+ map->m_flags & EXT4_MAP_MAPPED &&
+ map->m_flags & EXT4_MAP_NEW) {
+ ret = ext4_issue_zeroout(inode, map->m_lblk,
+ map->m_pblk, map->m_len);
+ if (ret) {
+ retval = ret;
+ goto out_sem;
+ }
+ }
+
/*
* If the extent has been zeroed out, we don't need to update
* extent status tree.
if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
if (ext4_es_is_written(&es))
- goto has_zeroout;
+ goto out_sem;
}
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
status |= EXTENT_STATUS_DELAYED;
ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
map->m_pblk, status);
- if (ret < 0)
+ if (ret < 0) {
retval = ret;
+ goto out_sem;
+ }
}
-has_zeroout:
+out_sem:
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
ret = check_block_validity(inode, map);
map.m_lblk = iblock;
map.m_len = bh->b_size >> inode->i_blkbits;
- if (flags && !(flags & EXT4_GET_BLOCKS_NO_LOCK) && !handle) {
+ if (flags && !handle) {
/* Direct IO write... */
if (map.m_len > DIO_MAX_BLOCKS)
map.m_len = DIO_MAX_BLOCKS;
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
- if (IS_DAX(inode) && buffer_unwritten(bh)) {
- /*
- * dgc: I suspect unwritten conversion on ext4+DAX is
- * fundamentally broken here when there are concurrent
- * read/write in progress on this inode.
- */
- WARN_ON_ONCE(io_end);
- bh->b_assoc_map = inode->i_mapping;
- bh->b_private = (void *)(unsigned long)iblock;
- }
if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
set_buffer_defer_completion(bh);
bh->b_size = inode->i_sb->s_blocksize * map.m_len;
return ret;
}
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
-
#ifdef CONFIG_EXT4_FS_ENCRYPTION
static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block)
EXT4_GET_BLOCKS_IO_CREATE_EXT);
}
+/*
+ * get_block callback selected by the direct IO path when the write is an
+ * overwrite.  It maps @iblock through _ext4_get_block() with no flags and
+ * warns once if the buffer comes back unmapped.  Returns the
+ * _ext4_get_block() result unchanged.
+ */
-static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
+static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
- ext4_debug("ext4_get_block_write_nolock: inode %lu, create flag %d\n",
+ int ret;
+
+ ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
inode->i_ino, create);
- return _ext4_get_block(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_NO_LOCK);
+ ret = _ext4_get_block(inode, iblock, bh_result, 0);
+ /*
+ * Blocks should have been preallocated! ext4_file_write_iter() checks
+ * that.  An unmapped buffer here therefore only triggers a one-time
+ * warning; the return value is not altered.
+ */
+ WARN_ON_ONCE(!buffer_mapped(bh_result));
+
+ return ret;
}
-int ext4_get_block_dax(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
+#ifdef CONFIG_FS_DAX
+/*
+ * get_block callback passed to __dax_fault(), __dax_pmd_fault() and
+ * __dax_mkwrite(): map block @iblock of @inode into @bh_result.
+ *
+ * When @create is set, the mapping is requested with EXT4_GET_BLOCKS_PRE_IO |
+ * EXT4_GET_BLOCKS_CREATE_ZERO under a journal handle started here.  If the
+ * resulting mapping is flagged EXT4_MAP_UNWRITTEN, a second ext4_map_blocks()
+ * call under a fresh handle asks for EXT4_GET_BLOCKS_CONVERT |
+ * EXT4_GET_BLOCKS_CREATE_ZERO.
+ *
+ * Returns 0 both when @bh_result was mapped and when nothing is mapped, or a
+ * negative errno on failure.
+ */
+int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
{
- int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT;
- if (create)
- flags |= EXT4_GET_BLOCKS_CREATE;
- ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
+ int ret, err;
+ int credits;
+ struct ext4_map_blocks map;
+ handle_t *handle = NULL;
+ int flags = 0;
+
+ ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
inode->i_ino, create);
- return _ext4_get_block(inode, iblock, bh_result, flags);
+ map.m_lblk = iblock;
+ map.m_len = bh_result->b_size >> inode->i_blkbits;
+ /* Computed once; reused for the conversion handle further down */
+ credits = ext4_chunk_trans_blocks(inode, map.m_len);
+ if (create) {
+ flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO;
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ return ret;
+ }
+ }
+
+ ret = ext4_map_blocks(handle, inode, &map, flags);
+ if (create) {
+ err = ext4_journal_stop(handle);
+ /* A stop error only replaces ret if the mapping itself succeeded */
+ if (ret >= 0 && err < 0)
+ ret = err;
+ }
+ /* Error or nothing mapped: no conversion to attempt */
+ if (ret <= 0)
+ goto out;
+ if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+ int err2;
+
+ /*
+ * We are protected by i_mmap_sem so we know block cannot go
+ * away from under us even though we dropped i_data_sem.
+ * Convert extent to written and write zeros there.
+ *
+ * Note: We may get here even when create == 0.
+ */
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+
+ err = ext4_map_blocks(handle, inode, &map,
+ EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO);
+ if (err < 0)
+ ret = err;
+ err2 = ext4_journal_stop(handle);
+ if (err2 < 0 && ret > 0)
+ ret = err2;
+ }
+out:
+ WARN_ON_ONCE(ret == 0 && create);
+ if (ret > 0) {
+ map_bh(bh_result, inode->i_sb, map.m_pblk);
+ bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
+ map.m_flags;
+ /*
+ * At least for now we have to clear BH_New so that DAX code
+ * doesn't attempt to zero blocks again in a racy way.
+ */
+ bh_result->b_state &= ~(1 << BH_New);
+ bh_result->b_size = map.m_len << inode->i_blkbits;
+ ret = 0;
+ }
+ return ret;
}
+#endif
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private)
/* If we do a overwrite dio, i_mutex locking can be released */
overwrite = *((int *)iocb->private);
- if (overwrite) {
- down_read(&EXT4_I(inode)->i_data_sem);
- mutex_unlock(&inode->i_mutex);
- }
+ if (overwrite)
+ inode_unlock(inode);
/*
* We could direct write to holes and fallocate.
}
if (overwrite) {
- get_block_func = ext4_get_block_write_nolock;
+ get_block_func = ext4_get_block_overwrite;
} else {
get_block_func = ext4_get_block_write;
dio_flags = DIO_LOCKING;
if (iov_iter_rw(iter) == WRITE)
inode_dio_end(inode);
/* take i_mutex locking again if we do a ovewrite dio */
- if (overwrite) {
- up_read(&EXT4_I(inode)->i_data_sem);
- mutex_lock(&inode->i_mutex);
- }
+ if (overwrite)
+ inode_lock(inode);
return ret;
}
return 0;
}
+/*
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * page cache due to hole punching or zero range. Otherwise i_disksize update
+ * can get lost as it may have been postponed to submission of writeback but
+ * that will never happen after we truncate page cache.
+ *
+ * The caller must hold the inode lock.  Nothing is done unless
+ * @offset <= i_size <= @offset + @len and i_disksize is still below i_size.
+ *
+ * Returns 0 on success (including when no update is needed), or a negative
+ * errno if the journal handle cannot be started or the inode cannot be marked
+ * dirty.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+				      loff_t len)
+{
+	handle_t *handle;
+	loff_t size = i_size_read(inode);
+	int ret;
+
+	WARN_ON(!inode_is_locked(inode));
+	if (offset > size || offset + len < size)
+		return 0;
+
+	if (EXT4_I(inode)->i_disksize >= size)
+		return 0;
+
+	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	ext4_update_i_disksize(inode, size);
+	/*
+	 * Report a failure to dirty the inode: the caller is about to drop
+	 * page cache on the assumption that the new i_disksize was recorded.
+	 */
+	ret = ext4_mark_inode_dirty(handle, inode);
+	ext4_journal_stop(handle);
+
+	return ret;
+}
+
/*
* ext4_punch_hole: punches a hole in a file by releaseing the blocks
* associated with the given offset and length
return ret;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* No need to punch hole beyond i_size */
if (offset >= inode->i_size)
}
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
first_block_offset = round_up(offset, sb->s_blocksize);
last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
/* Now release the pages and zero block aligned part of pages*/
- if (last_block_offset > first_block_offset)
+ if (last_block_offset > first_block_offset) {
+ ret = ext4_update_disksize_before_punch(inode, offset, length);
+ if (ret)
+ goto out_dio;
truncate_pagecache_range(inode, first_block_offset,
last_block_offset);
-
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
+ }
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
credits = ext4_writepage_trans_blocks(inode);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
- /* Now release the pages again to reduce race window */
- if (last_block_offset > first_block_offset)
- truncate_pagecache_range(inode, first_block_offset,
- last_block_offset);
-
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
out_stop:
ext4_journal_stop(handle);
out_dio:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
* have i_mutex locked because it's not necessary.
*/
if (!(inode->i_state & (I_NEW|I_FREEING)))
- WARN_ON(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON(!inode_is_locked(inode));
trace_ext4_truncate_enter(inode);
if (!ext4_can_truncate(inode))
EXT4_I(inode)->i_inline_off = 0;
}
+/*
+ * Store the project ID of @inode in *@projid.  Returns 0 on success, or
+ * -EOPNOTSUPP (leaving *@projid untouched) when the filesystem lacks the
+ * RO_COMPAT_PROJECT feature.
+ */
+int ext4_get_projid(struct inode *inode, kprojid_t *projid)
+{
+	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+				       EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+		*projid = EXT4_I(inode)->i_projid;
+		return 0;
+	}
+
+	return -EOPNOTSUPP;
+}
+
struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
{
struct ext4_iloc iloc;
int block;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
inode = iget_locked(sb, ino);
if (!inode)
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+ if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
+ else
+ i_projid = EXT4_DEF_PROJID;
+
if (!(test_opt(inode->i_sb, NO_UID32))) {
i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
i_uid_write(inode, i_uid);
i_gid_write(inode, i_gid);
+ ei->i_projid = make_kprojid(&init_user_ns, i_projid);
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
int need_datasync = 0, set_large_file = 0;
uid_t i_uid;
gid_t i_gid;
+ projid_t i_projid;
spin_lock(&ei->i_raw_lock);
raw_inode->i_mode = cpu_to_le16(inode->i_mode);
i_uid = i_uid_read(inode);
i_gid = i_gid_read(inode);
+ i_projid = from_kprojid(&init_user_ns, ei->i_projid);
if (!(test_opt(inode->i_sb, NO_UID32))) {
raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
cpu_to_le16(ei->i_extra_isize);
}
}
+
+ BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_PROJECT) &&
+ i_projid != EXT4_DEF_PROJID);
+
+ if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
+ EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
+ raw_inode->i_projid = cpu_to_le32(i_projid);
+
ext4_inode_csum_set(inode, raw_inode, ei);
spin_unlock(&ei->i_raw_lock);
if (inode->i_sb->s_flags & MS_LAZYTIME)
} else
ext4_wait_for_tail_page_commit(inode);
}
+ down_write(&EXT4_I(inode)->i_mmap_sem);
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
truncate_pagecache(inode, inode->i_size);
if (shrink)
ext4_truncate(inode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
}
if (!rc) {
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
+
+ down_read(&EXT4_I(inode)->i_mmap_sem);
/* Delalloc case is easy... */
if (test_opt(inode->i_sb, DELALLOC) &&
!ext4_should_journal_data(inode) &&
out_ret:
ret = block_page_mkwrite_return(ret);
out:
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(inode->i_sb);
return ret;
}
+
+/*
+ * ->fault handler of ext4_file_vm_ops: run filemap_fault() with the inode's
+ * i_mmap_sem held for reading and hand back its result unchanged.
+ */
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct ext4_inode_info *ei = EXT4_I(file_inode(vma->vm_file));
+	int ret;
+
+	down_read(&ei->i_mmap_sem);
+	ret = filemap_fault(vma, vmf);
+	up_read(&ei->i_mmap_sem);
+
+	return ret;
+}
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/random.h>
+#include <linux/quotaops.h>
#include <asm/uaccess.h>
#include "ext4_jbd2.h"
#include "ext4.h"
return 1;
}
+/*
+ * Apply a new EXT4_*_FL flag word to @inode on behalf of the
+ * EXT4_IOC_SETFLAGS and EXT4_IOC_FSSETXATTR ioctls.
+ *
+ * @inode: inode to modify; both callers take the inode lock around the call
+ * @flags: complete new flag word, already passed through ext4_mask_flags()
+ *
+ * Quota files are refused.  Changing APPEND/IMMUTABLE requires
+ * CAP_LINUX_IMMUTABLE and changing JOURNAL_DATA requires CAP_SYS_RESOURCE.
+ * Only bits in EXT4_FL_USER_MODIFIABLE are actually written.
+ *
+ * Returns 0 on success or a negative errno: -EPERM for the permission and
+ * quota-file checks, -EOPNOTSUPP when trying to add EOFBLOCKS, or the error
+ * reported by the journal, journal-flag or migration helpers.
+ */
+static int ext4_ioctl_setflags(struct inode *inode,
+			       unsigned int flags)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	handle_t *handle = NULL;
+	/* Must be negative: it is returned as-is by the early bail-outs below */
+	int err = -EPERM, migrate = 0;
+	struct ext4_iloc iloc;
+	unsigned int oldflags, mask, i;
+	unsigned int jflag;
+
+	/* Is it quota file? Do not allow user to mess with it */
+	if (IS_NOQUOTA(inode))
+		goto flags_out;
+
+	oldflags = ei->i_flags;
+
+	/* The JOURNAL_DATA flag is modifiable only by root */
+	jflag = flags & EXT4_JOURNAL_DATA_FL;
+
+	/*
+	 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+	 * the relevant capability.
+	 *
+	 * This test looks nicer. Thanks to Pauline Middelink
+	 */
+	if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
+		if (!capable(CAP_LINUX_IMMUTABLE))
+			goto flags_out;
+	}
+
+	/*
+	 * The JOURNAL_DATA flag can only be changed by
+	 * the relevant capability.
+	 */
+	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+		if (!capable(CAP_SYS_RESOURCE))
+			goto flags_out;
+	}
+	if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
+		migrate = 1;
+
+	if (flags & EXT4_EOFBLOCKS_FL) {
+		/* we don't support adding EOFBLOCKS flag */
+		if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
+			err = -EOPNOTSUPP;
+			goto flags_out;
+		}
+	} else if (oldflags & EXT4_EOFBLOCKS_FL)
+		ext4_truncate(inode);
+
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto flags_out;
+	}
+	if (IS_SYNC(inode))
+		ext4_handle_sync(handle);
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		goto flags_err;
+
+	/* Copy each user-modifiable bit of @flags into the inode, bit by bit */
+	for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+		if (!(mask & EXT4_FL_USER_MODIFIABLE))
+			continue;
+		if (mask & flags)
+			ext4_set_inode_flag(inode, i);
+		else
+			ext4_clear_inode_flag(inode, i);
+	}
+
+	ext4_set_inode_flags(inode);
+	inode->i_ctime = ext4_current_time(inode);
+
+	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+flags_err:
+	ext4_journal_stop(handle);
+	if (err)
+		goto flags_out;
+
+	/* Journal-mode change and format migration run outside the handle */
+	if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+		err = ext4_change_inode_journal_flag(inode, jflag);
+	if (err)
+		goto flags_out;
+	if (migrate) {
+		if (flags & EXT4_EXTENTS_FL)
+			err = ext4_ext_migrate(inode);
+		else
+			err = ext4_ind_migrate(inode);
+	}
+
+flags_out:
+	return err;
+}
+
+#ifdef CONFIG_QUOTA
+/*
+ * Set the project ID of the inode behind @filp to @projid (interpreted in
+ * init_user_ns) on behalf of EXT4_IOC_FSSETXATTR.
+ *
+ * Without the RO_COMPAT_PROJECT feature only EXT4_DEF_PROJID is accepted.
+ * Setting the ID the inode already has is a no-op.  When project quota
+ * limits are enabled, usage is transferred to the new project's dquot before
+ * the ID is changed.
+ *
+ * Returns 0 on success or a negative errno: -EOPNOTSUPP when project IDs
+ * cannot be stored, -EPERM for quota files, -EOVERFLOW when i_projid does not
+ * fit in the on-disk inode, or the error from the quota / journal helpers.
+ */
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+	struct inode *inode = file_inode(filp);
+	struct super_block *sb = inode->i_sb;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	int err, rc;
+	handle_t *handle;
+	kprojid_t kprojid;
+	struct ext4_iloc iloc;
+	struct ext4_inode *raw_inode;
+
+	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+			EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+		if (projid != EXT4_DEF_PROJID)
+			return -EOPNOTSUPP;
+		else
+			return 0;
+	}
+
+	if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE)
+		return -EOPNOTSUPP;
+
+	kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
+
+	if (projid_eq(kprojid, EXT4_I(inode)->i_projid))
+		return 0;
+
+	err = mnt_want_write_file(filp);
+	if (err)
+		return err;
+
+	err = -EPERM;
+	inode_lock(inode);
+	/* Is it quota file? Do not allow user to mess with it */
+	if (IS_NOQUOTA(inode))
+		goto out_unlock;
+
+	err = ext4_get_inode_loc(inode, &iloc);
+	if (err)
+		goto out_unlock;
+
+	raw_inode = ext4_raw_inode(&iloc);
+	if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
+		err = -EOVERFLOW;
+		brelse(iloc.bh);
+		goto out_unlock;
+	}
+	brelse(iloc.bh);
+
+	/* Do not go on to transfer quota if the dquots could not be set up */
+	err = dquot_initialize(inode);
+	if (err)
+		goto out_unlock;
+
+	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
+		EXT4_QUOTA_INIT_BLOCKS(sb) +
+		EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto out_unlock;
+	}
+
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		goto out_stop;
+
+	if (sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
+		struct dquot *transfer_to[MAXQUOTAS] = { };
+
+		transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
+		if (transfer_to[PRJQUOTA]) {
+			err = __dquot_transfer(inode, transfer_to);
+			dqput(transfer_to[PRJQUOTA]);
+			if (err)
+				goto out_dirty;
+		}
+	}
+	EXT4_I(inode)->i_projid = kprojid;
+	inode->i_ctime = ext4_current_time(inode);
+out_dirty:
+	/* The reserved iloc is always handed back; keep the first error seen */
+	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
+	if (!err)
+		err = rc;
+out_stop:
+	ext4_journal_stop(handle);
+out_unlock:
+	inode_unlock(inode);
+	mnt_drop_write_file(filp);
+	return err;
+}
+#else
+/* Without CONFIG_QUOTA only the default project ID can be "set". */
+static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
+{
+	if (projid != EXT4_DEF_PROJID)
+		return -EOPNOTSUPP;
+	return 0;
+}
+#endif
+
+/*
+ * Translate ext4 inode flag bits (EXT4_*_FL) into the matching FS_XFLAG_*
+ * bits.  Only SYNC, IMMUTABLE, APPEND, NODUMP, NOATIME and PROJINHERIT are
+ * carried over; every other input bit is ignored.
+ */
+static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)
+{
+	__u32 out = 0;
+
+	out |= (iflags & EXT4_SYNC_FL) ? FS_XFLAG_SYNC : 0;
+	out |= (iflags & EXT4_IMMUTABLE_FL) ? FS_XFLAG_IMMUTABLE : 0;
+	out |= (iflags & EXT4_APPEND_FL) ? FS_XFLAG_APPEND : 0;
+	out |= (iflags & EXT4_NODUMP_FL) ? FS_XFLAG_NODUMP : 0;
+	out |= (iflags & EXT4_NOATIME_FL) ? FS_XFLAG_NOATIME : 0;
+	out |= (iflags & EXT4_PROJINHERIT_FL) ? FS_XFLAG_PROJINHERIT : 0;
+
+	return out;
+}
+
+/*
+ * Translate FS_XFLAG_* bits into the matching ext4 inode flag bits
+ * (EXT4_*_FL).  Only SYNC, IMMUTABLE, APPEND, NODUMP, NOATIME and
+ * PROJINHERIT are carried over; every other input bit is ignored.
+ */
+static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)
+{
+	unsigned long out = 0;
+
+	out |= (xflags & FS_XFLAG_SYNC) ? EXT4_SYNC_FL : 0;
+	out |= (xflags & FS_XFLAG_IMMUTABLE) ? EXT4_IMMUTABLE_FL : 0;
+	out |= (xflags & FS_XFLAG_APPEND) ? EXT4_APPEND_FL : 0;
+	out |= (xflags & FS_XFLAG_NODUMP) ? EXT4_NODUMP_FL : 0;
+	out |= (xflags & FS_XFLAG_NOATIME) ? EXT4_NOATIME_FL : 0;
+	out |= (xflags & FS_XFLAG_PROJINHERIT) ? EXT4_PROJINHERIT_FL : 0;
+
+	return out;
+}
+
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
return put_user(flags, (int __user *) arg);
case EXT4_IOC_SETFLAGS: {
- handle_t *handle = NULL;
- int err, migrate = 0;
- struct ext4_iloc iloc;
- unsigned int oldflags, mask, i;
- unsigned int jflag;
+ int err;
if (!inode_owner_or_capable(inode))
return -EACCES;
flags = ext4_mask_flags(inode->i_mode, flags);
- err = -EPERM;
- mutex_lock(&inode->i_mutex);
- /* Is it quota file? Do not allow user to mess with it */
- if (IS_NOQUOTA(inode))
- goto flags_out;
-
- oldflags = ei->i_flags;
-
- /* The JOURNAL_DATA flag is modifiable only by root */
- jflag = flags & EXT4_JOURNAL_DATA_FL;
-
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- *
- * This test looks nicer. Thanks to Pauline Middelink
- */
- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE))
- goto flags_out;
- }
-
- /*
- * The JOURNAL_DATA flag can only be changed by
- * the relevant capability.
- */
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
- if (!capable(CAP_SYS_RESOURCE))
- goto flags_out;
- }
- if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
- migrate = 1;
-
- if (flags & EXT4_EOFBLOCKS_FL) {
- /* we don't support adding EOFBLOCKS flag */
- if (!(oldflags & EXT4_EOFBLOCKS_FL)) {
- err = -EOPNOTSUPP;
- goto flags_out;
- }
- } else if (oldflags & EXT4_EOFBLOCKS_FL)
- ext4_truncate(inode);
-
- handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
- if (IS_ERR(handle)) {
- err = PTR_ERR(handle);
- goto flags_out;
- }
- if (IS_SYNC(inode))
- ext4_handle_sync(handle);
- err = ext4_reserve_inode_write(handle, inode, &iloc);
- if (err)
- goto flags_err;
-
- for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
- if (!(mask & EXT4_FL_USER_MODIFIABLE))
- continue;
- if (mask & flags)
- ext4_set_inode_flag(inode, i);
- else
- ext4_clear_inode_flag(inode, i);
- }
-
- ext4_set_inode_flags(inode);
- inode->i_ctime = ext4_current_time(inode);
-
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-flags_err:
- ext4_journal_stop(handle);
- if (err)
- goto flags_out;
-
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
- err = ext4_change_inode_journal_flag(inode, jflag);
- if (err)
- goto flags_out;
- if (migrate) {
- if (flags & EXT4_EXTENTS_FL)
- err = ext4_ext_migrate(inode);
- else
- err = ext4_ind_migrate(inode);
- }
-
-flags_out:
- mutex_unlock(&inode->i_mutex);
+ inode_lock(inode);
+ err = ext4_ioctl_setflags(inode, flags);
+ inode_unlock(inode);
mnt_drop_write_file(filp);
return err;
}
goto setversion_out;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
ext4_journal_stop(handle);
unlock_out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
setversion_out:
mnt_drop_write_file(filp);
return err;
* ext4_ext_swap_inode_data before we switch the
* inode format to prevent read.
*/
- mutex_lock(&(inode->i_mutex));
+ inode_lock((inode));
err = ext4_ext_migrate(inode);
- mutex_unlock(&(inode->i_mutex));
+ inode_unlock((inode));
mnt_drop_write_file(filp);
return err;
}
return -EOPNOTSUPP;
#endif
}
+ case EXT4_IOC_FSGETXATTR:
+ {
+ struct fsxattr fa;
+
+ memset(&fa, 0, sizeof(struct fsxattr));
+ ext4_get_inode_flags(ei);
+ fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
+
+ if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_PROJECT)) {
+ fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
+ EXT4_I(inode)->i_projid);
+ }
+
+ if (copy_to_user((struct fsxattr __user *)arg,
+ &fa, sizeof(fa)))
+ return -EFAULT;
+ return 0;
+ }
+ case EXT4_IOC_FSSETXATTR:
+ {
+ struct fsxattr fa;
+ int err;
+
+ if (copy_from_user(&fa, (struct fsxattr __user *)arg,
+ sizeof(fa)))
+ return -EFAULT;
+
+ /* Make sure caller has proper permission */
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+
+ flags = ext4_xflags_to_iflags(fa.fsx_xflags);
+ flags = ext4_mask_flags(inode->i_mode, flags);
+
+ inode_lock(inode);
+ flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
+ (flags & EXT4_FL_XFLAG_VISIBLE);
+ err = ext4_ioctl_setflags(inode, flags);
+ inode_unlock(inode);
+ mnt_drop_write_file(filp);
+ if (err)
+ return err;
+
+ err = ext4_ioctl_setproject(filp, fa.fsx_projid);
+ if (err)
+ return err;
+
+ return 0;
+ }
default:
return -ENOTTY;
}
struct ext4_filename *fname,
struct ext4_dir_entry_2 **res_dir);
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode);
+ struct inode *dir, struct inode *inode);
/* checksumming functions */
void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
* directory, and adds the dentry to the indexed directory.
*/
static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry,
+ struct inode *dir,
struct inode *inode, struct buffer_head *bh)
{
- struct inode *dir = d_inode(dentry->d_parent);
struct buffer_head *bh2;
struct dx_root *root;
struct dx_frame frames[2], *frame;
return retval;
if (ext4_has_inline_data(dir)) {
- retval = ext4_try_add_inline_entry(handle, &fname,
- dentry, inode);
+ retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
if (retval < 0)
goto out;
if (retval == 1) {
}
if (is_dx(dir)) {
- retval = ext4_dx_add_entry(handle, &fname, dentry, inode);
+ retval = ext4_dx_add_entry(handle, &fname, dir, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
goto out;
ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
if (blocks == 1 && !dx_fallback &&
ext4_has_feature_dir_index(sb)) {
- retval = make_indexed_dir(handle, &fname, dentry,
+ retval = make_indexed_dir(handle, &fname, dir,
inode, bh);
bh = NULL; /* make_indexed_dir releases bh */
goto out;
* Returns 0 for success, or a negative error value
*/
static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
- struct dentry *dentry, struct inode *inode)
+ struct inode *dir, struct inode *inode)
{
struct dx_frame frames[2], *frame;
struct dx_entry *entries, *at;
struct buffer_head *bh;
- struct inode *dir = d_inode(dentry->d_parent);
struct super_block *sb = dir->i_sb;
struct ext4_dir_entry_2 *de;
int err;
return 0;
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
- !mutex_is_locked(&inode->i_mutex));
+ !inode_is_locked(inode));
/*
* Exit early if inode already is on orphan list. This is a big speedup
* since we don't have to contend on the global s_orphan_lock.
return 0;
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
- !mutex_is_locked(&inode->i_mutex));
+ !inode_is_locked(inode));
/* Do this quick check before taking global s_orphan_lock. */
if (list_empty(&ei->i_orphan))
return 0;
if (ext4_encrypted_inode(dir) &&
!ext4_is_child_context_consistent_with_parent(dir, inode))
return -EPERM;
+
+ if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
err = dquot_initialize(dir);
if (err)
return err;
int credits;
u8 old_file_type;
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
+ (!projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
new.inode)))
return -EPERM;
+ if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(new_dir)->i_projid,
+ EXT4_I(old_dentry->d_inode)->i_projid)) ||
+ (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
+ !projid_eq(EXT4_I(old_dir)->i_projid,
+ EXT4_I(new_dentry->d_inode)->i_projid)))
+ return -EXDEV;
+
retval = dquot_initialize(old.dir);
if (retval)
return retval;
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
+/*
+ * Lock ordering
+ *
+ * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
+ * i_mmap_rwsem (inode->i_mmap_rwsem)!
+ *
+ * page fault path:
+ * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
+ * page lock -> i_data_sem (rw)
+ *
+ * buffered write path:
+ * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> transaction start -> page lock ->
+ * i_data_sem (rw)
+ *
+ * truncate:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ * i_mmap_rwsem (w) -> page lock
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (w) -> i_mmap_sem (w) ->
+ * transaction start -> i_data_sem (rw)
+ *
+ * direct IO:
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) -> mmap_sem
+ * sb_start_write -> i_mutex -> EXT4_STATE_DIOREAD_LOCK (r) ->
+ * transaction start -> i_data_sem (rw)
+ *
+ * writepages:
+ * transaction start -> page lock(s) -> i_data_sem (rw)
+ */
+
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem);
+ init_rwsem(&ei->i_mmap_sem);
inode_init_once(&ei->vfs_inode);
}
}
#ifdef CONFIG_QUOTA
-#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
-#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
+static char *quotatypes[] = INITQFNAMES;
+#define QTYPE2NAME(t) (quotatypes[t])
static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
.write_info = ext4_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
+ .get_projid = ext4_get_projid,
};
static const struct quotactl_ops ext4_qctl_operations = {
__func__, inode->i_ino, inode->i_size);
jbd_debug(2, "truncating inode %lu to %lld bytes\n",
inode->i_ino, inode->i_size);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
truncate_inode_pages(inode->i_mapping, inode->i_size);
ext4_truncate(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
nr_truncates++;
} else {
if (test_opt(sb, DEBUG))
"without CONFIG_QUOTA");
return 0;
}
+ if (ext4_has_feature_project(sb) && !readonly) {
+ ext4_msg(sb, KERN_ERR,
+ "Filesystem with project quota feature cannot be mounted RDWR "
+ "without CONFIG_QUOTA");
+ return 0;
+ }
#endif /* CONFIG_QUOTA */
return 1;
}
sb->s_qcop = &dquot_quotactl_sysfile_ops;
else
sb->s_qcop = &ext4_qctl_operations;
- sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
return err;
}
+#ifdef CONFIG_QUOTA
+/*
+ * ext4_statfs_project - clamp statfs totals to a project's quota limits
+ * @sb:     superblock on which the project dquot is looked up
+ * @projid: project ID whose limits are applied
+ * @buf:    statfs result already filled in by the caller; adjusted in place
+ *
+ * For blocks and for inodes, the effective limit is the smaller of the soft
+ * and hard limits, ignoring a limit that is zero (unset).  When an effective
+ * limit exists and is below the total currently in @buf, the total is
+ * replaced by the limit and the free count becomes "limit - usage", floored
+ * at zero.
+ *
+ * Returns 0 on success, or the error carried by dqget()'s return value.
+ */
+static int ext4_statfs_project(struct super_block *sb,
+			       kprojid_t projid, struct kstatfs *buf)
+{
+	struct kqid qid;
+	struct dquot *dquot;
+	u64 limit;
+	u64 hard;
+	u64 curblock;
+
+	qid = make_kqid_projid(projid);
+	dquot = dqget(sb, qid);
+	if (IS_ERR(dquot))
+		return PTR_ERR(dquot);
+	spin_lock(&dq_data_lock);
+
+	/*
+	 * Pick the tighter of the two non-zero block limits.  Taking the soft
+	 * limit unconditionally would over-report capacity whenever the hard
+	 * limit has been set below it.
+	 */
+	limit = dquot->dq_dqb.dqb_bsoftlimit;
+	hard = dquot->dq_dqb.dqb_bhardlimit;
+	if (!limit || (hard && hard < limit))
+		limit = hard;
+	/* Scale to filesystem blocks, the unit used by buf->f_blocks. */
+	limit >>= sb->s_blocksize_bits;
+
+	if (limit && buf->f_blocks > limit) {
+		/*
+		 * Include space recorded as reserved in the dquot
+		 * (dqb_rsvspace) so that it is not reported as free.
+		 */
+		curblock = (dquot->dq_dqb.dqb_curspace +
+			    dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
+		buf->f_blocks = limit;
+		buf->f_bfree = buf->f_bavail =
+			(buf->f_blocks > curblock) ?
+			 (buf->f_blocks - curblock) : 0;
+	}
+
+	/* Same selection for the inode limits; no unit conversion needed. */
+	limit = dquot->dq_dqb.dqb_isoftlimit;
+	hard = dquot->dq_dqb.dqb_ihardlimit;
+	if (!limit || (hard && hard < limit))
+		limit = hard;
+
+	if (limit && buf->f_files > limit) {
+		buf->f_files = limit;
+		buf->f_ffree =
+			(buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
+			 (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+	}
+
+	spin_unlock(&dq_data_lock);
+	dqput(dquot);
+	return 0;
+}
+#endif
+
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+#ifdef CONFIG_QUOTA
+ if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
+ sb_has_quota_limits_enabled(sb, PRJQUOTA))
+ ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
+#endif
return 0;
}
struct inode *qf_inode;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
BUG_ON(!ext4_has_feature_quota(sb));
int type, err = 0;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
- le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
};
sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
*/
static inline void ext4_truncate_failed_write(struct inode *inode)
{
+ down_write(&EXT4_I(inode)->i_mmap_sem);
truncate_inode_pages(inode->i_mapping, inode->i_size);
ext4_truncate(inode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
}
/*
return ret;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (start >= isize)
if (ret == 1)
ret = 0;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
loff_t isize;
int err = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (offset >= isize)
found:
if (whence == SEEK_HOLE && data_ofs > isize)
data_ofs = isize;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return vfs_setpos(file, data_ofs, maxbytes);
fail:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (mode & FALLOC_FL_PUNCH_HOLE) {
if (offset >= inode->i_size)
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
trace_f2fs_fallocate(inode, mode, offset, len, ret);
return ret;
flags = f2fs_mask_flags(inode->i_mode, flags);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = fi->i_flags;
if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = -EPERM;
goto out;
}
flags = flags & FS_FL_USER_MODIFIABLE;
flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
fi->i_flags = flags;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
f2fs_set_inode_flags(inode);
inode->i_ctime = CURRENT_TIME;
f2fs_balance_fs(sbi, true);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* writeback all dirty pages in the range */
err = filemap_write_and_wait_range(inode->i_mapping, range->start,
clear_out:
clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!err)
range->len = (u64)total << PAGE_CACHE_SHIFT;
return err;
buf.dirent = dirent;
buf.result = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
buf.ctx.pos = file->f_pos;
ret = -ENOENT;
if (!IS_DEADDIR(inode)) {
short_only, both ? &buf : NULL);
file->f_pos = buf.ctx.pos;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret >= 0)
ret = buf.result;
return ret;
{
u32 attr;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
attr = fat_make_attrs(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return put_user(attr, user_attr);
}
err = mnt_want_write_file(file);
if (err)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* ATTR_VOLUME and ATTR_DIR cannot be changed; this also
fat_save_attrs(inode, attr);
mark_inode_dirty(inode);
out_unlock_inode:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(file);
out:
return err;
if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (mode & FALLOC_FL_KEEP_SIZE) {
ondisksize = inode->i_blocks << 9;
if ((offset + len) <= ondisksize)
}
error:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
static struct file_system_type **find_filesystem(const char *name, unsigned len)
{
struct file_system_type **p;
- for (p=&file_systems; *p; p=&(*p)->next)
- if (strlen((*p)->name) == len &&
- strncmp((*p)->name, name, len) == 0)
+ for (p = &file_systems; *p; p = &(*p)->next)
+ if (strncmp((*p)->name, name, len) == 0 &&
+ !(*p)->name[len])
break;
return p;
}
if (!parent)
return -ENOENT;
- mutex_lock(&parent->i_mutex);
+ inode_lock(parent);
if (!S_ISDIR(parent->i_mode))
goto unlock;
fuse_invalidate_entry(entry);
if (child_nodeid != 0 && d_really_is_positive(entry)) {
- mutex_lock(&d_inode(entry)->i_mutex);
+ inode_lock(d_inode(entry));
if (get_node_id(d_inode(entry)) != child_nodeid) {
err = -ENOENT;
goto badentry;
clear_nlink(d_inode(entry));
err = 0;
badentry:
- mutex_unlock(&d_inode(entry)->i_mutex);
+ inode_unlock(d_inode(entry));
if (!err)
d_delete(entry);
} else {
dput(entry);
unlock:
- mutex_unlock(&parent->i_mutex);
+ inode_unlock(parent);
iput(parent);
return err;
}
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- BUG_ON(!mutex_is_locked(&inode->i_mutex));
+ BUG_ON(!inode_is_locked(inode));
spin_lock(&fc->lock);
BUG_ON(fi->writectr < 0);
return err;
if (lock_inode)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = fuse_do_open(fc, get_node_id(inode), file, isdir);
fuse_finish_open(inode, file);
if (lock_inode)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
fuse_sync_writes(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
req = fuse_get_req_nofail_nopages(fc, file);
memset(&inarg, 0, sizeof(inarg));
if (is_bad_inode(inode))
return -EIO;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Start writeback against all dirty pages of the inode, then
err = 0;
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
return generic_file_write_iter(iocb, from);
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
}
out:
current->backing_dev_info = NULL;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return written ? written : err;
}
if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
if (!write)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
fuse_sync_writes(inode);
if (!write)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
while (count) {
return -EIO;
/* Don't allow parallel writes to the same file */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
res = generic_write_checks(iocb, from);
if (res > 0)
res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
fuse_invalidate_attr(inode);
if (res > 0)
fuse_write_update_size(inode, iocb->ki_pos);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
retval = generic_file_llseek(file, offset, whence);
break;
case SEEK_END:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = fuse_update_attributes(inode, NULL, file, NULL);
if (!retval)
retval = generic_file_llseek(file, offset, whence);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
break;
case SEEK_HOLE:
case SEEK_DATA:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = fuse_lseek(file, offset, whence);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
break;
default:
retval = -EINVAL;
return -EOPNOTSUPP;
if (lock_inode) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (mode & FALLOC_FL_PUNCH_HOLE) {
loff_t endbyte = offset + length - 1;
err = filemap_write_and_wait_range(inode->i_mapping,
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
if (lock_inode)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
if (ret)
gfs2_glock_dq_uninit(&gh);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
return -ENOMEM;
sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
- mutex_lock(&ip->i_inode.i_mutex);
+ inode_lock(&ip->i_inode);
for (qx = 0; qx < num_qd; qx++) {
error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
GL_NOCACHE, &ghs[qx]);
out:
while (qx--)
gfs2_glock_dq_uninit(&ghs[qx]);
- mutex_unlock(&ip->i_inode.i_mutex);
+ inode_unlock(&ip->i_inode);
kfree(ghs);
gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH);
return error;
if (error)
goto out_put;
- mutex_lock(&ip->i_inode.i_mutex);
+ inode_lock(&ip->i_inode);
error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
if (error)
goto out_unlockput;
out_q:
gfs2_glock_dq_uninit(&q_gh);
out_unlockput:
- mutex_unlock(&ip->i_inode.i_mutex);
+ inode_unlock(&ip->i_inode);
out_put:
qd_put(qd);
return error;
{
struct hfs_readdir_data *rd = file->private_data;
if (rd) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
list_del(&rd->list);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kfree(rd);
}
return 0;
if (HFS_IS_RSRC(inode))
inode = HFS_I(inode)->rsrc_inode;
if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
hfs_file_truncate(inode);
//if (inode->i_flags & S_DEAD) {
// hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
// hfs_delete_inode(inode);
//}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* sync the inode to buffers */
ret = write_inode_now(inode, 0);
err = sync_blockdev(sb->s_bdev);
if (!ret)
ret = err;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
{
struct hfsplus_readdir_data *rd = file->private_data;
if (rd) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
list_del(&rd->list);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kfree(rd);
}
return 0;
if (HFSPLUS_IS_RSRC(inode))
inode = HFSPLUS_I(inode)->rsrc_inode;
if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
hfsplus_file_truncate(inode);
if (inode->i_flags & S_DEAD) {
hfsplus_delete_cat(inode->i_ino,
HFSPLUS_SB(sb)->hidden_dir, NULL);
hfsplus_delete_inode(inode);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
error = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Sync inode metadata into the catalog and extent trees.
if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
goto out_drop_write;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) ||
inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
mark_inode_dirty(inode);
out_unlock_inode:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out_drop_write:
mnt_drop_write_file(file);
out:
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = fsync_file(HOSTFS_I(inode)->fd, datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
if (whence == SEEK_DATA || whence == SEEK_HOLE)
return -EINVAL;
- mutex_lock(&i->i_mutex);
+ inode_lock(i);
hpfs_lock(s);
/*pr_info("dir lseek\n");*/
ok:
filp->f_pos = new_off;
hpfs_unlock(s);
- mutex_unlock(&i->i_mutex);
+ inode_unlock(i);
return new_off;
fail:
/*pr_warn("illegal lseek: %016llx\n", new_off);*/
hpfs_unlock(s);
- mutex_unlock(&i->i_mutex);
+ inode_unlock(i);
return -ESPIPE;
}
vma_len = (loff_t)(vma->vm_end - vma->vm_start);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
file_accessed(file);
ret = -ENOMEM;
if (vma->vm_flags & VM_WRITE && inode->i_size < len)
inode->i_size = len;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
if (hole_end > hole_start) {
struct address_space *mapping = inode->i_mapping;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_mmap_lock_write(mapping);
if (!RB_EMPTY_ROOT(&mapping->i_mmap))
hugetlb_vmdelete_list(&mapping->i_mmap,
hole_end >> PAGE_SHIFT);
i_mmap_unlock_write(mapping);
remove_inode_hugepages(inode, hole_start, hole_end);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
start = offset >> hpage_shift;
end = (offset + len + hpage_size - 1) >> hpage_shift;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
error = inode_newsize_ok(inode, offset + len);
i_size_write(inode, offset + len);
inode->i_ctime = CURRENT_TIME;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
*/
spin_lock_irq(&inode->i_data.tree_lock);
BUG_ON(inode->i_data.nrpages);
- BUG_ON(inode->i_data.nrshadows);
+ BUG_ON(inode->i_data.nrexceptional);
spin_unlock_irq(&inode->i_data.tree_lock);
BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING));
swap(inode1, inode2);
if (inode1 && !S_ISDIR(inode1->i_mode))
- mutex_lock(&inode1->i_mutex);
+ inode_lock(inode1);
if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
+ inode_lock_nested(inode2, I_MUTEX_NONDIR2);
}
EXPORT_SYMBOL(lock_two_nondirectories);
void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
if (inode1 && !S_ISDIR(inode1->i_mode))
- mutex_unlock(&inode1->i_mutex);
+ inode_unlock(inode1);
if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
- mutex_unlock(&inode2->i_mutex);
+ inode_unlock(inode2);
}
EXPORT_SYMBOL(unlock_two_nondirectories);
u64 len, get_block_t *get_block)
{
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
EXPORT_SYMBOL(generic_block_fiemap);
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mtd/mtd.h>
+#include <linux/mm.h> /* kvfree() */
#include "nodelist.h"
static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
return 0;
out_free:
-#ifndef __ECOS
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
-#endif
- kfree(c->blocks);
+ kvfree(c->blocks);
return ret;
}
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Trigger GC to flush any pending writes for this inode */
jffs2_flush_wbuf_gc(c, inode->i_ino);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
out_root:
jffs2_free_ino_caches(c);
jffs2_free_raw_node_refs(c);
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
- kfree(c->blocks);
+ kvfree(c->blocks);
out_inohash:
jffs2_clear_xattr_subsystem(c);
kfree(c->inocache_list);
jffs2_free_ino_caches(c);
jffs2_free_raw_node_refs(c);
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
- kfree(c->blocks);
+ kvfree(c->blocks);
jffs2_flash_cleanup(c);
kfree(c->inocache_list);
jffs2_clear_xattr_subsystem(c);
if (rc)
return rc;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (!(inode->i_state & I_DIRTY_ALL) ||
(datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
/* Make sure committed changes hit the disk */
jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
rc |= jfs_commit_inode(inode, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc ? -EIO : 0;
}
}
/* Lock against other parallel changes of flags */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
jfs_get_inode_flags(jfs_inode);
oldflags = jfs_inode->mode2;
((flags ^ oldflags) &
(JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
err = -EPERM;
goto setflags_out;
}
jfs_inode->mode2 = flags;
jfs_set_inode_flags(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
setflags_out:
struct buffer_head tmp_bh;
struct buffer_head *bh;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
}
out:
if (len == towrite) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
if (inode->i_size < off+len-towrite)
inode->i_version++;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return len - towrite;
}
struct inode *inode = file_inode(file);
loff_t ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = generic_file_llseek(file, offset, whence);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
{
struct dentry *dentry = file->f_path.dentry;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
switch (whence) {
case 1:
offset += file->f_pos;
if (offset >= 0)
break;
default:
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return -EINVAL;
}
if (offset != file->f_pos) {
spin_unlock(&dentry->d_lock);
}
}
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return offset;
}
EXPORT_SYMBOL(dcache_dir_lseek);
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = sync_mapping_buffers(inode->i_mapping);
if (!(inode->i_state & I_DIRTY_ALL))
goto out;
ret = err;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
EXPORT_SYMBOL(__generic_file_fsync);
* bother, maybe that's a sign this just isn't a good file to
* hand out a delegation on.
*/
- if (is_deleg && !mutex_trylock(&inode->i_mutex))
+ if (is_deleg && !inode_trylock(inode))
return -EAGAIN;
if (is_deleg && arg == F_WRLCK) {
/* Write delegations are not currently supported: */
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
WARN_ON_ONCE(1);
return -EINVAL;
}
spin_unlock(&ctx->flc_lock);
locks_dispose_list(&dispose);
if (is_deleg)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!error && !my_fl)
*flp = NULL;
return error;
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = li->li_flags;
flags &= LOGFS_FL_USER_MODIFIABLE;
flags |= oldflags & ~LOGFS_FL_USER_MODIFIABLE;
li->li_flags = flags;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
inode->i_ctime = CURRENT_TIME;
mark_inode_dirty_sync(inode);
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
logfs_get_wblocks(sb, NULL, WF_LOCK);
logfs_write_anchor(sb);
logfs_put_wblocks(sb, NULL, WF_LOCK);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
parent = nd->path.dentry;
BUG_ON(nd->inode != parent->d_inode);
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
dentry = __lookup_hash(&nd->last, parent, nd->flags);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
path->mnt = nd->path.mnt;
putname(filename);
return ERR_PTR(-EINVAL);
}
- mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
d = __lookup_hash(&last, path->dentry, 0);
if (IS_ERR(d)) {
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
path_put(path);
}
putname(filename);
unsigned int c;
int err;
- WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(base->d_inode));
this.name = name;
this.len = len;
if (ret)
return ret;
- mutex_lock(&base->d_inode->i_mutex);
+ inode_lock(base->d_inode);
ret = __lookup_hash(&this, base, 0);
- mutex_unlock(&base->d_inode->i_mutex);
+ inode_unlock(base->d_inode);
return ret;
}
EXPORT_SYMBOL(lookup_one_len_unlocked);
goto done;
}
- mutex_lock(&dir->d_inode->i_mutex);
+ inode_lock(dir->d_inode);
dentry = d_lookup(dir, &nd->last);
if (!dentry) {
/*
*/
dentry = d_alloc(dir, &nd->last);
if (!dentry) {
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
return -ENOMEM;
}
dentry = lookup_real(dir->d_inode, dentry, nd->flags);
if (IS_ERR(dentry)) {
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
return PTR_ERR(dentry);
}
}
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
done:
if (d_is_negative(dentry)) {
struct dentry *p;
if (p1 == p2) {
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
return NULL;
}
p = d_ancestor(p2, p1);
if (p) {
- mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
return p;
}
p = d_ancestor(p1, p2);
if (p) {
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
return p;
}
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
return NULL;
}
EXPORT_SYMBOL(lock_rename);
void unlock_rename(struct dentry *p1, struct dentry *p2)
{
- mutex_unlock(&p1->d_inode->i_mutex);
+ inode_unlock(p1->d_inode);
if (p1 != p2) {
- mutex_unlock(&p2->d_inode->i_mutex);
+ inode_unlock(p2->d_inode);
mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
}
}
* dropping this one anyway.
*/
}
- mutex_lock(&dir->d_inode->i_mutex);
+ inode_lock(dir->d_inode);
error = lookup_open(nd, &path, file, op, got_write, opened);
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
if (error <= 0) {
if (error)
* Do the final lookup.
*/
lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
- mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path->dentry, lookup_flags);
if (IS_ERR(dentry))
goto unlock;
dput(dentry);
dentry = ERR_PTR(error);
unlock:
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
if (!err2)
mnt_drop_write(path->mnt);
out:
void done_path_create(struct path *path, struct dentry *dentry)
{
dput(dentry);
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
mnt_drop_write(path->mnt);
path_put(path);
}
return -EPERM;
dget(dentry);
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
error = -EBUSY;
if (is_local_mountpoint(dentry))
detach_mounts(dentry);
out:
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
dput(dentry);
if (!error)
d_delete(dentry);
if (error)
goto exit1;
- mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
exit3:
dput(dentry);
exit2:
- mutex_unlock(&path.dentry->d_inode->i_mutex);
+ inode_unlock(path.dentry->d_inode);
mnt_drop_write(path.mnt);
exit1:
path_put(&path);
if (!dir->i_op->unlink)
return -EPERM;
- mutex_lock(&target->i_mutex);
+ inode_lock(target);
if (is_local_mountpoint(dentry))
error = -EBUSY;
else {
}
}
out:
- mutex_unlock(&target->i_mutex);
+ inode_unlock(target);
/* We don't d_delete() NFS sillyrenamed files--they still exist. */
if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
if (error)
goto exit1;
retry_deleg:
- mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
exit2:
dput(dentry);
}
- mutex_unlock(&path.dentry->d_inode->i_mutex);
+ inode_unlock(path.dentry->d_inode);
if (inode)
iput(inode); /* truncate the inode here */
inode = NULL;
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Make sure we don't allow creating hardlink to an unlinked file */
if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
error = -ENOENT;
inode->i_state &= ~I_LINKABLE;
spin_unlock(&inode->i_lock);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!error)
fsnotify_link(dir, inode, new_dentry);
return error;
if (!is_dir || (flags & RENAME_EXCHANGE))
lock_two_nondirectories(source, target);
else if (target)
- mutex_lock(&target->i_mutex);
+ inode_lock(target);
error = -EBUSY;
if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
if (!is_dir || (flags & RENAME_EXCHANGE))
unlock_two_nondirectories(source, target);
else if (target)
- mutex_unlock(&target->i_mutex);
+ inode_unlock(target);
dput(new_dentry);
if (!error) {
fsnotify_move(old_dir, new_dir, old_name, is_dir,
struct vfsmount *mnt;
struct dentry *dentry = path->dentry;
retry:
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
if (unlikely(cant_mount(dentry))) {
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
return ERR_PTR(-ENOENT);
}
namespace_lock();
mp = new_mountpoint(dentry);
if (IS_ERR(mp)) {
namespace_unlock();
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
return mp;
}
return mp;
}
namespace_unlock();
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
path_put(path);
path->mnt = mnt;
dentry = path->dentry = dget(mnt->mnt_root);
struct dentry *dentry = where->m_dentry;
put_mountpoint(where);
namespace_unlock();
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
}
static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
if (!res) {
struct inode *inode = d_inode(dentry);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (finfo.i.dirEntNum == NCP_FINFO(inode)->dirEntNum) {
ncp_new_dentry(dentry);
val=1;
ncp_dbg(2, "found, but dirEntNum changed\n");
ncp_update_inode2(inode, &finfo);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
finished:
} else {
struct inode *inode = d_inode(newdent);
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(inode, I_MUTEX_CHILD);
ncp_update_inode2(inode, entry);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
if (ctl.idx >= NCP_DIRCACHE_SIZE) {
iocb->ki_pos = pos;
if (pos > i_size_read(inode)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (pos > i_size_read(inode))
i_size_write(inode, pos);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
ncp_dbg(1, "exit %pD2\n", file);
outrel:
dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
filp, offset, whence);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case 1:
offset += filp->f_pos;
dir_ctx->duped = 0;
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return offset;
}
dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
if (!count)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
result = nfs_sync_mapping(mapping);
if (result)
goto out_unlock;
NFS_I(inode)->read_io += count;
result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!result) {
result = nfs_direct_wait(dreq);
out_release:
nfs_direct_req_release(dreq);
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
return result;
}
pos = iocb->ki_pos;
end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
result = nfs_sync_mapping(mapping);
if (result)
pos >> PAGE_CACHE_SHIFT, end);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!result) {
result = nfs_direct_wait(dreq);
out_release:
nfs_direct_req_release(dreq);
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return result;
}
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret != 0)
break;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = nfs_file_fsync_commit(file, start, end, datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
u32 i, j;
if (fl->commit_through_mds) {
- nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+ nfs_request_add_commit_list(req, cinfo);
} else {
/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server. An attractive
start = xdr_reserve_space(xdr, 4);
BUG_ON(!start);
- if (ff_layout_encode_ioerr(flo, xdr, args))
- goto out;
-
+ ff_layout_encode_ioerr(flo, xdr, args);
ff_layout_encode_iostats(flo, xdr, args);
-out:
+
*start = cpu_to_be32((xdr->p - start - 1) * 4);
dprintk("%s: Return\n", __func__);
}
err->length = end - err->offset;
}
-static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err, u64 offset,
- u64 length, int status, enum nfs_opnum4 opnum,
- nfs4_stateid *stateid,
- struct nfs4_deviceid *deviceid)
+static int
+ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
+ const struct nfs4_ff_layout_ds_err *e2)
{
- return err->status == status && err->opnum == opnum &&
- nfs4_stateid_match(&err->stateid, stateid) &&
- !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) &&
- end_offset(err->offset, err->length) >= offset &&
- err->offset <= end_offset(offset, length);
-}
-
-static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old,
- struct nfs4_ff_layout_ds_err *new)
-{
- if (!ds_error_can_merge(old, new->offset, new->length, new->status,
- new->opnum, &new->stateid, &new->deviceid))
- return false;
-
- extend_ds_error(old, new->offset, new->length);
- return true;
+ int ret;
+
+ if (e1->opnum != e2->opnum)
+ return e1->opnum < e2->opnum ? -1 : 1;
+ if (e1->status != e2->status)
+ return e1->status < e2->status ? -1 : 1;
+ ret = memcmp(&e1->stateid, &e2->stateid, sizeof(e1->stateid));
+ if (ret != 0)
+ return ret;
+ ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
+ if (ret != 0)
+ return ret;
+ if (end_offset(e1->offset, e1->length) < e2->offset)
+ return -1;
+ if (e1->offset > end_offset(e2->offset, e2->length))
+ return 1;
+ /* If ranges overlap or are contiguous, they are the same */
+ return 0;
}
-static bool
+static void
ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
struct nfs4_ff_layout_ds_err *dserr)
{
- struct nfs4_ff_layout_ds_err *err;
-
- list_for_each_entry(err, &flo->error_list, list) {
- if (merge_ds_error(err, dserr)) {
- return true;
- }
- }
-
- list_add(&dserr->list, &flo->error_list);
- return false;
-}
-
-static bool
-ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset,
- u64 length, int status, enum nfs_opnum4 opnum,
- nfs4_stateid *stateid, struct nfs4_deviceid *deviceid)
-{
- bool found = false;
- struct nfs4_ff_layout_ds_err *err;
-
- list_for_each_entry(err, &flo->error_list, list) {
- if (ds_error_can_merge(err, offset, length, status, opnum,
- stateid, deviceid)) {
- found = true;
- extend_ds_error(err, offset, length);
+ struct nfs4_ff_layout_ds_err *err, *tmp;
+ struct list_head *head = &flo->error_list;
+ int match;
+
+ /* Do insertion sort w/ merges */
+ list_for_each_entry_safe(err, tmp, &flo->error_list, list) {
+ match = ff_ds_error_match(err, dserr);
+ if (match < 0)
+ continue;
+ if (match > 0) {
+ /* Add entry "dserr" _before_ entry "err" */
+ head = &err->list;
break;
}
+ /* Entries match, so merge "err" into "dserr" */
+ extend_ds_error(dserr, err->offset, err->length);
+ list_del(&err->list);
+ kfree(err);
}
- return found;
+ list_add_tail(&dserr->list, head);
}
int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
gfp_t gfp_flags)
{
struct nfs4_ff_layout_ds_err *dserr;
- bool needfree;
if (status == 0)
return 0;
if (mirror->mirror_ds == NULL)
return -EINVAL;
- spin_lock(&flo->generic_hdr.plh_inode->i_lock);
- if (ff_layout_update_ds_error(flo, offset, length, status, opnum,
- &mirror->stateid,
- &mirror->mirror_ds->id_node.deviceid)) {
- spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
- return 0;
- }
- spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
dserr = kmalloc(sizeof(*dserr), gfp_flags);
if (!dserr)
return -ENOMEM;
NFS4_DEVICEID4_SIZE);
spin_lock(&flo->generic_hdr.plh_inode->i_lock);
- needfree = ff_layout_add_ds_error_locked(flo, dserr);
+ ff_layout_add_ds_error_locked(flo, dserr);
spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
- if (needfree)
- kfree(dserr);
return 0;
}
trace_nfs_getattr_enter(inode);
/* Flush out writes to the server in order to update c/mtime. */
if (S_ISREG(inode->i_mode)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = nfs_sync_inode(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (err)
goto out;
}
spin_unlock(&inode->i_lock);
trace_nfs_invalidate_mapping_enter(inode);
if (may_lock) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = nfs_invalidate_mapping(inode, mapping);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
} else
ret = nfs_invalidate_mapping(inode, mapping);
trace_nfs_invalidate_mapping_exit(inode, ret);
struct nfs_commit_info *cinfo,
u32 ds_commit_idx);
void nfs_commitdata_release(struct nfs_commit_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+void nfs_request_add_commit_list(struct nfs_page *req,
struct nfs_commit_info *cinfo);
void nfs_request_add_commit_list_locked(struct nfs_page *req,
struct list_head *dst,
if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
return -EOPNOTSUPP;
nfs_wb_all(inode);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == 0)
if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret != 0)
break;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = nfs_file_fsync_commit(file, start, end, datasync);
if (!ret)
ret = pnfs_sync_inode(inode, !!datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
if (same_inode) {
- mutex_lock(&src_inode->i_mutex);
+ inode_lock(src_inode);
} else if (dst_inode < src_inode) {
- mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(dst_inode, I_MUTEX_PARENT);
+ inode_lock_nested(src_inode, I_MUTEX_CHILD);
} else {
- mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(src_inode, I_MUTEX_PARENT);
+ inode_lock_nested(dst_inode, I_MUTEX_CHILD);
}
/* flush all pending writes on both src and dst so that server
out_unlock:
if (same_inode) {
- mutex_unlock(&src_inode->i_mutex);
+ inode_unlock(src_inode);
} else if (dst_inode < src_inode) {
- mutex_unlock(&src_inode->i_mutex);
- mutex_unlock(&dst_inode->i_mutex);
+ inode_unlock(src_inode);
+ inode_unlock(dst_inode);
} else {
- mutex_unlock(&dst_inode->i_mutex);
- mutex_unlock(&src_inode->i_mutex);
+ inode_unlock(dst_inode);
+ inode_unlock(src_inode);
}
out:
return ret;
* holding the nfs_page lock.
*/
void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
- struct nfs_commit_info *cinfo)
+nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
spin_lock(cinfo->lock);
- nfs_request_add_commit_list_locked(req, dst, cinfo);
+ nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
spin_unlock(cinfo->lock);
nfs_mark_page_unstable(req->wb_page, cinfo);
}
{
if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
return;
- nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+ nfs_request_add_commit_list(req, cinfo);
}
static void
struct inode *inode = d_inode(resfh->fh_dentry);
int status;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
status = security_inode_setsecctx(resfh->fh_dentry,
label->data, label->len);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (status)
/*
dir = nn->rec_file->f_path.dentry;
/* lock the parent */
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
if (IS_ERR(dentry)) {
out_put:
dput(dentry);
out_unlock:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
if (status == 0) {
if (nn->in_grace) {
crp = nfs4_client_to_reclaim(dname, nn);
}
status = iterate_dir(nn->rec_file, &ctx.ctx);
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
if (!status) {
list_del(&entry->list);
kfree(entry);
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
nfs4_reset_creds(original_cred);
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
dir = nn->rec_file->f_path.dentry;
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
dentry = lookup_one_len(name, dir, namlen);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
out:
dput(dentry);
out_unlock:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
return status;
}
}
inode = d_inode(dentry);
- mutex_lock_nested(&inode->i_mutex, subclass);
+ inode_lock_nested(inode, subclass);
fill_pre_wcc(fhp);
fhp->fh_locked = true;
}
{
if (fhp->fh_locked) {
fill_post_wcc(fhp);
- mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex);
+ inode_unlock(d_inode(fhp->fh_dentry));
fhp->fh_locked = false;
}
}
dentry = fhp->fh_dentry;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
host_error = security_inode_setsecctx(dentry, label->data, label->len);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return nfserrno(host_error);
}
#else
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (ret == 1)
ret = 0;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
flags = nilfs_mask_flags(inode->i_mode, flags);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = NILFS_I(inode)->i_flags;
nilfs_mark_inode_dirty(inode);
ret = nilfs_transaction_commit(inode->i_sb);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
}
err = filemap_write_and_wait_range(vi->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&vi->i_mutex);
+ inode_lock(vi);
BUG_ON(!S_ISDIR(vi->i_mode));
/* If the bitmap attribute inode is in memory sync it, too. */
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
- mutex_unlock(&vi->i_mutex);
+ inode_unlock(vi);
return ret;
}
ssize_t written = 0;
ssize_t err;
- mutex_lock(&vi->i_mutex);
+ inode_lock(vi);
/* We can write back this queue in page reclaim. */
current->backing_dev_info = inode_to_bdi(vi);
err = ntfs_prepare_file_for_write(iocb, from);
if (iov_iter_count(from) && !err)
written = ntfs_perform_write(file, from, iocb->ki_pos);
current->backing_dev_info = NULL;
- mutex_unlock(&vi->i_mutex);
+ inode_unlock(vi);
if (likely(written > 0)) {
err = generic_write_sync(file, iocb->ki_pos, written);
if (err < 0)
err = filemap_write_and_wait_range(vi->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&vi->i_mutex);
+ inode_lock(vi);
BUG_ON(S_ISDIR(vi->i_mode));
if (!datasync || !NInoNonResident(NTFS_I(vi)))
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
- mutex_unlock(&vi->i_mutex);
+ inode_unlock(vi);
return ret;
}
ntfs_error(vol->sb, "Quota inodes are not open.");
return false;
}
- mutex_lock(&vol->quota_q_ino->i_mutex);
+ inode_lock(vol->quota_q_ino);
ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino));
if (!ictx) {
ntfs_error(vol->sb, "Failed to get index context.");
ntfs_index_entry_mark_dirty(ictx);
set_done:
ntfs_index_ctx_put(ictx);
- mutex_unlock(&vol->quota_q_ino->i_mutex);
+ inode_unlock(vol->quota_q_ino);
/*
* We set the flag so we do not try to mark the quotas out of date
* again on remount.
err_out:
if (ictx)
ntfs_index_ctx_put(ictx);
- mutex_unlock(&vol->quota_q_ino->i_mutex);
+ inode_unlock(vol->quota_q_ino);
return false;
}
* Find the inode number for the hibernation file by looking up the
* filename hiberfil.sys in the root directory.
*/
- mutex_lock(&vol->root_ino->i_mutex);
+ inode_lock(vol->root_ino);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
&name);
- mutex_unlock(&vol->root_ino->i_mutex);
+ inode_unlock(vol->root_ino);
if (IS_ERR_MREF(mref)) {
ret = MREF_ERR(mref);
/* If the file does not exist, Windows is not hibernated. */
* Find the inode number for the quota file by looking up the filename
* $Quota in the extended system files directory $Extend.
*/
- mutex_lock(&vol->extend_ino->i_mutex);
+ inode_lock(vol->extend_ino);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6,
&name);
- mutex_unlock(&vol->extend_ino->i_mutex);
+ inode_unlock(vol->extend_ino);
if (IS_ERR_MREF(mref)) {
/*
* If the file does not exist, quotas are disabled and have
* Find the inode number for the transaction log file by looking up the
* filename $UsnJrnl in the extended system files directory $Extend.
*/
- mutex_lock(&vol->extend_ino->i_mutex);
+ inode_lock(vol->extend_ino);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
&name);
- mutex_unlock(&vol->extend_ino->i_mutex);
+ inode_unlock(vol->extend_ino);
if (IS_ERR_MREF(mref)) {
/*
* If the file does not exist, transaction logging is disabled,
goto bail;
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
out_commit:
ocfs2_commit_trans(osb, handle);
out:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
bail:
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
- BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+ BUG_ON(inode_trylock(tl_inode));
start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
- BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+ BUG_ON(inode_trylock(tl_inode));
di = (struct ocfs2_dinode *) tl_bh->b_data;
goto out;
}
- mutex_lock(&data_alloc_inode->i_mutex);
+ inode_lock(data_alloc_inode);
status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1);
if (status < 0) {
ocfs2_inode_unlock(data_alloc_inode, 1);
out_mutex:
- mutex_unlock(&data_alloc_inode->i_mutex);
+ inode_unlock(data_alloc_inode);
iput(data_alloc_inode);
out:
int status;
struct inode *tl_inode = osb->osb_tl_inode;
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
status = __ocfs2_flush_truncate_log(osb);
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
return status;
}
(unsigned long long)le64_to_cpu(tl_copy->i_blkno),
num_recs);
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
for(i = 0; i < num_recs; i++) {
if (ocfs2_truncate_log_needs_flush(osb)) {
status = __ocfs2_flush_truncate_log(osb);
}
bail_up:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
return status;
}
goto out;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = ocfs2_inode_lock(inode, &di_bh, 1);
if (ret) {
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
out:
while(head) {
handle_t *handle;
int ret = 0;
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
while (head) {
if (ocfs2_truncate_log_needs_flush(osb)) {
}
}
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
while (head) {
/* Premature exit may have left some dangling items. */
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
if (ret < 0) {
ocfs2_inode_unlock(main_bm_inode, 0);
brelse(main_bm_bh);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
return ret;
int ret = 0;
unsigned int truncated_clusters;
- mutex_lock(&osb->osb_tl_inode->i_mutex);
+ inode_lock(osb->osb_tl_inode);
truncated_clusters = osb->truncated_clusters;
- mutex_unlock(&osb->osb_tl_inode->i_mutex);
+ inode_unlock(osb->osb_tl_inode);
/*
* Check whether we can succeed in allocating if we free
mlog_errno(ret);
goto out;
}
- mutex_lock(&dx_alloc_inode->i_mutex);
+ inode_lock(dx_alloc_inode);
ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1);
if (ret) {
ocfs2_inode_unlock(dx_alloc_inode, 1);
out_mutex:
- mutex_unlock(&dx_alloc_inode->i_mutex);
+ inode_unlock(dx_alloc_inode);
brelse(dx_alloc_bh);
out:
iput(dx_alloc_inode);
unsigned int gen;
int noqueue_attempted = 0;
int dlm_locked = 0;
+ int kick_dc = 0;
if (!(lockres->l_flags & OCFS2_LOCK_INITIALIZED)) {
mlog_errno(-EINVAL);
unlock:
lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+ /* ocfs2_unblock_lock requeues on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
+ kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
+
spin_unlock_irqrestore(&lockres->l_lock, flags);
+ if (kick_dc)
+ ocfs2_wake_downconvert_thread(osb);
out:
/*
* This is helping work around a lock inversion between the page lock
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* This prevents concurrent writes on other nodes
ocfs2_rw_unlock(inode, 1);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
relock:
/*
ocfs2_rw_unlock(inode, rw_level);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (written)
ret = written;
struct inode *inode = file->f_mapping->host;
int ret = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case SEEK_SET:
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret)
return ret;
return offset;
goto bail;
}
- mutex_lock(&inode_alloc_inode->i_mutex);
+ inode_lock(inode_alloc_inode);
status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1);
if (status < 0) {
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
mlog_errno(status);
goto bail;
ocfs2_commit_trans(osb, handle);
bail_unlock:
ocfs2_inode_unlock(inode_alloc_inode, 1);
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
brelse(inode_alloc_bh);
bail:
iput(inode_alloc_inode);
/* Lock the orphan dir. The lock will be held for the entire
* delete_inode operation. We do this now to avoid races with
* recovery completion on other nodes. */
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (status < 0) {
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
mlog_errno(status);
goto bail;
return status;
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
brelse(orphan_dir_bh);
bail:
iput(orphan_dir_inode);
unsigned oldflags;
int status;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
status = ocfs2_inode_lock(inode, &bh, 1);
if (status < 0) {
bail_unlock:
ocfs2_inode_unlock(inode, 1);
bail:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
brelse(bh);
struct ocfs2_dinode *dinode_alloc = NULL;
if (inode_alloc)
- mutex_lock(&inode_alloc->i_mutex);
+ inode_lock(inode_alloc);
if (o2info_coherent(&fi->ifi_req)) {
status = ocfs2_inode_lock(inode_alloc, &bh, 0);
ocfs2_inode_unlock(inode_alloc, 0);
if (inode_alloc)
- mutex_unlock(&inode_alloc->i_mutex);
+ inode_unlock(inode_alloc);
brelse(bh);
struct ocfs2_dinode *gb_dinode = NULL;
if (gb_inode)
- mutex_lock(&gb_inode->i_mutex);
+ inode_lock(gb_inode);
if (o2info_coherent(&ffg->iff_req)) {
status = ocfs2_inode_lock(gb_inode, &bh, 0);
ocfs2_inode_unlock(gb_inode, 0);
if (gb_inode)
- mutex_unlock(&gb_inode->i_mutex);
+ inode_unlock(gb_inode);
iput(gb_inode);
brelse(bh);
return status;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
if (status < 0) {
mlog_errno(status);
out_cluster:
ocfs2_inode_unlock(orphan_dir_inode, 0);
out:
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
return status;
}
oi->ip_next_orphan = NULL;
if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = ocfs2_rw_lock(inode, 1);
if (ret < 0) {
mlog_errno(ret);
unlock_rw:
ocfs2_rw_unlock(inode, 1);
unlock_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/* clear dio flag in ocfs2_inode_info */
oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (status < 0) {
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
goto bail;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
status = ocfs2_read_inode_block_full(inode, &alloc_bh,
OCFS2_BH_IGNORE_CACHE);
brelse(alloc_bh);
if (inode) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
}
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (status < 0) {
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
brelse(main_bm_bh);
goto bail;
}
- mutex_lock(&local_alloc_inode->i_mutex);
+ inode_lock(local_alloc_inode);
/*
* We must double check state and allocator bits because
status = 0;
bail:
if (status < 0 && local_alloc_inode) {
- mutex_unlock(&local_alloc_inode->i_mutex);
+ inode_unlock(local_alloc_inode);
iput(local_alloc_inode);
}
* context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
*/
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
ocfs2_commit_trans(osb, handle);
out_unlock_mutex:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
if (context->data_ac) {
ocfs2_free_alloc_context(context->data_ac);
goto out;
}
- mutex_lock(&gb_inode->i_mutex);
+ inode_lock(gb_inode);
ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
if (ret) {
goto out_unlock_gb_mutex;
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
brelse(gd_bh);
out_unlock_tl_inode:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
ocfs2_inode_unlock(gb_inode, 1);
out_unlock_gb_mutex:
- mutex_unlock(&gb_inode->i_mutex);
+ inode_unlock(gb_inode);
brelse(gb_bh);
iput(gb_inode);
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* This prevents concurrent writes from other nodes
out_rw_unlock:
ocfs2_rw_unlock(inode, 1);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return status;
}
if (orphan_dir) {
/* This was locked for us in ocfs2_prepare_orphan_dir() */
ocfs2_inode_unlock(orphan_dir, 1);
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
iput(orphan_dir);
}
if (orphan_dir) {
/* This was locked for us in ocfs2_prepare_orphan_dir() */
ocfs2_inode_unlock(orphan_dir, 1);
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
iput(orphan_dir);
}
return ret;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (ret < 0) {
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
mlog_errno(ret);
if (ret) {
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
}
ocfs2_free_alloc_context(inode_ac);
/* Unroll orphan dir locking */
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
ocfs2_inode_unlock(orphan_dir, 1);
iput(orphan_dir);
}
if (orphan_dir) {
/* This was locked for us in ocfs2_prepare_orphan_dir() */
ocfs2_inode_unlock(orphan_dir, 1);
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
iput(orphan_dir);
}
bail_unlock_orphan:
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
ocfs2_free_dir_lookup_result(&orphan_insert);
goto bail;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (status < 0) {
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
mlog_errno(status);
goto bail;
bail_unlock_orphan:
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
brelse(orphan_dir_bh);
iput(orphan_dir_inode);
goto leave;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (status < 0) {
mlog_errno(status);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
goto leave;
}
ocfs2_commit_trans(osb, handle);
orphan_unlock:
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
leave:
WARN_ON(bh != oinfo->dqi_gqi_bh);
spin_unlock(&dq_data_lock);
if (ex) {
- mutex_lock(&oinfo->dqi_gqinode->i_mutex);
+ inode_lock(oinfo->dqi_gqinode);
down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
} else {
down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
{
if (ex) {
up_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
- mutex_unlock(&oinfo->dqi_gqinode->i_mutex);
+ inode_unlock(oinfo->dqi_gqinode);
} else {
up_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
}
mlog_errno(ret);
goto out;
}
- mutex_lock(&alloc_inode->i_mutex);
+ inode_lock(alloc_inode);
ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
if (ret) {
}
out_mutex:
if (alloc_inode) {
- mutex_unlock(&alloc_inode->i_mutex);
+ inode_unlock(alloc_inode);
iput(alloc_inode);
}
out:
goto out;
}
- mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(new_inode, I_MUTEX_CHILD);
ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
OI_LS_REFLINK_TARGET);
if (ret) {
ocfs2_inode_unlock(new_inode, 1);
brelse(new_bh);
out_unlock:
- mutex_unlock(&new_inode->i_mutex);
+ inode_unlock(new_inode);
out:
if (!ret) {
ret = filemap_fdatawait(inode->i_mapping);
return error;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = dquot_initialize(dir);
if (!error)
error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!error)
fsnotify_create(dir, new_dentry);
return error;
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (ret < 0) {
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (ret < 0) {
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
if (ac->ac_which != OCFS2_AC_USE_LOCAL)
ocfs2_inode_unlock(inode, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
ac->ac_inode = NULL;
return -EINVAL;
}
- mutex_lock(&alloc_inode->i_mutex);
+ inode_lock(alloc_inode);
status = ocfs2_inode_lock(alloc_inode, &bh, 1);
if (status < 0) {
- mutex_unlock(&alloc_inode->i_mutex);
+ inode_unlock(alloc_inode);
iput(alloc_inode);
mlog_errno(status);
goto bail;
}
- mutex_lock(&inode_alloc_inode->i_mutex);
+ inode_lock(inode_alloc_inode);
status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
if (status < 0) {
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
iput(inode_alloc_inode);
mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
(u32)suballoc_slot, status);
mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
ocfs2_inode_unlock(inode_alloc_inode, 0);
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
iput(inode_alloc_inode);
brelse(alloc_bh);
mlog_errno(ret);
goto out;
}
- mutex_lock(&xb_alloc_inode->i_mutex);
+ inode_lock(xb_alloc_inode);
ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
if (ret < 0) {
ocfs2_inode_unlock(xb_alloc_inode, 1);
brelse(xb_alloc_bh);
out_mutex:
- mutex_unlock(&xb_alloc_inode->i_mutex);
+ inode_unlock(xb_alloc_inode);
iput(xb_alloc_inode);
out:
brelse(blk_bh);
}
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
if (ret < 0) {
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
mlog_errno(ret);
goto cleanup;
}
}
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
&xbs, &ctxt, ref_meta, &credits);
return ret;
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
out:
ocfs2_schedule_truncate_log_flush(osb, 1);
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
if (ret)
newattrs.ia_valid |= ret | ATTR_FORCE;
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
/* Note any delegations or leases have already been broken: */
ret = notify_change(dentry, &newattrs, NULL);
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
return ret;
}
if (error)
return error;
retry_deleg:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_path_chmod(path, mode);
if (error)
goto out_unlock;
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
error = notify_change(path->dentry, &newattrs, &delegated_inode);
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |=
ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_path_chown(path, uid, gid);
if (!error)
error = notify_change(path->dentry, &newattrs, &delegated_inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
if (err)
goto out_cleanup;
- mutex_lock(&newdentry->d_inode->i_mutex);
+ inode_lock(newdentry->d_inode);
err = ovl_set_attr(newdentry, stat);
- mutex_unlock(&newdentry->d_inode->i_mutex);
+ inode_unlock(newdentry->d_inode);
if (err)
goto out_cleanup;
struct dentry *newdentry;
int err;
- mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(udir, I_MUTEX_PARENT);
newdentry = lookup_one_len(dentry->d_name.name, upperdir,
dentry->d_name.len);
err = PTR_ERR(newdentry);
out_dput:
dput(newdentry);
out_unlock:
- mutex_unlock(&udir->i_mutex);
+ inode_unlock(udir);
return err;
}
if (err)
goto out_cleanup;
- mutex_lock(&opaquedir->d_inode->i_mutex);
+ inode_lock(opaquedir->d_inode);
err = ovl_set_attr(opaquedir, &stat);
- mutex_unlock(&opaquedir->d_inode->i_mutex);
+ inode_unlock(opaquedir->d_inode);
if (err)
goto out_cleanup;
struct dentry *upper = ovl_dentry_upper(dentry);
int err;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
err = -ESTALE;
if (upper->d_parent == upperdir) {
/* Don't let d_delete() think it can reset d_inode */
* now.
*/
d_drop(dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return err;
}
if (!err) {
upperdentry = ovl_dentry_upper(dentry);
- mutex_lock(&upperdentry->d_inode->i_mutex);
+ inode_lock(upperdentry->d_inode);
err = notify_change(upperdentry, attr, NULL);
- mutex_unlock(&upperdentry->d_inode->i_mutex);
+ inode_unlock(upperdentry->d_inode);
}
ovl_drop_write(dentry);
out:
dput(dentry);
}
}
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
}
revert_creds(old_cred);
put_cred(override_cred);
loff_t res;
struct ovl_dir_file *od = file->private_data;
- mutex_lock(&file_inode(file)->i_mutex);
+ inode_lock(file_inode(file));
if (!file->f_pos)
ovl_dir_reset(file);
res = offset;
}
out_unlock:
- mutex_unlock(&file_inode(file)->i_mutex);
+ inode_unlock(file_inode(file));
return res;
}
ovl_path_upper(dentry, &upperpath);
realfile = ovl_path_open(&upperpath, O_RDONLY);
smp_mb__before_spinlock();
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (!od->upperfile) {
if (IS_ERR(realfile)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return PTR_ERR(realfile);
}
od->upperfile = realfile;
fput(realfile);
realfile = od->upperfile;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
}
struct ovl_dir_file *od = file->private_data;
if (od->cache) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ovl_cache_put(od, file->f_path.dentry);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
fput(od->realfile);
if (od->upperfile)
{
struct ovl_cache_entry *p;
- mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
list_for_each_entry(p, list, l_node) {
struct dentry *dentry;
ovl_cleanup(upper->d_inode, dentry);
dput(dentry);
}
- mutex_unlock(&upper->d_inode->i_mutex);
+ inode_unlock(upper->d_inode);
}
{
struct ovl_entry *oe = dentry->d_fsdata;
- WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
+ WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
WARN_ON(oe->__upperdentry);
BUG_ON(!upperdentry->d_inode);
/*
{
struct ovl_entry *oe = dentry->d_fsdata;
- WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+ WARN_ON(!inode_is_locked(dentry->d_inode));
oe->version++;
}
{
struct ovl_entry *oe = dentry->d_fsdata;
- WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+ WARN_ON(!inode_is_locked(dentry->d_inode));
return oe->version;
}
{
struct dentry *dentry;
- mutex_lock(&dir->d_inode->i_mutex);
+ inode_lock(dir->d_inode);
dentry = lookup_one_len(name->name, dir, name->len);
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
if (IS_ERR(dentry)) {
if (PTR_ERR(dentry) == -ENOENT)
if (err)
return ERR_PTR(err);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
retry:
work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
strlen(OVL_WORKDIR_NAME));
goto out_dput;
}
out_unlock:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
mnt_drop_write(mnt);
return work;
*/
unsigned int pipe_min_size = PAGE_SIZE;
+/* Maximum allocatable pages per user. The hard limit is unset (0) by default;
+ * the soft limit defaults to PIPE_DEF_BUFFERS * INR_OPEN_CUR.
+ */
+unsigned long pipe_user_pages_hard;
+unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+
/*
* We use a start+len construction, which provides full use of the
* allocated memory.
return retval;
}
+static void account_pipe_buffers(struct pipe_inode_info *pipe,
+ unsigned long old, unsigned long new)
+{
+ atomic_long_add(new - old, &pipe->user->pipe_bufs);
+}
+
+static bool too_many_pipe_buffers_soft(struct user_struct *user)
+{
+ return pipe_user_pages_soft &&
+ atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft;
+}
+
+static bool too_many_pipe_buffers_hard(struct user_struct *user)
+{
+ return pipe_user_pages_hard &&
+ atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard;
+}
+
struct pipe_inode_info *alloc_pipe_info(void)
{
struct pipe_inode_info *pipe;
pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
if (pipe) {
- pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
+ unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
+ struct user_struct *user = get_current_user();
+
+ if (!too_many_pipe_buffers_hard(user)) {
+ if (too_many_pipe_buffers_soft(user))
+ pipe_bufs = 1;
+ pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
+ }
+
if (pipe->bufs) {
init_waitqueue_head(&pipe->wait);
pipe->r_counter = pipe->w_counter = 1;
- pipe->buffers = PIPE_DEF_BUFFERS;
+ pipe->buffers = pipe_bufs;
+ pipe->user = user;
+ account_pipe_buffers(pipe, 0, pipe_bufs);
mutex_init(&pipe->mutex);
return pipe;
}
+ free_uid(user);
kfree(pipe);
}
{
int i;
+ account_pipe_buffers(pipe, pipe->buffers, 0);
+ free_uid(pipe->user);
for (i = 0; i < pipe->buffers; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
if (buf->ops)
memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
}
+ account_pipe_buffers(pipe, pipe->buffers, nr_pages);
pipe->curbuf = 0;
kfree(pipe->bufs);
pipe->bufs = bufs;
if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
ret = -EPERM;
goto out;
+ } else if ((too_many_pipe_buffers_hard(pipe->user) ||
+ too_many_pipe_buffers_soft(pipe->user)) &&
+ !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out;
}
ret = pipe_set_size(pipe, nr_pages);
break;
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_size_write(inode, proc_root_kcore->size);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
struct pid_namespace *ns = s->s_fs_info;
struct dentry *self;
- mutex_lock(&root_inode->i_mutex);
+ inode_lock(root_inode);
self = d_alloc_name(s->s_root, "self");
if (self) {
struct inode *inode = new_inode_pseudo(s);
} else {
self = ERR_PTR(-ENOMEM);
}
- mutex_unlock(&root_inode->i_mutex);
+ inode_unlock(root_inode);
if (IS_ERR(self)) {
pr_err("proc_fill_super: can't allocate /proc/self\n");
return PTR_ERR(self);
pte_t *pte;
spinlock_t *ptl;
- if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
smaps_pmd_entry(pmd, addr, walk);
spin_unlock(ptl);
return 0;
spinlock_t *ptl;
struct page *page;
- if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty_pmd(vma, addr, pmd);
goto out;
int err = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pmd_trans_huge_lock(pmdp, vma, &ptl)) {
+ ptl = pmd_trans_huge_lock(pmdp, vma);
+ if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
pte_t *orig_pte;
pte_t *pte;
- if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
struct pid_namespace *ns = s->s_fs_info;
struct dentry *thread_self;
- mutex_lock(&root_inode->i_mutex);
+ inode_lock(root_inode);
thread_self = d_alloc_name(s->s_root, "thread-self");
if (thread_self) {
struct inode *inode = new_inode_pseudo(s);
} else {
thread_self = ERR_PTR(-ENOMEM);
}
- mutex_unlock(&root_inode->i_mutex);
+ inode_unlock(root_inode);
if (IS_ERR(thread_self)) {
pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
return PTR_ERR(thread_self);
break;
}
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_alloc_name(root, name);
if (!dentry)
list_add(&private->list, &allpstore);
spin_unlock_irqrestore(&allpstore_lock, flags);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return 0;
fail_lockedalloc:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
kfree(private);
fail_alloc:
iput(inode);
continue;
if (!sb_has_quota_active(sb, cnt))
continue;
- mutex_lock(&dqopt->files[cnt]->i_mutex);
+ inode_lock(dqopt->files[cnt]);
truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
- mutex_unlock(&dqopt->files[cnt]->i_mutex);
+ inode_unlock(dqopt->files[cnt]);
}
mutex_unlock(&dqopt->dqonoff_mutex);
/* If quota was reenabled in the meantime, we have
* nothing to do */
if (!sb_has_quota_loaded(sb, cnt)) {
- mutex_lock(&toputinode[cnt]->i_mutex);
+ inode_lock(toputinode[cnt]);
toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
S_NOATIME | S_NOQUOTA);
truncate_inode_pages(&toputinode[cnt]->i_data,
0);
- mutex_unlock(&toputinode[cnt]->i_mutex);
+ inode_unlock(toputinode[cnt]);
mark_inode_dirty_sync(toputinode[cnt]);
}
mutex_unlock(&dqopt->dqonoff_mutex);
/* We don't want quota and atime on quota files (deadlocks
* possible) Also nobody should write to the file - we use
* special IO operations which ignore the immutable bit. */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
S_NOQUOTA);
inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* When S_NOQUOTA is set, remove dquot references as no more
* references can be added
iput(inode);
out_lock:
if (oldflags != -1) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Set the flags back (in the case of accidental quotaon()
* on a wrong file we don't want to mess up the flags) */
inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
inode->i_flags |= oldflags;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
mutex_unlock(&dqopt->dqonoff_mutex);
out_fmt:
struct dentry *dentry;
int error;
- mutex_lock(&d_inode(sb->s_root)->i_mutex);
+ inode_lock(d_inode(sb->s_root));
dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
- mutex_unlock(&d_inode(sb->s_root)->i_mutex);
+ inode_unlock(d_inode(sb->s_root));
if (IS_ERR(dentry))
return PTR_ERR(dentry);
struct inode *inode = file_inode(file);
loff_t retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case SEEK_END:
offset += i_size_read(inode);
retval = offset;
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
EXPORT_SYMBOL(default_llseek);
mnt_drop_write_file(dst_file);
next_loop:
fdput(dst_fd);
+
+ if (fatal_signal_pending(current))
+ goto out;
}
out:
fsnotify_access(file);
file_accessed(file);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
return res;
}
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
reiserfs_write_lock(inode->i_sb);
err = reiserfs_commit_for_inode(inode);
reiserfs_write_unlock(inode->i_sb);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (err < 0)
return err;
return 0;
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
BUG_ON(!S_ISREG(inode->i_mode));
err = sync_mapping_buffers(inode->i_mapping);
reiserfs_write_lock(inode->i_sb);
reiserfs_write_unlock(inode->i_sb);
if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (barrier_done < 0)
return barrier_done;
return (err < 0) ? -EIO : 0;
page_cache_release(page);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
reiserfs_write_unlock(inode->i_sb);
return retval;
}
pathrelse(&path);
inode = reiserfs_iget(s, &obj_key);
- if (!inode) {
+ if (IS_ERR_OR_NULL(inode)) {
/*
* the unlink almost completed, it just did not
* manage to remove "save" link and release objectid
#ifdef CONFIG_REISERFS_FS_XATTR
static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
{
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
return dir->i_op->create(dir, dentry, mode, true);
}
#endif
static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
return dir->i_op->mkdir(dir, dentry, mode);
}
{
int error;
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
error = dir->i_op->unlink(dir, dentry);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
if (!error)
d_delete(dentry);
{
int error;
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
error = dir->i_op->rmdir(dir, dentry);
if (!error)
d_inode(dentry)->i_flags |= S_DEAD;
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
if (!error)
d_delete(dentry);
if (d_really_is_negative(privroot))
return ERR_PTR(-ENODATA);
- mutex_lock_nested(&d_inode(privroot)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(privroot), I_MUTEX_XATTR);
xaroot = dget(REISERFS_SB(sb)->xattr_root);
if (!xaroot)
}
}
- mutex_unlock(&d_inode(privroot)->i_mutex);
+ inode_unlock(d_inode(privroot));
return xaroot;
}
le32_to_cpu(INODE_PKEY(inode)->k_objectid),
inode->i_generation);
- mutex_lock_nested(&d_inode(xaroot)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(xaroot), I_MUTEX_XATTR);
xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
if (!IS_ERR(xadir) && d_really_is_negative(xadir)) {
}
}
- mutex_unlock(&d_inode(xaroot)->i_mutex);
+ inode_unlock(d_inode(xaroot));
dput(xaroot);
return xadir;
}
container_of(ctx, struct reiserfs_dentry_buf, ctx);
struct dentry *dentry;
- WARN_ON_ONCE(!mutex_is_locked(&d_inode(dbuf->xadir)->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(d_inode(dbuf->xadir)));
if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
return -ENOSPC;
goto out_dir;
}
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
buf.xadir = dir;
while (1) {
break;
buf.count = 0;
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
cleanup_dentry_buf(&buf);
if (!err) {
int jerror;
- mutex_lock_nested(&d_inode(dir->d_parent)->i_mutex,
+ inode_lock_nested(d_inode(dir->d_parent),
I_MUTEX_XATTR);
err = action(dir, data);
reiserfs_write_lock(inode->i_sb);
jerror = journal_end(&th);
reiserfs_write_unlock(inode->i_sb);
- mutex_unlock(&d_inode(dir->d_parent)->i_mutex);
+ inode_unlock(d_inode(dir->d_parent));
err = jerror ?: err;
}
}
if (IS_ERR(xadir))
return ERR_CAST(xadir);
- mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
xafile = lookup_one_len(name, xadir, strlen(name));
if (IS_ERR(xafile)) {
err = PTR_ERR(xafile);
if (err)
dput(xafile);
out:
- mutex_unlock(&d_inode(xadir)->i_mutex);
+ inode_unlock(d_inode(xadir));
dput(xadir);
if (err)
return ERR_PTR(err);
if (IS_ERR(xadir))
return PTR_ERR(xadir);
- mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
dentry = lookup_one_len(name, xadir, strlen(name));
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
dput(dentry);
out_dput:
- mutex_unlock(&d_inode(xadir)->i_mutex);
+ inode_unlock(d_inode(xadir));
dput(xadir);
return err;
}
.ia_valid = ATTR_SIZE | ATTR_CTIME,
};
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_XATTR);
inode_dio_wait(d_inode(dentry));
err = reiserfs_setattr(dentry, &newattrs);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
} else
update_ctime(inode);
out_unlock:
goto out;
}
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
if (!err)
err = buf.pos;
int err;
struct inode *inode = d_inode(dentry->d_parent);
- WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(inode));
err = xattr_mkdir(inode, dentry, 0700);
if (err || d_really_is_negative(dentry)) {
int err = 0;
/* If we don't have the privroot located yet - go find it */
- mutex_lock(&d_inode(s->s_root)->i_mutex);
+ inode_lock(d_inode(s->s_root));
dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
strlen(PRIVROOT_NAME));
if (!IS_ERR(dentry)) {
d_inode(dentry)->i_flags |= S_PRIVATE;
} else
err = PTR_ERR(dentry);
- mutex_unlock(&d_inode(s->s_root)->i_mutex);
+ inode_unlock(d_inode(s->s_root));
return err;
}
goto error;
if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) {
- mutex_lock(&d_inode(s->s_root)->i_mutex);
+ inode_lock(d_inode(s->s_root));
err = create_privroot(REISERFS_SB(s)->priv_root);
- mutex_unlock(&d_inode(s->s_root)->i_mutex);
+ inode_unlock(d_inode(s->s_root));
}
if (d_really_is_positive(privroot)) {
s->s_xattr = reiserfs_xattr_handlers;
- mutex_lock(&d_inode(privroot)->i_mutex);
+ inode_lock(d_inode(privroot));
if (!REISERFS_SB(s)->xattr_root) {
struct dentry *dentry;
else
err = PTR_ERR(dentry);
}
- mutex_unlock(&d_inode(privroot)->i_mutex);
+ inode_unlock(d_inode(privroot));
}
error:
* the files within the tracefs system. It is up to the individual
* mkdir routine to handle races.
*/
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = tracefs_ops.mkdir(name);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
kfree(name);
* This time we need to unlock not only the parent (inode) but
* also the directory that is being deleted.
*/
- mutex_unlock(&inode->i_mutex);
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(inode);
+ inode_unlock(dentry->d_inode);
ret = tracefs_ops.rmdir(name);
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock_nested(inode, I_MUTEX_PARENT);
+ inode_lock(dentry->d_inode);
kfree(name);
if (!parent)
parent = tracefs_mount->mnt_root;
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(dentry) && dentry->d_inode) {
dput(dentry);
}
if (IS_ERR(dentry)) {
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
}
static struct dentry *failed_creating(struct dentry *dentry)
{
- mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ inode_unlock(dentry->d_parent->d_inode);
dput(dentry);
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
return NULL;
static struct dentry *end_creating(struct dentry *dentry)
{
- mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ inode_unlock(dentry->d_parent->d_inode);
return dentry;
}
if (!parent || !parent->d_inode)
return;
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
ret = __tracefs_remove(dentry, parent);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
if (!ret)
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
}
parent = dentry;
down:
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
loop:
/*
* The parent->d_subdirs is protected by the d_lock. Outside that
/* perhaps simple_empty(child) makes more sense */
if (!list_empty(&child->d_subdirs)) {
spin_unlock(&parent->d_lock);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
parent = child;
goto down;
}
}
spin_unlock(&parent->d_lock);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
child = parent;
parent = parent->d_parent;
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
if (child != dentry)
/* go up */
if (!__tracefs_remove(child, parent))
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
}
/**
dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
dentry, inode->i_ino,
inode->i_nlink, dir->i_ino);
- ubifs_assert(mutex_is_locked(&dir->i_mutex));
- ubifs_assert(mutex_is_locked(&inode->i_mutex));
+ ubifs_assert(inode_is_locked(dir));
+ ubifs_assert(inode_is_locked(inode));
err = dbg_check_synced_i_size(c, inode);
if (err)
dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
dentry, inode->i_ino,
inode->i_nlink, dir->i_ino);
- ubifs_assert(mutex_is_locked(&dir->i_mutex));
- ubifs_assert(mutex_is_locked(&inode->i_mutex));
+ ubifs_assert(inode_is_locked(dir));
+ ubifs_assert(inode_is_locked(inode));
err = dbg_check_synced_i_size(c, inode);
if (err)
return err;
dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
inode->i_ino, dir->i_ino);
- ubifs_assert(mutex_is_locked(&dir->i_mutex));
- ubifs_assert(mutex_is_locked(&inode->i_mutex));
+ ubifs_assert(inode_is_locked(dir));
+ ubifs_assert(inode_is_locked(inode));
err = check_dir_empty(c, d_inode(dentry));
if (err)
return err;
dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu",
old_dentry, old_inode->i_ino, old_dir->i_ino,
new_dentry, new_dir->i_ino);
- ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
- ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
+ ubifs_assert(inode_is_locked(old_dir));
+ ubifs_assert(inode_is_locked(new_dir));
if (unlink)
- ubifs_assert(mutex_is_locked(&new_inode->i_mutex));
+ ubifs_assert(inode_is_locked(new_inode));
if (unlink && is_dir) {
err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Synchronize the inode unless this is a 'datasync()' call. */
if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
*/
err = ubifs_sync_wbufs_by_inode(c, inode);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
union ubifs_key key;
int err, type;
- ubifs_assert(mutex_is_locked(&host->i_mutex));
+ ubifs_assert(inode_is_locked(host));
if (size > UBIFS_MAX_INO_DATA)
return -ERANGE;
dbg_gen("xattr '%s', ino %lu ('%pd')", name,
host->i_ino, dentry);
- ubifs_assert(mutex_is_locked(&host->i_mutex));
+ ubifs_assert(inode_is_locked(host));
err = check_namespace(&nm);
if (err < 0)
struct udf_inode_info *iinfo = UDF_I(inode);
int err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = generic_write_checks(iocb, from);
if (retval <= 0)
(udf_file_entry_alloc_offset(inode) + end)) {
err = udf_expand_file_adinicb(inode);
if (err) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
udf_debug("udf_expand_adinicb: err=%d\n", err);
return err;
}
retval = __generic_file_write_iter(iocb, from);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (retval > 0) {
mark_inode_dirty(inode);
* Grab i_mutex to avoid races with writes changing i_size
* while we are running.
*/
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
down_write(&UDF_I(inode)->i_data_sem);
udf_discard_prealloc(inode);
udf_truncate_tail_extent(inode);
up_write(&UDF_I(inode)->i_data_sem);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
.nr_to_write = 1,
};
- WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(inode));
if (!iinfo->i_lenAlloc) {
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
{
int i;
int nr_groups = bitmap->s_nr_groups;
- int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
- nr_groups);
for (i = 0; i < nr_groups; i++)
if (bitmap->s_block_bitmap[i])
brelse(bitmap->s_block_bitmap[i]);
- if (size <= PAGE_SIZE)
- kfree(bitmap);
- else
- vfree(bitmap);
+ kvfree(bitmap);
}
static void udf_free_partition(struct udf_part_map *map)
}
}
retry_deleg:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = notify_change(path->dentry, &newattrs, &delegated_inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_inode_setxattr(dentry, name, value, size, flags);
if (error)
goto out;
error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
EXPORT_SYMBOL_GPL(vfs_setxattr);
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_inode_removexattr(dentry, name);
if (error)
goto out;
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
EXPORT_SYMBOL_GPL(vfs_removexattr);
/*
* Values for di_flags
- * There should be a one-to-one correspondence between these flags and the
- * XFS_XFLAG_s.
*/
#define XFS_DIFLAG_REALTIME_BIT 0 /* file's blocks come from rt area */
#define XFS_DIFLAG_PREALLOC_BIT 1 /* file space has been preallocated */
XFS_DIFLAG_PROJINHERIT | XFS_DIFLAG_NOSYMLINKS | XFS_DIFLAG_EXTSIZE | \
XFS_DIFLAG_EXTSZINHERIT | XFS_DIFLAG_NODEFRAG | XFS_DIFLAG_FILESTREAM)
+/*
+ * Values for di_flags2. These start by being exposed to userspace in the upper
+ * 16 bits of the XFS_XFLAG_s range.
+ */
+#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */
+#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
+
+#define XFS_DIFLAG2_ANY (XFS_DIFLAG2_DAX)
+
/*
* Inode number format:
* low inopblog bits - offset in block
};
#endif
-/*
- * Structure for XFS_IOC_FSGETXATTR[A] and XFS_IOC_FSSETXATTR.
- */
-#ifndef HAVE_FSXATTR
-struct fsxattr {
- __u32 fsx_xflags; /* xflags field value (get/set) */
- __u32 fsx_extsize; /* extsize field value (get/set)*/
- __u32 fsx_nextents; /* nextents field value (get) */
- __u32 fsx_projid; /* project identifier (get/set) */
- unsigned char fsx_pad[12];
-};
-#endif
-
-/*
- * Flags for the bs_xflags/fsx_xflags field
- * There should be a one-to-one correspondence between these flags and the
- * XFS_DIFLAG_s.
- */
-#define XFS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
-#define XFS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
-#define XFS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
-#define XFS_XFLAG_APPEND 0x00000010 /* all writes append */
-#define XFS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
-#define XFS_XFLAG_NOATIME 0x00000040 /* do not update access time */
-#define XFS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
-#define XFS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
-#define XFS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
-#define XFS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
-#define XFS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
-#define XFS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
-#define XFS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
-#define XFS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
-#define XFS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
-
/*
* Structure for XFS_IOC_GETBMAP.
* On input, fill in bmv_offset and bmv_length of the first structure
#define XFS_IOC_ALLOCSP _IOW ('X', 10, struct xfs_flock64)
#define XFS_IOC_FREESP _IOW ('X', 11, struct xfs_flock64)
#define XFS_IOC_DIOINFO _IOR ('X', 30, struct dioattr)
-#define XFS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr)
-#define XFS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr)
+#define XFS_IOC_FSGETXATTR FS_IOC_FSGETXATTR
+#define XFS_IOC_FSSETXATTR FS_IOC_FSSETXATTR
#define XFS_IOC_ALLOCSP64 _IOW ('X', 36, struct xfs_flock64)
#define XFS_IOC_FREESP64 _IOW ('X', 37, struct xfs_flock64)
#define XFS_IOC_GETBMAP _IOWR('X', 38, struct getbmap)
LIST_HEAD(dispose);
int loop = 0;
+ /*
+ * We need to flush the buffer workqueue to ensure that all IO
+ * completion processing is 100% done. Just waiting on buffer locks is
+ * not sufficient for async IO as the reference count held over IO is
+ * not released until after the buffer lock is dropped. Hence we need to
+ * ensure here that all reference counts have been dropped before we
+ * start walking the LRU list.
+ */
+ drain_workqueue(btp->bt_mount->m_buf_workqueue);
+
/* loop until there is nothing left on the lru list. */
while (list_lru_count(&btp->bt_lru)) {
list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
int type)
{
if (type & XFS_IOLOCK_EXCL)
- mutex_lock(&VFS_I(ip)->i_mutex);
+ inode_lock(VFS_I(ip));
xfs_ilock(ip, type);
}
{
xfs_iunlock(ip, type);
if (type & XFS_IOLOCK_EXCL)
- mutex_unlock(&VFS_I(ip)->i_mutex);
+ inode_unlock(VFS_I(ip));
}
static inline void
{
xfs_ilock_demote(ip, type);
if (type & XFS_IOLOCK_EXCL)
- mutex_unlock(&VFS_I(ip)->i_mutex);
+ inode_unlock(VFS_I(ip));
}
/*
/*
* pfn_mkwrite was originally inteneded to ensure we capture time stamp
* updates on write faults. In reality, it's need to serialise against
- * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
- * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
- * barrier in place.
+ * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
+ * to ensure we serialise the fault barrier in place.
*/
static int
xfs_filemap_pfn_mkwrite(
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
+ else if (IS_DAX(inode))
+ ret = dax_pfn_mkwrite(vma, vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;
STATIC uint
_xfs_dic2xflags(
- __uint16_t di_flags)
+ __uint16_t di_flags,
+ uint64_t di_flags2,
+ bool has_attr)
{
uint flags = 0;
if (di_flags & XFS_DIFLAG_ANY) {
if (di_flags & XFS_DIFLAG_REALTIME)
- flags |= XFS_XFLAG_REALTIME;
+ flags |= FS_XFLAG_REALTIME;
if (di_flags & XFS_DIFLAG_PREALLOC)
- flags |= XFS_XFLAG_PREALLOC;
+ flags |= FS_XFLAG_PREALLOC;
if (di_flags & XFS_DIFLAG_IMMUTABLE)
- flags |= XFS_XFLAG_IMMUTABLE;
+ flags |= FS_XFLAG_IMMUTABLE;
if (di_flags & XFS_DIFLAG_APPEND)
- flags |= XFS_XFLAG_APPEND;
+ flags |= FS_XFLAG_APPEND;
if (di_flags & XFS_DIFLAG_SYNC)
- flags |= XFS_XFLAG_SYNC;
+ flags |= FS_XFLAG_SYNC;
if (di_flags & XFS_DIFLAG_NOATIME)
- flags |= XFS_XFLAG_NOATIME;
+ flags |= FS_XFLAG_NOATIME;
if (di_flags & XFS_DIFLAG_NODUMP)
- flags |= XFS_XFLAG_NODUMP;
+ flags |= FS_XFLAG_NODUMP;
if (di_flags & XFS_DIFLAG_RTINHERIT)
- flags |= XFS_XFLAG_RTINHERIT;
+ flags |= FS_XFLAG_RTINHERIT;
if (di_flags & XFS_DIFLAG_PROJINHERIT)
- flags |= XFS_XFLAG_PROJINHERIT;
+ flags |= FS_XFLAG_PROJINHERIT;
if (di_flags & XFS_DIFLAG_NOSYMLINKS)
- flags |= XFS_XFLAG_NOSYMLINKS;
+ flags |= FS_XFLAG_NOSYMLINKS;
if (di_flags & XFS_DIFLAG_EXTSIZE)
- flags |= XFS_XFLAG_EXTSIZE;
+ flags |= FS_XFLAG_EXTSIZE;
if (di_flags & XFS_DIFLAG_EXTSZINHERIT)
- flags |= XFS_XFLAG_EXTSZINHERIT;
+ flags |= FS_XFLAG_EXTSZINHERIT;
if (di_flags & XFS_DIFLAG_NODEFRAG)
- flags |= XFS_XFLAG_NODEFRAG;
+ flags |= FS_XFLAG_NODEFRAG;
if (di_flags & XFS_DIFLAG_FILESTREAM)
- flags |= XFS_XFLAG_FILESTREAM;
+ flags |= FS_XFLAG_FILESTREAM;
}
+ if (di_flags2 & XFS_DIFLAG2_ANY) {
+ if (di_flags2 & XFS_DIFLAG2_DAX)
+ flags |= FS_XFLAG_DAX;
+ }
+
+ if (has_attr)
+ flags |= FS_XFLAG_HASATTR;
+
return flags;
}
uint
xfs_ip2xflags(
- xfs_inode_t *ip)
+ struct xfs_inode *ip)
{
- xfs_icdinode_t *dic = &ip->i_d;
+ struct xfs_icdinode *dic = &ip->i_d;
- return _xfs_dic2xflags(dic->di_flags) |
- (XFS_IFORK_Q(ip) ? XFS_XFLAG_HASATTR : 0);
+ return _xfs_dic2xflags(dic->di_flags, dic->di_flags2, XFS_IFORK_Q(ip));
}
uint
xfs_dic2xflags(
- xfs_dinode_t *dip)
+ struct xfs_dinode *dip)
{
- return _xfs_dic2xflags(be16_to_cpu(dip->di_flags)) |
- (XFS_DFORK_Q(dip) ? XFS_XFLAG_HASATTR : 0);
+ return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
+ be64_to_cpu(dip->di_flags2), XFS_DFORK_Q(dip));
}
/*
case S_IFREG:
case S_IFDIR:
if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
- uint di_flags = 0;
+ uint64_t di_flags2 = 0;
+ uint di_flags = 0;
if (S_ISDIR(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_NODEFRAG;
if (pip->i_d.di_flags & XFS_DIFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
+ if (pip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+ di_flags2 |= XFS_DIFLAG2_DAX;
+
ip->i_d.di_flags |= di_flags;
+ ip->i_d.di_flags2 |= di_flags2;
}
/* FALLTHROUGH */
case S_IFLNK:
unsigned int xflags = start;
if (flags & FS_IMMUTABLE_FL)
- xflags |= XFS_XFLAG_IMMUTABLE;
+ xflags |= FS_XFLAG_IMMUTABLE;
else
- xflags &= ~XFS_XFLAG_IMMUTABLE;
+ xflags &= ~FS_XFLAG_IMMUTABLE;
if (flags & FS_APPEND_FL)
- xflags |= XFS_XFLAG_APPEND;
+ xflags |= FS_XFLAG_APPEND;
else
- xflags &= ~XFS_XFLAG_APPEND;
+ xflags &= ~FS_XFLAG_APPEND;
if (flags & FS_SYNC_FL)
- xflags |= XFS_XFLAG_SYNC;
+ xflags |= FS_XFLAG_SYNC;
else
- xflags &= ~XFS_XFLAG_SYNC;
+ xflags &= ~FS_XFLAG_SYNC;
if (flags & FS_NOATIME_FL)
- xflags |= XFS_XFLAG_NOATIME;
+ xflags |= FS_XFLAG_NOATIME;
else
- xflags &= ~XFS_XFLAG_NOATIME;
+ xflags &= ~FS_XFLAG_NOATIME;
if (flags & FS_NODUMP_FL)
- xflags |= XFS_XFLAG_NODUMP;
+ xflags |= FS_XFLAG_NODUMP;
else
- xflags &= ~XFS_XFLAG_NODUMP;
+ xflags &= ~FS_XFLAG_NODUMP;
return xflags;
}
unsigned int xflags)
{
unsigned int di_flags;
+ uint64_t di_flags2;
/* can't set PREALLOC this way, just preserve it */
di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
- if (xflags & XFS_XFLAG_IMMUTABLE)
+ if (xflags & FS_XFLAG_IMMUTABLE)
di_flags |= XFS_DIFLAG_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
+ if (xflags & FS_XFLAG_APPEND)
di_flags |= XFS_DIFLAG_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
+ if (xflags & FS_XFLAG_SYNC)
di_flags |= XFS_DIFLAG_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
+ if (xflags & FS_XFLAG_NOATIME)
di_flags |= XFS_DIFLAG_NOATIME;
- if (xflags & XFS_XFLAG_NODUMP)
+ if (xflags & FS_XFLAG_NODUMP)
di_flags |= XFS_DIFLAG_NODUMP;
- if (xflags & XFS_XFLAG_NODEFRAG)
+ if (xflags & FS_XFLAG_NODEFRAG)
di_flags |= XFS_DIFLAG_NODEFRAG;
- if (xflags & XFS_XFLAG_FILESTREAM)
+ if (xflags & FS_XFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
if (S_ISDIR(ip->i_d.di_mode)) {
- if (xflags & XFS_XFLAG_RTINHERIT)
+ if (xflags & FS_XFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
- if (xflags & XFS_XFLAG_NOSYMLINKS)
+ if (xflags & FS_XFLAG_NOSYMLINKS)
di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if (xflags & XFS_XFLAG_EXTSZINHERIT)
+ if (xflags & FS_XFLAG_EXTSZINHERIT)
di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- if (xflags & XFS_XFLAG_PROJINHERIT)
+ if (xflags & FS_XFLAG_PROJINHERIT)
di_flags |= XFS_DIFLAG_PROJINHERIT;
} else if (S_ISREG(ip->i_d.di_mode)) {
- if (xflags & XFS_XFLAG_REALTIME)
+ if (xflags & FS_XFLAG_REALTIME)
di_flags |= XFS_DIFLAG_REALTIME;
- if (xflags & XFS_XFLAG_EXTSIZE)
+ if (xflags & FS_XFLAG_EXTSIZE)
di_flags |= XFS_DIFLAG_EXTSIZE;
}
-
ip->i_d.di_flags = di_flags;
+
+ /* diflags2 only valid for v3 inodes. */
+ if (ip->i_d.di_version < 3)
+ return;
+
+ di_flags2 = 0;
+ if (xflags & FS_XFLAG_DAX)
+ di_flags2 |= XFS_DIFLAG2_DAX;
+
+ ip->i_d.di_flags2 = di_flags2;
+
}
STATIC void
struct inode *inode = VFS_I(ip);
unsigned int xflags = xfs_ip2xflags(ip);
- if (xflags & XFS_XFLAG_IMMUTABLE)
+ if (xflags & FS_XFLAG_IMMUTABLE)
inode->i_flags |= S_IMMUTABLE;
else
inode->i_flags &= ~S_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
+ if (xflags & FS_XFLAG_APPEND)
inode->i_flags |= S_APPEND;
else
inode->i_flags &= ~S_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
+ if (xflags & FS_XFLAG_SYNC)
inode->i_flags |= S_SYNC;
else
inode->i_flags &= ~S_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
+ if (xflags & FS_XFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
else
inode->i_flags &= ~S_NOATIME;
+ if (xflags & FS_XFLAG_DAX)
+ inode->i_flags |= S_DAX;
+ else
+ inode->i_flags &= ~S_DAX;
+
}
static int
/* Can't change realtime flag if any extents are allocated. */
if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
- XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & XFS_XFLAG_REALTIME))
+ XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME))
return -EINVAL;
/* If realtime flag is set then must have realtime device */
- if (fa->fsx_xflags & XFS_XFLAG_REALTIME) {
+ if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
(ip->i_d.di_extsize % mp->m_sb.sb_rextsize))
return -EINVAL;
* we have appropriate permission.
*/
if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) ||
- (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
+ (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) &&
!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
* extent size hint validation is somewhat cumbersome. Rules are:
*
* 1. extent size hint is only valid for directories and regular files
- * 2. XFS_XFLAG_EXTSIZE is only valid for regular files
- * 3. XFS_XFLAG_EXTSZINHERIT is only valid for directories.
+ * 2. FS_XFLAG_EXTSIZE is only valid for regular files
+ * 3. FS_XFLAG_EXTSZINHERIT is only valid for directories.
* 4. can only be changed on regular files if no extents are allocated
* 5. can be changed on directories at any time
* 6. extsize hint of 0 turns off hints, clears inode flags.
{
struct xfs_mount *mp = ip->i_mount;
- if ((fa->fsx_xflags & XFS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
return -EINVAL;
- if ((fa->fsx_xflags & XFS_XFLAG_EXTSZINHERIT) &&
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
!S_ISDIR(ip->i_d.di_mode))
return -EINVAL;
return -EINVAL;
if (XFS_IS_REALTIME_INODE(ip) ||
- (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
+ (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
} else {
size = mp->m_sb.sb_blocksize;
if (fa->fsx_extsize % size)
return -EINVAL;
} else
- fa->fsx_xflags &= ~(XFS_XFLAG_EXTSIZE | XFS_XFLAG_EXTSZINHERIT);
+ fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
return 0;
}
if (xfs_get_projid(ip) != fa->fsx_projid)
return -EINVAL;
- if ((fa->fsx_xflags & XFS_XFLAG_PROJINHERIT) !=
+ if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) !=
(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
return -EINVAL;
inode->i_flags |= S_SYNC;
if (flags & XFS_DIFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
- /* XXX: Also needs an on-disk per inode flag! */
- if (ip->i_mount->m_flags & XFS_MOUNT_DAX)
+ if (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
+ ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
inode->i_flags |= S_DAX;
}
while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
xfs_iunlock(ip, *iolock);
if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
error = break_layout(inode, true);
*iolock = XFS_IOLOCK_EXCL;
if (with_imutex)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
xfs_ilock(ip, *iolock);
}
long tout = 0; /* milliseconds */
current->flags |= PF_MEMALLOC;
- set_freezable();
while (!kthread_should_stop()) {
if (tout && tout <= 20)
unsigned int keylen);
unsigned int reqsize;
+ bool has_setkey;
struct crypto_tfm base;
};
int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
unsigned int keylen);
+/* Report the has_setkey flag stored in the ahash handle @tfm. */
+static inline bool crypto_ahash_has_setkey(struct crypto_ahash *tfm)
+{
+ return tfm->has_setkey;
+}
+
/**
* crypto_ahash_finup() - update and finalize message digest
* @req: reference to the ahash_request handle that holds all information
struct sock *parent;
+ unsigned int refcnt;
+ unsigned int nokey_refcnt;
+
const struct af_alg_type *type;
void *private;
};
void (*release)(void *private);
int (*setkey)(void *private, const u8 *key, unsigned int keylen);
int (*accept)(void *private, struct sock *sk);
+ int (*accept_nokey)(void *private, struct sock *sk);
int (*setauthsize)(void *private, unsigned int authsize);
struct proto_ops *ops;
+ struct proto_ops *ops_nokey;
struct module *owner;
char name[14];
};
int af_alg_unregister_type(const struct af_alg_type *type);
int af_alg_release(struct socket *sock);
+void af_alg_release_parent(struct sock *sk);
int af_alg_accept(struct sock *sk, struct socket *newsock);
int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
return (struct alg_sock *)sk;
}
-static inline void af_alg_release_parent(struct sock *sk)
-{
- sock_put(alg_sk(sk)->parent);
-}
-
static inline void af_alg_init_completion(struct af_alg_completion *completion)
{
init_completion(&completion->completion);
unsigned int ivsize;
unsigned int reqsize;
+ bool has_setkey;
+
struct crypto_tfm base;
};
return tfm->setkey(tfm, key, keylen);
}
+/* Report the has_setkey flag stored in the skcipher handle @tfm. */
+static inline bool crypto_skcipher_has_setkey(struct crypto_skcipher *tfm)
+{
+ return tfm->has_setkey;
+}
+
/**
* crypto_skcipher_reqtfm() - obtain cipher handle from request
* @req: skcipher_request out of which the cipher handle is to be obtained
#ifndef _AER_H_
#define _AER_H_
+#include <linux/errno.h>
#include <linux/types.h>
#define AER_NONFATAL 0
BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */
};
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-
-static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
-{
- if (bio->bi_rw & REQ_INTEGRITY)
- return bio->bi_integrity;
-
- return NULL;
-}
-
/*
* bio integrity payload
*/
struct bio_vec bip_inline_vecs[0];/* embedded bvec array */
};
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+
+/*
+ * Return @bio's integrity payload (bio->bi_integrity) when REQ_INTEGRITY
+ * is set in bio->bi_rw, or NULL when it is not.
+ */
+static inline struct bio_integrity_payload *bio_integrity(struct bio *bio)
+{
+ if (bio->bi_rw & REQ_INTEGRITY)
+ return bio->bi_integrity;
+
+ return NULL;
+}
+
static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag)
{
struct bio_integrity_payload *bip = bio_integrity(bio);
return false;
}
+/*
+ * Stub: allocates nothing and always returns ERR_PTR(-EINVAL), so callers
+ * must test the result with IS_ERR() rather than against NULL.
+ * NOTE(review): appears to be the variant used when
+ * CONFIG_BLK_DEV_INTEGRITY is not defined - confirm against the
+ * enclosing #if/#else, which is outside this hunk.
+ */
+static inline void *bio_integrity_alloc(struct bio * bio, gfp_t gfp,
+ unsigned int nr)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+/*
+ * Stub: ignores all of its arguments and always returns 0.
+ * NOTE(review): appears to be the variant used when
+ * CONFIG_BLK_DEV_INTEGRITY is not defined - confirm against the
+ * enclosing #if/#else, which is outside this hunk.
+ */
+static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ return 0;
+}
+
#endif /* CONFIG_BLK_DEV_INTEGRITY */
#endif /* CONFIG_BLOCK */
+++ /dev/null
-#ifndef BLK_IOPOLL_H
-#define BLK_IOPOLL_H
-
-struct blk_iopoll;
-typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
-
-struct blk_iopoll {
- struct list_head list;
- unsigned long state;
- unsigned long data;
- int weight;
- int max;
- blk_iopoll_fn *poll;
-};
-
-enum {
- IOPOLL_F_SCHED = 0,
- IOPOLL_F_DISABLE = 1,
-};
-
-/*
- * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating
- * that we were the first to acquire this iop for scheduling. If this iop
- * is currently disabled, return "failure".
- */
-static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
-{
- if (!test_bit(IOPOLL_F_DISABLE, &iop->state))
- return test_and_set_bit(IOPOLL_F_SCHED, &iop->state);
-
- return 1;
-}
-
-static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
-{
- return test_bit(IOPOLL_F_DISABLE, &iop->state);
-}
-
-extern void blk_iopoll_sched(struct blk_iopoll *);
-extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
-extern void blk_iopoll_complete(struct blk_iopoll *);
-extern void __blk_iopoll_complete(struct blk_iopoll *);
-extern void blk_iopoll_enable(struct blk_iopoll *);
-extern void blk_iopoll_disable(struct blk_iopoll *);
-
-#endif
__REQ_PM, /* runtime pm request */
__REQ_HASHED, /* on IO scheduler merge hash */
__REQ_MQ_INFLIGHT, /* track inflight for MQ */
- __REQ_NO_TIMEOUT, /* requests may never expire */
__REQ_NR_BITS, /* stops here */
};
#define REQ_PM (1ULL << __REQ_PM)
#define REQ_HASHED (1ULL << __REQ_HASHED)
#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT)
-#define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT)
typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE -1U
unsigned int rq_timeout;
struct timer_list timeout;
+ struct work_struct timeout_work;
struct list_head timeout_list;
struct list_head icq_list;
return 24 - ceph_frag_bits(f);
}
-static inline int ceph_frag_contains_value(__u32 f, __u32 v)
+static inline bool ceph_frag_contains_value(__u32 f, __u32 v)
{
return (v & ceph_frag_mask(f)) == ceph_frag_value(f);
}
-static inline int ceph_frag_contains_frag(__u32 f, __u32 sub)
-{
- /* is sub as specific as us, and contained by us? */
- return ceph_frag_bits(sub) >= ceph_frag_bits(f) &&
- (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f);
-}
-static inline __u32 ceph_frag_parent(__u32 f)
-{
- return ceph_frag_make(ceph_frag_bits(f) - 1,
- ceph_frag_value(f) & (ceph_frag_mask(f) << 1));
-}
-static inline int ceph_frag_is_left_child(__u32 f)
-{
- return ceph_frag_bits(f) > 0 &&
- (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0;
-}
-static inline int ceph_frag_is_right_child(__u32 f)
-{
- return ceph_frag_bits(f) > 0 &&
- (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 1;
-}
-static inline __u32 ceph_frag_sibling(__u32 f)
-{
- return ceph_frag_make(ceph_frag_bits(f),
- ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f)));
-}
-static inline __u32 ceph_frag_left_child(__u32 f)
-{
- return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f));
-}
-static inline __u32 ceph_frag_right_child(__u32 f)
-{
- return ceph_frag_make(ceph_frag_bits(f)+1,
- ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f))));
-}
static inline __u32 ceph_frag_make_child(__u32 f, int by, int i)
{
int newbits = ceph_frag_bits(f) + by;
struct ceph_entity_addr actual_peer_addr;
/* message out temps */
+ struct ceph_msg_header out_hdr;
struct ceph_msg *out_msg; /* sending message (== tail of
out_sent) */
bool out_msg_done;
int out_kvec_left; /* kvec's left in out_kvec */
int out_skip; /* skip this many bytes */
int out_kvec_bytes; /* total bytes left */
- bool out_kvec_is_msg; /* kvec refers to out_msg */
int out_more; /* there is more data after the kvecs */
__le64 out_temp_ack; /* for writing an ack */
struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
{
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
}
+
+/*
+ * Return true when @mapping has a host inode and IS_DAX() holds for that
+ * inode; a mapping without a host yields false.
+ */
+static inline bool dax_mapping(struct address_space *mapping)
+{
+ return mapping->host && IS_DAX(mapping->host);
+}
+int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
+ loff_t end);
#endif
*/
#ifndef DRBD_H
#define DRBD_H
-#include <linux/connector.h>
#include <asm/types.h>
#ifdef __KERNEL__
#endif
extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.4.5"
+#define REL_VERSION "8.4.6"
#define API_VERSION 1
#define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 101
#define MDF_AL_CLEAN (1 << 7)
#define MDF_AL_DISABLED (1 << 8)
+#define MAX_PEERS 32
+
enum drbd_uuid_index {
UI_CURRENT,
UI_BITMAP,
UI_EXTENDED_SIZE /* Everything. */
};
+#define HISTORY_UUIDS MAX_PEERS
+
enum drbd_timeout_flag {
UT_DEFAULT = 0,
UT_DEGRADED = 1,
UT_PEER_OUTDATED = 2,
};
+enum drbd_notification_type {
+ NOTIFY_EXISTS,
+ NOTIFY_CREATE,
+ NOTIFY_CHANGE,
+ NOTIFY_DESTROY,
+ NOTIFY_CALL,
+ NOTIFY_RESPONSE,
+
+ NOTIFY_CONTINUES = 0x8000,
+ NOTIFY_FLAGS = NOTIFY_CONTINUES,
+};
+
#define UUID_JUST_CREATED ((__u64)4)
+enum write_ordering_e {
+ WO_NONE,
+ WO_DRAIN_IO,
+ WO_BDEV_FLUSH,
+ WO_BIO_BARRIER
+};
+
/* magic numbers used in meta data and network packets */
#define DRBD_MAGIC 0x83740267
#define DRBD_MAGIC_BIG 0x835a
__flg_field(1, DRBD_GENLA_F_MANDATORY, force_detach)
)
+GENL_struct(DRBD_NLA_RESOURCE_INFO, 15, resource_info,
+ __u32_field(1, 0, res_role)
+ __flg_field(2, 0, res_susp)
+ __flg_field(3, 0, res_susp_nod)
+ __flg_field(4, 0, res_susp_fen)
+ /* __flg_field(5, 0, res_weak) */
+)
+
+GENL_struct(DRBD_NLA_DEVICE_INFO, 16, device_info,
+ __u32_field(1, 0, dev_disk_state)
+)
+
+GENL_struct(DRBD_NLA_CONNECTION_INFO, 17, connection_info,
+ __u32_field(1, 0, conn_connection_state)
+ __u32_field(2, 0, conn_role)
+)
+
+GENL_struct(DRBD_NLA_PEER_DEVICE_INFO, 18, peer_device_info,
+ __u32_field(1, 0, peer_repl_state)
+ __u32_field(2, 0, peer_disk_state)
+ __u32_field(3, 0, peer_resync_susp_user)
+ __u32_field(4, 0, peer_resync_susp_peer)
+ __u32_field(5, 0, peer_resync_susp_dependency)
+)
+
+GENL_struct(DRBD_NLA_RESOURCE_STATISTICS, 19, resource_statistics,
+ __u32_field(1, 0, res_stat_write_ordering)
+)
+
+GENL_struct(DRBD_NLA_DEVICE_STATISTICS, 20, device_statistics,
+ __u64_field(1, 0, dev_size) /* (sectors) */
+ __u64_field(2, 0, dev_read) /* (sectors) */
+ __u64_field(3, 0, dev_write) /* (sectors) */
+ __u64_field(4, 0, dev_al_writes) /* activity log writes (count) */
+ __u64_field(5, 0, dev_bm_writes) /* bitmap writes (count) */
+ __u32_field(6, 0, dev_upper_pending) /* application requests in progress */
+ __u32_field(7, 0, dev_lower_pending) /* backing device requests in progress */
+ __flg_field(8, 0, dev_upper_blocked)
+ __flg_field(9, 0, dev_lower_blocked)
+ __flg_field(10, 0, dev_al_suspended) /* activity log suspended */
+ __u64_field(11, 0, dev_exposed_data_uuid)
+ __u64_field(12, 0, dev_current_uuid)
+ __u32_field(13, 0, dev_disk_flags)
+ __bin_field(14, 0, history_uuids, HISTORY_UUIDS * sizeof(__u64))
+)
+
+GENL_struct(DRBD_NLA_CONNECTION_STATISTICS, 21, connection_statistics,
+ __flg_field(1, 0, conn_congested)
+)
+
+GENL_struct(DRBD_NLA_PEER_DEVICE_STATISTICS, 22, peer_device_statistics,
+ __u64_field(1, 0, peer_dev_received) /* sectors */
+ __u64_field(2, 0, peer_dev_sent) /* sectors */
+ __u32_field(3, 0, peer_dev_pending) /* number of requests */
+ __u32_field(4, 0, peer_dev_unacked) /* number of requests */
+ __u64_field(5, 0, peer_dev_out_of_sync) /* sectors */
+ __u64_field(6, 0, peer_dev_resync_failed) /* sectors */
+ __u64_field(7, 0, peer_dev_bitmap_uuid)
+ __u32_field(9, 0, peer_dev_flags)
+)
+
+GENL_struct(DRBD_NLA_NOTIFICATION_HEADER, 23, drbd_notification_header,
+ __u32_field(1, DRBD_GENLA_F_MANDATORY, nh_type)
+)
+
+GENL_struct(DRBD_NLA_HELPER, 24, drbd_helper_info,
+ __str_field(1, DRBD_GENLA_F_MANDATORY, helper_name, 32)
+ __u32_field(2, DRBD_GENLA_F_MANDATORY, helper_status)
+)
+
/*
* Notifications and commands (genlmsghdr->cmd)
*/
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
GENL_op(DRBD_ADM_DOWN, 27, GENL_doit(drbd_adm_down),
GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+
+GENL_op(DRBD_ADM_GET_RESOURCES, 30,
+ GENL_op_init(
+ .dumpit = drbd_adm_dump_resources,
+ ),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+ GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_GENLA_F_MANDATORY)
+ GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_GENLA_F_MANDATORY))
+
+GENL_op(DRBD_ADM_GET_DEVICES, 31,
+ GENL_op_init(
+ .dumpit = drbd_adm_dump_devices,
+ .done = drbd_adm_dump_devices_done,
+ ),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+ GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_GENLA_F_MANDATORY)
+ GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY))
+
+GENL_op(DRBD_ADM_GET_CONNECTIONS, 32,
+ GENL_op_init(
+ .dumpit = drbd_adm_dump_connections,
+ .done = drbd_adm_dump_connections_done,
+ ),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+ GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_GENLA_F_MANDATORY)
+ GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_GENLA_F_MANDATORY))
+
+GENL_op(DRBD_ADM_GET_PEER_DEVICES, 33,
+ GENL_op_init(
+ .dumpit = drbd_adm_dump_peer_devices,
+ .done = drbd_adm_dump_peer_devices_done,
+ ),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+ GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_GENLA_F_MANDATORY)
+ GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_GENLA_F_MANDATORY))
+
+GENL_notification(
+ DRBD_RESOURCE_STATE, 34, events,
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_RESOURCE_INFO, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_RESOURCE_STATISTICS, DRBD_F_REQUIRED))
+
+GENL_notification(
+ DRBD_DEVICE_STATE, 35, events,
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_DEVICE_INFO, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_DEVICE_STATISTICS, DRBD_F_REQUIRED))
+
+GENL_notification(
+ DRBD_CONNECTION_STATE, 36, events,
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_CONNECTION_INFO, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_CONNECTION_STATISTICS, DRBD_F_REQUIRED))
+
+GENL_notification(
+ DRBD_PEER_DEVICE_STATE, 37, events,
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_PEER_DEVICE_INFO, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_PEER_DEVICE_STATISTICS, DRBD_F_REQUIRED))
+
+GENL_op(
+ DRBD_ADM_GET_INITIAL_STATE, 38,
+ GENL_op_init(
+ .dumpit = drbd_adm_get_initial_state,
+ ),
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY))
+
+GENL_notification(
+ DRBD_HELPER, 40, events,
+ GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+ GENL_tla_expected(DRBD_NLA_HELPER, DRBD_F_REQUIRED))
+
+GENL_notification(
+ DRBD_INITIAL_STATE_DONE, 41, events,
+ GENL_tla_expected(DRBD_NLA_NOTIFICATION_HEADER, DRBD_F_REQUIRED))
struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
unsigned long nrpages; /* number of total pages */
- unsigned long nrshadows; /* number of shadow entries */
+ /* number of shadow or DAX exceptional entries */
+ unsigned long nrexceptional;
pgoff_t writeback_index;/* writeback starts here */
const struct address_space_operations *a_ops; /* methods */
unsigned long flags; /* error bits/gfp mask */
I_MUTEX_PARENT2,
};
+/* Acquire @inode's i_mutex via mutex_lock(). */
+static inline void inode_lock(struct inode *inode)
+{
+ mutex_lock(&inode->i_mutex);
+}
+
+/* Release @inode's i_mutex via mutex_unlock(). */
+static inline void inode_unlock(struct inode *inode)
+{
+ mutex_unlock(&inode->i_mutex);
+}
+
+/*
+ * Attempt to take @inode's i_mutex; returns the result of
+ * mutex_trylock() unchanged.
+ */
+static inline int inode_trylock(struct inode *inode)
+{
+ return mutex_trylock(&inode->i_mutex);
+}
+
+/* Return the result of mutex_is_locked() on @inode's i_mutex. */
+static inline int inode_is_locked(struct inode *inode)
+{
+ return mutex_is_locked(&inode->i_mutex);
+}
+
+/*
+ * Acquire @inode's i_mutex via mutex_lock_nested(), passing @subclass
+ * through as the nesting subclass.
+ */
+static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
+{
+ mutex_lock_nested(&inode->i_mutex, subclass);
+}
+
void lock_two_nondirectories(struct inode *, struct inode*);
void unlock_two_nondirectories(struct inode *, struct inode*);
}
static inline bool dir_relax(struct inode *inode)
{
- mutex_unlock(&inode->i_mutex);
- mutex_lock(&inode->i_mutex);
+ inode_unlock(inode);
+ inode_lock(inode);
return !IS_DEADDIR(inode);
}
unsigned long start,
unsigned long end,
long adjust_next);
-extern bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
- spinlock_t **ptl);
+extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
+ struct vm_area_struct *vma);
-/* mmap_sem must be held on entry */
-static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
- spinlock_t **ptl)
+/*
+ * mmap_sem must be held on entry.
+ *
+ * When *pmd is a transparent-huge or devmap pmd, returns whatever
+ * __pmd_trans_huge_lock() returns; otherwise returns NULL.  The result is
+ * a pointer, so the "nothing locked" case is spelled NULL rather than
+ * false, matching the stub used when the feature is compiled out.
+ */
+static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
+ struct vm_area_struct *vma)
{
VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
- return __pmd_trans_huge_lock(pmd, vma, ptl);
+ return __pmd_trans_huge_lock(pmd, vma);
else
- return false;
+ return NULL;
}
long adjust_next)
{
}
-static inline bool pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
- spinlock_t **ptl)
+static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
+ struct vm_area_struct *vma)
{
- return false;
+ return NULL;
}
static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
#define idr_for_each_entry(idp, entry, id) \
for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
+/**
+ * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
+ * @idp: idr handle
+ * @entry: the type * to use as cursor
+ * @id: id entry's key
+ *
+ * Continue to iterate over the idr's entries of the given type, continuing
+ * after the current position.
+ */
+#define idr_for_each_entry_continue(idp, entry, id) \
+ for ((entry) = idr_get_next((idp), &(id)); \
+ entry; \
+ ++id, (entry) = idr_get_next((idp), &(id)))
+
/*
* IDA - IDR based id allocator, use when translation from id to
* pointer isn't necessary.
NET_TX_SOFTIRQ,
NET_RX_SOFTIRQ,
BLOCK_SOFTIRQ,
- BLOCK_IOPOLL_SOFTIRQ,
+ IRQ_POLL_SOFTIRQ,
TASKLET_SOFTIRQ,
SCHED_SOFTIRQ,
HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
--- /dev/null
+#ifndef IRQ_POLL_H
+#define IRQ_POLL_H
+
+struct irq_poll;
+typedef int (irq_poll_fn)(struct irq_poll *, int);
+
+struct irq_poll {
+ struct list_head list;
+ unsigned long state;
+ int weight;
+ irq_poll_fn *poll;
+};
+
+enum {
+ IRQ_POLL_F_SCHED = 0,
+ IRQ_POLL_F_DISABLE = 1,
+};
+
+extern void irq_poll_sched(struct irq_poll *);
+extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *);
+extern void irq_poll_complete(struct irq_poll *);
+extern void irq_poll_enable(struct irq_poll *);
+extern void irq_poll_disable(struct irq_poll *);
+
+#endif
#ifndef NVM_H
#define NVM_H
+#include <linux/types.h>
+
enum {
NVM_IO_OK = 0,
NVM_IO_REQUEUE = 1,
NVM_IOTYPE_GC = 1,
};
+#define NVM_BLK_BITS (16)
+#define NVM_PG_BITS (16)
+#define NVM_SEC_BITS (8)
+#define NVM_PL_BITS (8)
+#define NVM_LUN_BITS (8)
+#define NVM_CH_BITS (8)
+
+struct ppa_addr {
+ /* Generic structure for all addresses */
+ union {
+ struct {
+ u64 blk : NVM_BLK_BITS;
+ u64 pg : NVM_PG_BITS;
+ u64 sec : NVM_SEC_BITS;
+ u64 pl : NVM_PL_BITS;
+ u64 lun : NVM_LUN_BITS;
+ u64 ch : NVM_CH_BITS;
+ } g;
+
+ u64 ppa;
+ };
+};
+
+struct nvm_rq;
+struct nvm_id;
+struct nvm_dev;
+
+typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
+typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
+typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
+typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
+ nvm_l2p_update_fn *, void *);
+typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int,
+ nvm_bb_update_fn *, void *);
+typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int);
+typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
+typedef void (nvm_destroy_dma_pool_fn)(void *);
+typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
+ dma_addr_t *);
+typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
+
+struct nvm_dev_ops {
+ nvm_id_fn *identity;
+ nvm_get_l2p_tbl_fn *get_l2p_tbl;
+ nvm_op_bb_tbl_fn *get_bb_tbl;
+ nvm_op_set_bb_fn *set_bb_tbl;
+
+ nvm_submit_io_fn *submit_io;
+ nvm_erase_blk_fn *erase_block;
+
+ nvm_create_dma_pool_fn *create_dma_pool;
+ nvm_destroy_dma_pool_fn *destroy_dma_pool;
+ nvm_dev_dma_alloc_fn *dev_dma_alloc;
+ nvm_dev_dma_free_fn *dev_dma_free;
+
+ unsigned int max_phys_sect;
+};
+
+
+
#ifdef CONFIG_NVM
#include <linux/blkdev.h>
-#include <linux/types.h>
#include <linux/file.h>
#include <linux/dmapool.h>
+#include <uapi/linux/lightnvm.h>
enum {
/* HW Responsibilities */
/* Block Types */
NVM_BLK_T_FREE = 0x0,
NVM_BLK_T_BAD = 0x1,
- NVM_BLK_T_DEV = 0x2,
- NVM_BLK_T_HOST = 0x4,
+ NVM_BLK_T_GRWN_BAD = 0x2,
+ NVM_BLK_T_DEV = 0x4,
+ NVM_BLK_T_HOST = 0x8,
+
+ /* Memory capabilities */
+ NVM_ID_CAP_SLC = 0x1,
+ NVM_ID_CAP_CMD_SUSPEND = 0x2,
+ NVM_ID_CAP_SCRAMBLE = 0x4,
+ NVM_ID_CAP_ENCRYPT = 0x8,
+
+ /* Memory types */
+ NVM_ID_FMTYPE_SLC = 0,
+ NVM_ID_FMTYPE_MLC = 1,
+};
+
+struct nvm_id_lp_mlc {
+ u16 num_pairs;
+ u8 pairs[886];
+};
+
+struct nvm_id_lp_tbl {
+ __u8 id[8];
+ struct nvm_id_lp_mlc mlc;
};
struct nvm_id_group {
u32 mpos;
u32 mccap;
u16 cpar;
+
+ struct nvm_id_lp_tbl lptbl;
};
struct nvm_addr_format {
#define NVM_VERSION_MINOR 0
#define NVM_VERSION_PATCH 0
-#define NVM_BLK_BITS (16)
-#define NVM_PG_BITS (16)
-#define NVM_SEC_BITS (8)
-#define NVM_PL_BITS (8)
-#define NVM_LUN_BITS (8)
-#define NVM_CH_BITS (8)
-
-struct ppa_addr {
- /* Generic structure for all addresses */
- union {
- struct {
- u64 blk : NVM_BLK_BITS;
- u64 pg : NVM_PG_BITS;
- u64 sec : NVM_SEC_BITS;
- u64 pl : NVM_PL_BITS;
- u64 lun : NVM_LUN_BITS;
- u64 ch : NVM_CH_BITS;
- } g;
-
- u64 ppa;
- };
-};
+struct nvm_rq;
+typedef void (nvm_end_io_fn)(struct nvm_rq *);
struct nvm_rq {
struct nvm_tgt_instance *ins;
void *metadata;
dma_addr_t dma_metadata;
+ struct completion *wait;
+ nvm_end_io_fn *end_io;
+
uint8_t opcode;
uint16_t nr_pages;
uint16_t flags;
+
+ int error;
};
static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu)
struct nvm_block;
-typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
-typedef int (nvm_bb_update_fn)(struct ppa_addr, int, u8 *, void *);
-typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
-typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
- nvm_l2p_update_fn *, void *);
-typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, int,
- nvm_bb_update_fn *, void *);
-typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct nvm_rq *, int);
-typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
-typedef void (nvm_destroy_dma_pool_fn)(void *);
-typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
- dma_addr_t *);
-typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
-
-struct nvm_dev_ops {
- nvm_id_fn *identity;
- nvm_get_l2p_tbl_fn *get_l2p_tbl;
- nvm_op_bb_tbl_fn *get_bb_tbl;
- nvm_op_set_bb_fn *set_bb_tbl;
-
- nvm_submit_io_fn *submit_io;
- nvm_erase_blk_fn *erase_block;
-
- nvm_create_dma_pool_fn *create_dma_pool;
- nvm_destroy_dma_pool_fn *destroy_dma_pool;
- nvm_dev_dma_alloc_fn *dev_dma_alloc;
- nvm_dev_dma_free_fn *dev_dma_free;
-
- unsigned int max_phys_sect;
-};
-
struct nvm_lun {
int id;
int lun_id;
int chnl_id;
- unsigned int nr_inuse_blocks; /* Number of used blocks */
+ /* It is up to the target to mark blocks as closed. If the target does
+ * not do it, all blocks are marked as open, and nr_open_blocks
+ * represents the number of blocks in use
+ */
+ unsigned int nr_open_blocks; /* Number of used, writable blocks */
+ unsigned int nr_closed_blocks; /* Number of used, read-only blocks */
unsigned int nr_free_blocks; /* Number of unused blocks */
unsigned int nr_bad_blocks; /* Number of bad blocks */
- struct nvm_block *blocks;
spinlock_t lock;
+
+ struct nvm_block *blocks;
+};
+
+enum {
+ NVM_BLK_ST_FREE = 0x1, /* Free block */
+ NVM_BLK_ST_OPEN = 0x2, /* Open block - read-write */
+ NVM_BLK_ST_CLOSED = 0x4, /* Closed block - read-only */
+ NVM_BLK_ST_BAD = 0x8, /* Bad block */
};
struct nvm_block {
unsigned long id;
void *priv;
- int type;
+ int state;
+};
+
+/* system block cpu representation */
+struct nvm_sb_info {
+ unsigned long seqnr;
+ unsigned long erase_cnt;
+ unsigned int version;
+ char mmtype[NVM_MMTYPE_LEN];
+ struct ppa_addr fs_ppa;
};
struct nvm_dev {
struct nvmm_type *mt;
void *mp;
+ /* System blocks */
+ struct nvm_sb_info sb;
+
/* Device information */
int nr_chnls;
int nr_planes;
int blks_per_lun;
int sec_size;
int oob_size;
+ int mccap;
struct nvm_addr_format ppaf;
/* Calculated/Cached values. These do not reflect the actual usable
int sec_per_blk;
int sec_per_lun;
+ /* lower page table */
+ int lps_per_blk;
+ int *lptbl;
+
unsigned long total_pages;
unsigned long total_blocks;
int nr_luns;
/* Backend device */
struct request_queue *q;
char name[DISK_NAME_LEN];
+
+ struct mutex mlock;
};
static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev,
return ppa;
}
+/*
+ * Return entry @slc_pg of dev->lptbl (described as the "lower page table"
+ * in struct nvm_dev).  No bounds check is performed on @slc_pg here.
+ * NOTE(review): presumably callers keep @slc_pg below dev->lps_per_blk -
+ * confirm against the code that fills lptbl.
+ */
+static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg)
+{
+ return dev->lptbl[slc_pg];
+}
+
typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
typedef sector_t (nvm_tgt_capacity_fn)(void *);
-typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int);
typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int);
typedef void (nvm_tgt_exit_fn)(void *);
/* target entry points */
nvm_tgt_make_rq_fn *make_rq;
nvm_tgt_capacity_fn *capacity;
- nvm_tgt_end_io_fn *end_io;
+ nvm_end_io_fn *end_io;
/* module-specific init/teardown */
nvm_tgt_init_fn *init;
typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *);
typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *);
typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef int (nvmm_end_io_fn)(struct nvm_rq *, int);
typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *,
unsigned long);
typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int);
nvmm_unregister_fn *unregister_mgr;
/* Block administration callbacks */
+ nvmm_get_blk_fn *get_blk_unlocked;
+ nvmm_put_blk_fn *put_blk_unlocked;
nvmm_get_blk_fn *get_blk;
nvmm_put_blk_fn *put_blk;
nvmm_open_blk_fn *open_blk;
nvmm_flush_blk_fn *flush_blk;
nvmm_submit_io_fn *submit_io;
- nvmm_end_io_fn *end_io;
nvmm_erase_blk_fn *erase_blk;
/* Configuration management */
extern int nvm_register_mgr(struct nvmm_type *);
extern void nvm_unregister_mgr(struct nvmm_type *);
+extern struct nvm_block *nvm_get_blk_unlocked(struct nvm_dev *,
+ struct nvm_lun *, unsigned long);
+extern void nvm_put_blk_unlocked(struct nvm_dev *, struct nvm_block *);
+
extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *,
unsigned long);
extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *);
extern void nvm_unregister(char *);
extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *);
+extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *);
+extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *);
+extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *,
+ struct ppa_addr *, int);
+extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *);
+extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int);
extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *);
+extern void nvm_end_io(struct nvm_rq *, int);
+extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int,
+ void *, int);
+
+/* sysblk.c */
+#define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */
+
+/* system block on disk representation; multi-byte fields are declared
+ * with big-endian types (__be16/__be32/__be64)
+ */
+struct nvm_system_block {
+ __be32 magic; /* magic signature */
+ __be32 seqnr; /* sequence number */
+ __be32 erase_cnt; /* erase count */
+ __be16 version; /* version number */
+ u8 mmtype[NVM_MMTYPE_LEN]; /* media manager name */
+ __be64 fs_ppa; /* PPA for media manager
+ * superblock */
+};
+
+extern int nvm_get_sysblock(struct nvm_dev *, struct nvm_sb_info *);
+extern int nvm_update_sysblock(struct nvm_dev *, struct nvm_sb_info *);
+extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *);
+
+extern int nvm_dev_factory(struct nvm_dev *, int flags);
#else /* CONFIG_NVM */
struct nvm_dev_ops;
extern void lc_committed(struct lru_cache *lc);
struct seq_file;
-extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
+extern void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
void (*detail) (struct seq_file *, struct lc_element *));
MLX4_SET_PORT_GID_TABLE = 0x5,
MLX4_SET_PORT_PRIO2TC = 0x8,
MLX4_SET_PORT_SCHEDULER = 0x9,
- MLX4_SET_PORT_VXLAN = 0xB
+ MLX4_SET_PORT_VXLAN = 0xB,
+ MLX4_SET_PORT_ROCE_ADDR = 0xD
};
enum {
MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30,
MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32,
+ MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33,
};
enum {
MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9,
MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10,
MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11,
+ MLX4_BMME_FLAG_ROCE_V1_V2 = 1 << 19,
MLX4_BMME_FLAG_PORT_REMAP = 1 << 24,
MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28,
};
enum {
- MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP
+ MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP,
+ MLX4_FLAG_ROCE_V1_V2 = MLX4_BMME_FLAG_ROCE_V1_V2
};
enum mlx4_event {
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if ((type) == (dev)->caps.port_mask[(port)])
-#define mlx4_foreach_non_ib_transport_port(port, dev) \
- for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
- if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
-
#define mlx4_foreach_ib_transport_port(port, dev) \
- for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
+ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
- ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+ ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \
+ ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
#define MLX4_INVALID_SLAVE_ID 0xFF
#define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1)
int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis);
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port);
int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2);
int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
u8 mtu_msgmax;
u8 rq_size_stride;
u8 sq_size_stride;
- u8 rlkey;
+ u8 rlkey_roce_mode;
__be32 usr_page;
__be32 local_qpn;
__be32 remote_qpn;
u32 reserved1;
__be32 next_send_psn;
__be32 cqn_send;
- u32 reserved2[2];
+ __be16 roce_entropy;
+ __be16 reserved2[3];
__be32 last_acked_psn;
__be32 ssn;
__be32 params2;
void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
+static inline u16 folded_qp(u32 q)
+{ /* Fold q into 16 bits: the low byte of the result is bits 0-7 of q
+   * XORed with bits 16-23 of q; the high byte is bits 8-15 of q
+   * unchanged. Bits 24-31 of q do not contribute.
+   */
+ u16 res;
+
+ res = ((q & 0xff) ^ ((q & 0xff0000) >> 16)) | (q & 0xff00);
+ return res;
+}
+
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn);
+
#endif /* MLX4_QP_H */
#define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1)
#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
+#define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8)
+
+enum {
+ MLX5_EVENT_QUEUE_TYPE_QP = 0,
+ MLX5_EVENT_QUEUE_TYPE_RQ = 1,
+ MLX5_EVENT_QUEUE_TYPE_SQ = 2,
+};
+
enum mlx5_event {
MLX5_EVENT_TYPE_COMP = 0x0,
MLX5_DEV_CAP_FLAG_CMDIF_CSUM = 3LL << 46,
};
+enum {
+ MLX5_ROCE_VERSION_1 = 0,
+ MLX5_ROCE_VERSION_2 = 2,
+};
+
+enum { /* capability bits: bit position is the MLX5_ROCE_VERSION_* value */
+ MLX5_ROCE_VERSION_1_CAP = 1 << MLX5_ROCE_VERSION_1,
+ MLX5_ROCE_VERSION_2_CAP = 1 << MLX5_ROCE_VERSION_2,
+};
+
+enum {
+ MLX5_ROCE_L3_TYPE_IPV4 = 0,
+ MLX5_ROCE_L3_TYPE_IPV6 = 1,
+};
+
+enum { /* NOTE(review): bits 1 and 2, i.e. not 1 << MLX5_ROCE_L3_TYPE_*; presumably this mirrors the device capability layout - confirm */
+ MLX5_ROCE_L3_TYPE_IPV4_CAP = 1 << 1,
+ MLX5_ROCE_L3_TYPE_IPV6_CAP = 1 << 2,
+};
+
enum {
MLX5_OPCODE_NOP = 0x00,
MLX5_OPCODE_SEND_INVAL = 0x01,
__be32 rsvd2[880];
__be32 internal_timer_h;
__be32 internal_timer_l;
- __be32 rsrv3[2];
+ __be32 rsvd3[2];
__be32 health_counter;
__be32 rsvd4[1019];
__be64 ieee1588_clk;
};
struct mlx5_eqe_qp_srq {
- __be32 reserved[6];
+ __be32 reserved1[5];
+ u8 type;
+ u8 reserved2[3];
__be32 qp_srq_n;
};
CQE_RSS_HTYPE_L4 = 0x3 << 2,
};
+enum {
+ MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH = 0x0,
+ MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6 = 0x1,
+ MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4 = 0x2,
+};
+
enum {
CQE_L2_OK = 1 << 0,
CQE_L3_OK = 1 << 1,
MLX5_REG_HOST_ENDIANNESS = 0x7004,
};
+enum {
+ MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0,
+ MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1,
+};
+
enum mlx5_page_fault_resume_flags {
MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
MLX5_PAGE_FAULT_RESUME_WRITE = 1 << 1,
};
enum mlx5_res_type {
- MLX5_RES_QP,
- MLX5_RES_SRQ,
- MLX5_RES_XSRQ,
+ MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP,
+ MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ,
+ MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ,
+ MLX5_RES_SRQ = 3,
+ MLX5_RES_XSRQ = 4,
};
struct mlx5_core_rsc_common {
.struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
.struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field
-struct ib_field {
- size_t struct_offset_bytes;
- size_t struct_size_bytes;
- int offset_bits;
- int size_bits;
-};
-
static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev)
{
return pci_get_drvdata(pdev);
MLX5_MODIFY_TIR_BITMASK_TUNNELED_OFFLOAD_EN = 0x3
};
+enum {
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0,
+ MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3,
+};
+
enum {
MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
MLX5_CMD_OP_QUERY_ADAPTER = 0x101,
struct mlx5_ifc_atomic_caps_bits {
u8 reserved_0[0x40];
- u8 atomic_req_endianness[0x1];
- u8 reserved_1[0x1f];
+ u8 atomic_req_8B_endianess_mode[0x2];
+ u8 reserved_1[0x4];
+ u8 supported_atomic_req_8B_endianess_mode_1[0x1];
- u8 reserved_2[0x20];
+ u8 reserved_2[0x19];
- u8 reserved_3[0x10];
- u8 atomic_operations[0x10];
+ u8 reserved_3[0x20];
u8 reserved_4[0x10];
- u8 atomic_size_qp[0x10];
+ u8 atomic_operations[0x10];
u8 reserved_5[0x10];
+ u8 atomic_size_qp[0x10];
+
+ u8 reserved_6[0x10];
u8 atomic_size_dc[0x10];
- u8 reserved_6[0x720];
+ u8 reserved_7[0x720];
};
struct mlx5_ifc_odp_cap_bits {
u8 reserved_66[0x8];
u8 log_uar_page_sz[0x10];
- u8 reserved_67[0x40];
+ u8 reserved_67[0x20];
+ u8 device_frequency_mhz[0x20];
u8 device_frequency_khz[0x20];
u8 reserved_68[0x5f];
u8 cqe_zip[0x1];
u8 mtu[0x10];
- u8 reserved_3[0x640];
+ u8 system_image_guid[0x40];
+ u8 port_guid[0x40];
+ u8 node_guid[0x40];
+
+ u8 reserved_3[0x140];
+ u8 qkey_violation_counter[0x10];
+ u8 reserved_4[0x430];
u8 promisc_uc[0x1];
u8 promisc_mc[0x1];
u8 promisc_all[0x1];
- u8 reserved_4[0x2];
+ u8 reserved_5[0x2];
u8 allowed_list_type[0x3];
- u8 reserved_5[0xc];
+ u8 reserved_6[0xc];
u8 allowed_list_size[0xc];
struct mlx5_ifc_mac_address_layout_bits permanent_address;
- u8 reserved_6[0x20];
+ u8 reserved_7[0x20];
u8 current_uc_mac_address[0][0x40];
};
u8 reserved_1[0x40];
};
+struct mlx5_ifc_modify_tis_bitmask_bits {
+ u8 reserved_0[0x20];
+
+ u8 reserved_1[0x1f];
+ u8 prio[0x1];
+};
+
struct mlx5_ifc_modify_tis_in_bits {
u8 opcode[0x10];
u8 reserved_0[0x10];
u8 reserved_3[0x20];
- u8 modify_bitmask[0x40];
+ struct mlx5_ifc_modify_tis_bitmask_bits bitmask;
u8 reserved_4[0x40];
MLX5_QP_STATE_ERR = 6,
MLX5_QP_STATE_SQ_DRAINING = 7,
MLX5_QP_STATE_SUSPENDED = 9,
- MLX5_QP_NUM_STATE
+ MLX5_QP_NUM_STATE,
+ MLX5_QP_STATE,
+ MLX5_QP_STATE_BAD,
+};
+
+enum {
+ MLX5_SQ_STATE_NA = MLX5_SQC_STATE_ERR + 1,
+ MLX5_SQ_NUM_STATE = MLX5_SQ_STATE_NA + 1,
+ MLX5_RQ_STATE_NA = MLX5_RQC_STATE_ERR + 1,
+ MLX5_RQ_NUM_STATE = MLX5_RQ_STATE_NA + 1,
};
enum {
MLX5_QP_BIT_RWE = 1 << 14,
MLX5_QP_BIT_RAE = 1 << 13,
MLX5_QP_BIT_RIC = 1 << 4,
+ MLX5_QP_BIT_CC_SLAVE_RECV = 1 << 2,
+ MLX5_QP_BIT_CC_SLAVE_SEND = 1 << 1,
+ MLX5_QP_BIT_CC_MASTER = 1 << 0
};
enum {
__be32 dqp_dct;
u8 stat_rate_sl;
u8 fl_mlid;
- __be16 rlid;
- u8 reserved0[10];
+ union {
+ __be16 rlid;
+ __be16 udp_sport;
+ };
+ u8 reserved0[4];
+ u8 rmac[6];
u8 tclass;
u8 hop_limit;
__be32 grh_gid_fl;
u8 static_rate;
u8 hop_limit;
__be32 tclass_flowlabel;
- u8 rgid[16];
- u8 rsvd1[4];
- u8 sl;
+ union {
+ u8 rgid[16];
+ u8 rip[16];
+ };
+ u8 f_dscp_ecn_prio;
+ u8 ecn_dscp;
+ __be16 udp_sport;
+ u8 dci_cfi_prio_sl;
u8 port;
- u8 rsvd2[6];
+ u8 rmac[6];
};
struct mlx5_qp_context {
struct mlx5_core_qp *qp,
struct mlx5_create_qp_mbox_in *in,
int inlen);
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
- enum mlx5_qp_state new_state,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
struct mlx5_modify_qp_mbox_in *in, int sqd_event,
struct mlx5_core_qp *qp);
int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
u8 context, int error);
#endif
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *rq);
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *rq);
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *sq);
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *sq);
static inline const char *mlx5_qp_type_str(int type)
{
--- /dev/null
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __TRANSOBJ_H__
+#define __TRANSOBJ_H__
+
+#include <linux/mlx5/driver.h>
+
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
+int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *rqn);
+int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
+void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out);
+int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *sqn);
+int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
+void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
+int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *tirn);
+int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
+ int inlen);
+void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
+int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *tisn);
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+ int inlen);
+void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
+int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *rmpn);
+int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen);
+int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn);
+int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
+int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
+int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *rmpn);
+int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 rmpn);
+int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u32 *out);
+int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm);
+
+int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ u32 *rqtn);
+int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
+ int inlen);
+void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn);
+
+#endif /* __TRANSOBJ_H__ */
u16 vport, u8 *addr);
int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
u16 vport, u8 *addr);
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+ u64 *system_image_guid);
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+ u16 *qkey_viol_cntr);
int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
u8 port_num, u16 vf_num, u16 gid_index,
union ib_gid *gid);
u16 vlans[],
int list_size);
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
+
#endif /* __MLX5_VPORT_H__ */
#include <linux/types.h>
-struct nvme_bar {
- __u64 cap; /* Controller Capabilities */
- __u32 vs; /* Version */
- __u32 intms; /* Interrupt Mask Set */
- __u32 intmc; /* Interrupt Mask Clear */
- __u32 cc; /* Controller Configuration */
- __u32 rsvd1; /* Reserved */
- __u32 csts; /* Controller Status */
- __u32 nssr; /* Subsystem Reset */
- __u32 aqa; /* Admin Queue Attributes */
- __u64 asq; /* Admin SQ Base Address */
- __u64 acq; /* Admin CQ Base Address */
- __u32 cmbloc; /* Controller Memory Buffer Location */
- __u32 cmbsz; /* Controller Memory Buffer Size */
+enum {
+ NVME_REG_CAP = 0x0000, /* Controller Capabilities */
+ NVME_REG_VS = 0x0008, /* Version */
+ NVME_REG_INTMS = 0x000c, /* Interrupt Mask Set */
+ NVME_REG_INTMC = 0x0010, /* Interrupt Mask Clear */
+ NVME_REG_CC = 0x0014, /* Controller Configuration */
+ NVME_REG_CSTS = 0x001c, /* Controller Status */
+ NVME_REG_NSSR = 0x0020, /* NVM Subsystem Reset */
+ NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */
+ NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */
+ NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */
+ NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
+ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */
};
#define NVME_CAP_MQES(cap) ((cap) & 0xffff)
unsigned int nr_pages, struct page **pages);
unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
int tag, unsigned int nr_pages, struct page **pages);
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+ int tag, unsigned int nr_entries,
+ struct page **entries, pgoff_t *indices);
struct page *grab_cache_page_write_begin(struct address_space *mapping,
pgoff_t index, unsigned flags);
* @fasync_readers: reader side fasync
* @fasync_writers: writer side fasync
* @bufs: the circular array of pipe buffers
+ * @user: the user who created this pipe
**/
struct pipe_inode_info {
struct mutex mutex;
struct fasync_struct *fasync_readers;
struct fasync_struct *fasync_writers;
struct pipe_buffer *bufs;
+ struct user_struct *user;
};
/*
void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
extern unsigned int pipe_max_size, pipe_min_size;
+extern unsigned long pipe_user_pages_hard;
+extern unsigned long pipe_user_pages_soft;
int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
{
BUG();
}
+
+static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+{ /* Stub that must never be reached: it calls BUG() unconditionally.
+   * Presumably the fallback used when the architecture supplies no
+   * PMEM API - confirm against the enclosing #if.
+   */
+ BUG();
+}
#endif
/*
* Architectures that define ARCH_HAS_PMEM_API must provide
* implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
- * arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem().
+ * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem()
+ * and arch_has_wmb_pmem().
*/
static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
{
else
default_clear_pmem(addr, size);
}
+
+/**
+ * wb_cache_pmem - write back processor cache for PMEM memory range
+ * @addr: virtual start address
+ * @size: number of bytes to write back
+ *
+ * Write back the processor cache range starting at 'addr' for 'size' bytes.
+ * This function requires explicit ordering with a wmb_pmem() call.
+ *
+ * Does nothing when arch_has_pmem_api() returns false.
+ */
+static inline void wb_cache_pmem(void __pmem *addr, size_t size)
+{
+ if (arch_has_pmem_api())
+ arch_wb_cache_pmem(addr, size);
+}
#endif /* __PMEM_H__ */
#define RADIX_TREE_EXCEPTIONAL_ENTRY 2
#define RADIX_TREE_EXCEPTIONAL_SHIFT 2
+#define RADIX_DAX_MASK 0xf /* low four bits of an entry: its type */
+#define RADIX_DAX_SHIFT 4 /* the sector is stored above the type bits */
+#define RADIX_DAX_PTE (0x4 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_PMD (0x8 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_MASK) /* NOTE(review): 'entry' is not parenthesized, so the cast binds only to the first operand of a compound argument */
+#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT)) /* same caveat: pass a plain identifier */
+#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \
+ RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE))) /* packs sector and type into one pointer-sized value; 'sector' and 'pmd' are likewise unparenthesized */
+
static inline int radix_tree_is_indirect_ptr(void *ptr)
{
return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);
#endif
unsigned long locked_shm; /* How many pages of mlocked shm ? */
unsigned long unix_inflight; /* How many files in flight in unix sockets */
+ atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */
#ifdef CONFIG_KEYS
struct key *uid_keyring; /* UID specific keyring */
unsigned int seals; /* shmem seals */
unsigned long flags;
unsigned long alloced; /* data pages alloced to file */
- union {
- unsigned long swapped; /* subtotal assigned to swap */
- char *symlink; /* unswappable short symlink */
- };
+ unsigned long swapped; /* subtotal assigned to swap */
struct shared_policy policy; /* NUMA memory alloc policy */
struct list_head swaplist; /* chain of maybes on swap */
struct simple_xattrs xattrs; /* list of xattrs */
/* RPC/RDMA parameters and stats */
extern unsigned int svcrdma_ord;
extern unsigned int svcrdma_max_requests;
+extern unsigned int svcrdma_max_bc_requests;
extern unsigned int svcrdma_max_req_size;
extern atomic_t rdma_stat_recv;
* completes.
*/
struct svc_rdma_op_ctxt {
+ struct list_head free;
struct svc_rdma_op_ctxt *read_hdr;
struct svc_rdma_fastreg_mr *frmr;
int hdr_count;
struct list_head frmr_list;
};
struct svc_rdma_req_map {
+ struct list_head free;
unsigned long count;
union {
struct kvec sge[RPCSVC_MAXPAGES];
int sc_max_sge;
int sc_max_sge_rd; /* max sge for read target */
- int sc_sq_depth; /* Depth of SQ */
atomic_t sc_sq_count; /* Number of SQ WR on queue */
-
- int sc_max_requests; /* Depth of RQ */
+ unsigned int sc_sq_depth; /* Depth of SQ */
+ unsigned int sc_rq_depth; /* Depth of RQ */
+ u32 sc_max_requests; /* Forward credits */
+ u32 sc_max_bc_requests;/* Backward credits */
int sc_max_req_size; /* Size of each RQ WR buf */
struct ib_pd *sc_pd;
atomic_t sc_dma_used;
- atomic_t sc_ctxt_used;
+ spinlock_t sc_ctxt_lock;
+ struct list_head sc_ctxts;
+ int sc_ctxt_used;
+ spinlock_t sc_map_lock;
+ struct list_head sc_maps;
+
struct list_head sc_rq_dto_q;
spinlock_t sc_rq_dto_lock;
struct ib_qp *sc_qp;
struct ib_cq *sc_rq_cq;
struct ib_cq *sc_sq_cq;
- struct ib_mr *sc_phys_mr; /* MR for server memory */
int (*sc_reader)(struct svcxprt_rdma *,
struct svc_rqst *,
struct svc_rdma_op_ctxt *,
int *, u32 *, u32, u32, u64, bool);
u32 sc_dev_caps; /* distilled device caps */
- u32 sc_dma_lkey; /* local dma key */
unsigned int sc_frmr_pg_list_len;
struct list_head sc_frmr_q;
spinlock_t sc_frmr_q_lock;
#define RPCRDMA_MAX_REQUESTS 32
#define RPCRDMA_MAX_REQ_SIZE 4096
+/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our
+ * current NFSv4.1 implementation supports one backchannel slot.
+ */
+#define RPCRDMA_MAX_BC_REQUESTS 2
+
#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
+/* svc_rdma_backchannel.c */
+extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
+ struct rpcrdma_msg *rmsgp,
+ struct xdr_buf *rcvbuf);
+
/* svc_rdma_marshal.c */
extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
u32, u32, u64, bool);
/* svc_rdma_sendto.c */
+extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
+ struct svc_rdma_req_map *);
extern int svc_rdma_sendto(struct svc_rqst *);
extern struct rpcrdma_read_chunk *
svc_rdma_get_read_chunk(struct rpcrdma_msg *);
extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
enum rpcrdma_errcode);
-extern int svc_rdma_post_recv(struct svcxprt_rdma *);
+extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
-extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
-extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
+extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *);
+extern void svc_rdma_put_req_map(struct svcxprt_rdma *,
+ struct svc_rdma_req_map *);
extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
struct svc_rdma_fastreg_mr *);
#endif
/* svc_rdma.c */
+extern struct workqueue_struct *svc_rdma_wq;
extern int svc_rdma_init(void);
extern void svc_rdma_cleanup(void);
/* Default weight of a bound cooling device */
#define THERMAL_WEIGHT_DEFAULT 0
+/* Use a value below 0 K to indicate an invalid/uninitialized temperature */
+#define THERMAL_TEMP_INVALID -274000
+
/* Unit conversion macros */
#define DECI_KELVIN_TO_CELSIUS(t) ({ \
long _t = (t); \
* @forced_passive: If > 0, temperature at which to switch on all ACPI
* processor cooling devices. Currently only used by the
* step-wise governor.
+ * @need_update: if equal to 1, thermal_zone_device_update needs to be invoked.
* @ops: operations this &thermal_zone_device supports
* @tzp: thermal zone parameters
* @governor: pointer to the governor for this thermal zone
int emul_temperature;
int passive;
unsigned int forced_passive;
+ atomic_t need_update;
struct thermal_zone_device_ops *ops;
struct thermal_zone_params *tzp;
struct thermal_governor *governor;
int bound_dev_if;
enum rdma_transport_type transport;
struct net *net;
+ enum rdma_network_type network;
+ int hoplimit;
};
/**
*
* The dev_addr->net field must be initialized.
*/
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
- u16 *vlan_id);
+int rdma_translate_ip(const struct sockaddr *addr,
+ struct rdma_dev_addr *dev_addr, u16 *vlan_id);
/**
* rdma_resolve_ip - Resolve source and destination IP addresses to
struct rdma_dev_addr *addr, void *context),
void *context);
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr,
+ struct rdma_dev_addr *addr);
+
void rdma_addr_cancel(struct rdma_dev_addr *addr);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
int rdma_addr_size(struct sockaddr *addr);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
-int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
- u8 *smac, u16 *vlan_id, int if_index);
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+ const union ib_gid *dgid,
+ u8 *smac, u16 *vlan_id, int *if_index,
+ int *hoplimit);
static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
{
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
* @ndev: In RoCE, the net device of the device. NULL means ignore.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the cached GID table where the GID was found. This
*/
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
struct net_device *ndev,
u8 *port_num,
u16 *index);
* GID value occurs
* @device: The device to query.
* @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
* @port_num: The port number of the device where the GID value sould be
* searched.
* @ndev: In RoCE, the net device of the device. Null means ignore.
*/
int ib_find_cached_gid_by_port(struct ib_device *device,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
u8 port_num,
struct net_device *ndev,
u16 *index);
/**
* ib_mad_recv_handler - callback handler for a received MAD.
* @mad_agent: MAD agent requesting the received MAD.
+ * @send_buf: Send buffer if found, else NULL
* @mad_recv_wc: Received work completion information on the received MAD.
*
* MADs received in response to a send request operation will be handed to
* modify the data referenced by @mad_recv_wc.
*/
typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc);
/**
IB_ETH_BYTES = 14,
IB_VLAN_BYTES = 4,
IB_GRH_BYTES = 40,
+ IB_IP4_BYTES = 20,
+ IB_UDP_BYTES = 8,
IB_BTH_BYTES = 12,
IB_DETH_BYTES = 8
};
__be16 type;
};
+struct ib_unpacked_ip4 {
+ u8 ver;
+ u8 hdr_len;
+ u8 tos;
+ __be16 tot_len;
+ __be16 id;
+ __be16 frag_off;
+ u8 ttl;
+ u8 protocol;
+ __sum16 check;
+ __be32 saddr;
+ __be32 daddr;
+};
+
+struct ib_unpacked_udp {
+ __be16 sport;
+ __be16 dport;
+ __be16 length;
+ __be16 csum;
+};
+
struct ib_unpacked_vlan {
__be16 tag;
__be16 type;
struct ib_unpacked_vlan vlan;
int grh_present;
struct ib_unpacked_grh grh;
+ int ipv4_present;
+ struct ib_unpacked_ip4 ip4;
+ int udp_present;
+ struct ib_unpacked_udp udp;
struct ib_unpacked_bth bth;
struct ib_unpacked_deth deth;
int immediate_present;
void *buf,
void *structure);
-void ib_ud_header_init(int payload_bytes,
- int lrh_present,
- int eth_present,
- int vlan_present,
- int grh_present,
- int immediate_present,
- struct ib_ud_header *header);
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header);
+
+int ib_ud_header_init(int payload_bytes,
+ int lrh_present,
+ int eth_present,
+ int vlan_present,
+ int grh_present,
+ int ip_version,
+ int udp_present,
+ int immediate_present,
+ struct ib_ud_header *header);
int ib_ud_header_pack(struct ib_ud_header *header,
void *buf);
*/
#define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8)
#define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9)
+#define IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF cpu_to_be16(1 << 10)
#define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12)
#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001)
int ifindex;
/* ignored in IB */
struct net *net;
+ enum ib_gid_type gid_type;
};
static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec)
*/
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
+ struct net_device *ndev,
+ enum ib_gid_type gid_type,
struct ib_ah_attr *ah_attr);
/**
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
#include <linux/socket.h>
+#include <linux/irq_poll.h>
#include <uapi/linux/if_ether.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <linux/string.h>
+#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/mmu_notifier.h>
#include <asm/uaccess.h>
extern struct workqueue_struct *ib_wq;
+extern struct workqueue_struct *ib_comp_wq;
union ib_gid {
u8 raw[16];
extern union ib_gid zgid;
+enum ib_gid_type {
+ /* If link layer is Ethernet, this is RoCE V1 */
+ IB_GID_TYPE_IB = 0,
+ IB_GID_TYPE_ROCE = 0, /* alias: same value as IB_GID_TYPE_IB */
+ IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
+ IB_GID_TYPE_SIZE /* one past the last type value, i.e. 2 */
+};
+
+#define ROCE_V2_UDP_DPORT 4791
struct ib_gid_attr {
+ enum ib_gid_type gid_type;
struct net_device *ndev;
};
__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type);
+enum rdma_network_type {
+ RDMA_NETWORK_IB,
+ RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB, /* alias: same value as RDMA_NETWORK_IB */
+ RDMA_NETWORK_IPV4,
+ RDMA_NETWORK_IPV6
+};
+
+static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type)
+{ /* Map a network type to a GID type: RDMA_NETWORK_IPV4 and
+   * RDMA_NETWORK_IPV6 give IB_GID_TYPE_ROCE_UDP_ENCAP; every other
+   * value gives IB_GID_TYPE_IB.
+   */
+ if (network_type == RDMA_NETWORK_IPV4 ||
+ network_type == RDMA_NETWORK_IPV6)
+ return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+ /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
+ return IB_GID_TYPE_IB;
+}
+
+static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
+ union ib_gid *gid)
+{ /* Map a GID type plus GID value to a network type. IB_GID_TYPE_IB
+   * gives RDMA_NETWORK_IB without looking at the GID. For any other
+   * type the GID is read as an IPv6 address: RDMA_NETWORK_IPV4 if
+   * ipv6_addr_v4mapped() accepts it, RDMA_NETWORK_IPV6 otherwise.
+   */
+ if (gid_type == IB_GID_TYPE_IB)
+ return RDMA_NETWORK_IB;
+
+ if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+ return RDMA_NETWORK_IPV4;
+ else
+ return RDMA_NETWORK_IPV6;
+}
+
enum rdma_link_layer {
IB_LINK_LAYER_UNSPECIFIED,
IB_LINK_LAYER_INFINIBAND,
};
enum ib_device_cap_flags {
- IB_DEVICE_RESIZE_MAX_WR = 1,
- IB_DEVICE_BAD_PKEY_CNTR = (1<<1),
- IB_DEVICE_BAD_QKEY_CNTR = (1<<2),
- IB_DEVICE_RAW_MULTI = (1<<3),
- IB_DEVICE_AUTO_PATH_MIG = (1<<4),
- IB_DEVICE_CHANGE_PHY_PORT = (1<<5),
- IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6),
- IB_DEVICE_CURR_QP_STATE_MOD = (1<<7),
- IB_DEVICE_SHUTDOWN_PORT = (1<<8),
- IB_DEVICE_INIT_TYPE = (1<<9),
- IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10),
- IB_DEVICE_SYS_IMAGE_GUID = (1<<11),
- IB_DEVICE_RC_RNR_NAK_GEN = (1<<12),
- IB_DEVICE_SRQ_RESIZE = (1<<13),
- IB_DEVICE_N_NOTIFY_CQ = (1<<14),
- IB_DEVICE_LOCAL_DMA_LKEY = (1<<15),
- IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */
- IB_DEVICE_MEM_WINDOW = (1<<17),
+ IB_DEVICE_RESIZE_MAX_WR = (1 << 0),
+ IB_DEVICE_BAD_PKEY_CNTR = (1 << 1),
+ IB_DEVICE_BAD_QKEY_CNTR = (1 << 2),
+ IB_DEVICE_RAW_MULTI = (1 << 3),
+ IB_DEVICE_AUTO_PATH_MIG = (1 << 4),
+ IB_DEVICE_CHANGE_PHY_PORT = (1 << 5),
+ IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6),
+ IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7),
+ IB_DEVICE_SHUTDOWN_PORT = (1 << 8),
+ IB_DEVICE_INIT_TYPE = (1 << 9),
+ IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10),
+ IB_DEVICE_SYS_IMAGE_GUID = (1 << 11),
+ IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12),
+ IB_DEVICE_SRQ_RESIZE = (1 << 13),
+ IB_DEVICE_N_NOTIFY_CQ = (1 << 14),
+
+ /*
+ * This device supports a per-device lkey or stag that can be
+ * used without performing a memory registration for the local
+ * memory. Note that ULPs should never check this flag, but
+ * instead use the local_dma_lkey field in the ib_pd structure,
+ * which will always contain a usable lkey.
+ */
+ IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15),
+ IB_DEVICE_RESERVED /* old SEND_W_INV */ = (1 << 16),
+ IB_DEVICE_MEM_WINDOW = (1 << 17),
/*
* Devices should set IB_DEVICE_UD_IP_SUM if they support
* insertion of UDP and TCP checksum on outgoing UD IPoIB
* incoming messages. Setting this flag implies that the
* IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
*/
- IB_DEVICE_UD_IP_CSUM = (1<<18),
- IB_DEVICE_UD_TSO = (1<<19),
- IB_DEVICE_XRC = (1<<20),
- IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
- IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
- IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
- IB_DEVICE_RC_IP_CSUM = (1<<25),
- IB_DEVICE_RAW_IP_CSUM = (1<<26),
- IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
- IB_DEVICE_SIGNATURE_HANDOVER = (1<<30),
- IB_DEVICE_ON_DEMAND_PAGING = (1<<31),
+ IB_DEVICE_UD_IP_CSUM = (1 << 18),
+ IB_DEVICE_UD_TSO = (1 << 19),
+ IB_DEVICE_XRC = (1 << 20),
+
+ /*
+ * This device supports the IB "base memory management extension",
+ * which includes support for fast registrations (IB_WR_REG_MR,
+ * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs). This flag should
+ * also be set by any iWarp device which must support FRs to comply
+ * with the iWarp verbs spec. iWarp devices also support the
+ * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
+ * stag.
+ */
+ IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21),
+ IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22),
+ IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23),
+ IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24),
+ IB_DEVICE_RC_IP_CSUM = (1 << 25),
+ IB_DEVICE_RAW_IP_CSUM = (1 << 26),
+ /*
+ * Devices should set IB_DEVICE_CROSS_CHANNEL if they
+ * support execution of WQEs that involve synchronization
+ * of I/O operations with single completion queue managed
+ * by hardware.
+ */
+ IB_DEVICE_CROSS_CHANNEL = (1 << 27),
+ IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
+ IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30),
+ IB_DEVICE_ON_DEMAND_PAGING = (1 << 31),
};
enum ib_signature_prot_cap {
enum ib_cq_creation_flags {
IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
+ IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
};
struct ib_cq_init_attr {
#define RDMA_CORE_CAP_PROT_IB 0x00100000
#define RDMA_CORE_CAP_PROT_ROCE 0x00200000
#define RDMA_CORE_CAP_PROT_IWARP 0x00400000
+#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
| RDMA_CORE_CAP_IB_MAD \
| RDMA_CORE_CAP_IB_CM \
| RDMA_CORE_CAP_AF_IB \
| RDMA_CORE_CAP_ETH_AH)
+#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP \
+ (RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
+ | RDMA_CORE_CAP_IB_MAD \
+ | RDMA_CORE_CAP_IB_CM \
+ | RDMA_CORE_CAP_AF_IB \
+ | RDMA_CORE_CAP_ETH_AH)
#define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \
| RDMA_CORE_CAP_IW_CM)
#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \
union ib_gid dgid;
};
+union rdma_network_hdr {
+ struct ib_grh ibgrh;
+ struct {
+ /* The IB spec states that if it's IPv4, the IPv4 header
+ * is located in the last 20 bytes of the network header,
+ * so reserved[] pads the 20 bytes that precede roce4grh.
+ */
+ u8 reserved[20];
+ struct iphdr roce4grh;
+ };
+};
+
enum {
IB_MULTICAST_QPN = 0xffffff
};
IB_WC_RDMA_READ,
IB_WC_COMP_SWAP,
IB_WC_FETCH_ADD,
- IB_WC_BIND_MW,
IB_WC_LSO,
IB_WC_LOCAL_INV,
IB_WC_REG_MR,
IB_WC_IP_CSUM_OK = (1<<3),
IB_WC_WITH_SMAC = (1<<4),
IB_WC_WITH_VLAN = (1<<5),
+ IB_WC_WITH_NETWORK_HDR_TYPE = (1<<6),
};
struct ib_wc {
- u64 wr_id;
+ union {
+ u64 wr_id;
+ struct ib_cqe *wr_cqe;
+ };
enum ib_wc_status status;
enum ib_wc_opcode opcode;
u32 vendor_err;
u8 port_num; /* valid only for DR SMPs on switches */
u8 smac[ETH_ALEN];
u16 vlan_id;
+ u8 network_hdr_type;
};
enum ib_cq_notify_flags {
enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+ IB_QP_CREATE_CROSS_CHANNEL = 1 << 2,
+ IB_QP_CREATE_MANAGED_SEND = 1 << 3,
+ IB_QP_CREATE_MANAGED_RECV = 1 << 4,
IB_QP_CREATE_NETIF_QP = 1 << 5,
IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
IB_QP_CREATE_USE_GFP_NOIO = 1 << 7,
IB_WR_REG_MR,
IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
- IB_WR_BIND_MW,
IB_WR_REG_SIG_MR,
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
u32 lkey;
};
-/**
- * struct ib_mw_bind_info - Parameters for a memory window bind operation.
- * @mr: A memory region to bind the memory window to.
- * @addr: The address where the memory window should begin.
- * @length: The length of the memory window, in bytes.
- * @mw_access_flags: Access flags from enum ib_access_flags for the window.
- *
- * This struct contains the shared parameters for type 1 and type 2
- * memory window bind operations.
- */
-struct ib_mw_bind_info {
- struct ib_mr *mr;
- u64 addr;
- u64 length;
- int mw_access_flags;
+struct ib_cqe {
+ void (*done)(struct ib_cq *cq, struct ib_wc *wc);
};
struct ib_send_wr {
struct ib_send_wr *next;
- u64 wr_id;
+ union {
+ u64 wr_id;
+ struct ib_cqe *wr_cqe;
+ };
struct ib_sge *sg_list;
int num_sge;
enum ib_wr_opcode opcode;
return container_of(wr, struct ib_reg_wr, wr);
}
-struct ib_bind_mw_wr {
- struct ib_send_wr wr;
- struct ib_mw *mw;
- /* The new rkey for the memory window. */
- u32 rkey;
- struct ib_mw_bind_info bind_info;
-};
-
-static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr)
-{
- return container_of(wr, struct ib_bind_mw_wr, wr);
-}
-
struct ib_sig_handover_wr {
struct ib_send_wr wr;
struct ib_sig_attrs *sig_attrs;
struct ib_recv_wr {
struct ib_recv_wr *next;
- u64 wr_id;
+ union {
+ u64 wr_id;
+ struct ib_cqe *wr_cqe;
+ };
struct ib_sge *sg_list;
int num_sge;
};
IB_ACCESS_ON_DEMAND = (1<<6),
};
-struct ib_phys_buf {
- u64 addr;
- u64 size;
-};
-
-struct ib_mr_attr {
- struct ib_pd *pd;
- u64 device_virt_addr;
- u64 size;
- int mr_access_flags;
- u32 lkey;
- u32 rkey;
-};
-
+/*
+ * XXX: these are apparently used for ->rereg_user_mr, no idea why they
+ * are hidden here instead of a uapi header!
+ */
enum ib_mr_rereg_flags {
IB_MR_REREG_TRANS = 1,
IB_MR_REREG_PD = (1<<1),
IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1)
};
-/**
- * struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
- * @wr_id: Work request id.
- * @send_flags: Flags from ib_send_flags enum.
- * @bind_info: More parameters of the bind operation.
- */
-struct ib_mw_bind {
- u64 wr_id;
- int send_flags;
- struct ib_mw_bind_info bind_info;
-};
-
struct ib_fmr_attr {
int max_pages;
int max_maps;
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
+enum ib_poll_context {
+ IB_POLL_DIRECT, /* caller context, no hw completions */
+ IB_POLL_SOFTIRQ, /* poll from softirq context */
+ IB_POLL_WORKQUEUE, /* poll from workqueue */
+};
+
struct ib_cq {
struct ib_device *device;
struct ib_uobject *uobject;
void *cq_context;
int cqe;
atomic_t usecnt; /* count number of work queues */
+ enum ib_poll_context poll_ctx;
+ struct ib_wc *wc;
+ union {
+ struct irq_poll iop;
+ struct work_struct work;
+ };
};
struct ib_srq {
u64 iova;
u32 length;
unsigned int page_size;
- atomic_t usecnt; /* count number of MWs */
};
struct ib_mw {
int wc_cnt);
struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
int mr_access_flags);
- struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start);
struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
u64 start, u64 length,
u64 virt_addr,
int mr_access_flags,
struct ib_pd *pd,
struct ib_udata *udata);
- int (*query_mr)(struct ib_mr *mr,
- struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
enum ib_mr_type mr_type,
int (*map_mr_sg)(struct ib_mr *mr,
struct scatterlist *sg,
int sg_nents);
- int (*rereg_phys_mr)(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start);
struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
enum ib_mw_type type);
- int (*bind_mw)(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int (*dealloc_mw)(struct ib_mw *mw);
struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
int mr_access_flags,
u16 is_switch:1;
u8 node_type;
u8 phys_port_cnt;
+ struct ib_device_attr attrs;
/**
* The following mandatory functions are used only at device
return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
}
+/**
+ * ib_is_udata_cleared - check that a range of the udata input buffer is zero
+ * @udata: user data descriptor whose inbuf is inspected
+ * @offset: byte offset into udata->inbuf at which the range starts
+ * @len: number of bytes to check
+ *
+ * Copies @len bytes from user space into a temporary kernel buffer and
+ * scans that copy for a non-zero byte.
+ *
+ * Return: true if every byte in the range is zero; false if a non-zero
+ * byte is found, if @len exceeds USHRT_MAX, if the temporary buffer
+ * cannot be allocated, or if the copy from user space fails.
+ */
+static inline bool ib_is_udata_cleared(struct ib_udata *udata,
+				       size_t offset,
+				       size_t len)
+{
+	const void __user *src = udata->inbuf + offset;
+	bool cleared = false;
+	u8 *tmp;
+
+	/* Refuse oversized ranges before allocating anything. */
+	if (len > USHRT_MAX)
+		return false;
+
+	tmp = kmalloc(len, GFP_KERNEL);
+	if (!tmp)
+		return false;
+
+	/* A failed copy leaves the result at its "not cleared" default. */
+	if (!copy_from_user(tmp, src, len))
+		cleared = !memchr_inv(tmp, 0, len);
+
+	kfree(tmp);
+	return cleared;
+}
+
/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
int ib_unregister_event_handler(struct ib_event_handler *event_handler);
void ib_dispatch_event(struct ib_event *event);
-int ib_query_device(struct ib_device *device,
- struct ib_device_attr *device_attr);
-
int ib_query_port(struct ib_device *device,
u8 port_num, struct ib_port_attr *port_attr);
}
static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags &
+ (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+}
+
+static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+}
+
+static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
{
return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
}
static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags &
- (RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_PROT_ROCE);
+ return rdma_protocol_ib(device, port_num) ||
+ rdma_protocol_roce(device, port_num);
}
/**
struct ib_port_modify *port_modify);
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- struct net_device *ndev, u8 *port_num, u16 *index);
+ enum ib_gid_type gid_type, struct net_device *ndev,
+ u8 *port_num, u16 *index);
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index);
return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
}
+struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx);
+void ib_free_cq(struct ib_cq *cq);
+int ib_process_cq_direct(struct ib_cq *cq, int budget);
+
/**
* ib_create_cq - Creates a CQ on the specified device.
* @device: The device on which to create the CQ.
dma_free_coherent(dev->dma_device, size, cpu_addr, dma_handle);
}
-/**
- * ib_query_mr - Retrieves information about a specific memory region.
- * @mr: The memory region to retrieve information about.
- * @mr_attr: The attributes of the specified memory region.
- */
-int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
-
/**
* ib_dereg_mr - Deregisters a memory region and removes it from the
* HCA translation table.
return ((rkey + 1) & mask) | (rkey & ~mask);
}
-/**
- * ib_alloc_mw - Allocates a memory window.
- * @pd: The protection domain associated with the memory window.
- * @type: The type of the memory window (1 or 2).
- */
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-
-/**
- * ib_bind_mw - Posts a work request to the send queue of the specified
- * QP, which binds the memory window to the given address range and
- * remote access attributes.
- * @qp: QP to post the bind work request on.
- * @mw: The memory window to bind.
- * @mw_bind: Specifies information about the memory window, including
- * its address range, remote access rights, and associated memory region.
- *
- * If there is no immediate error, the function will update the rkey member
- * of the mw parameter to its new value. The bind operation can still fail
- * asynchronously.
- */
-static inline int ib_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- /* XXX reference counting in corresponding MR? */
- return mw->device->bind_mw ?
- mw->device->bind_mw(qp, mw, mw_bind) :
- -ENOSYS;
-}
-
-/**
- * ib_dealloc_mw - Deallocates a memory window.
- * @mw: The memory window to deallocate.
- */
-int ib_dealloc_mw(struct ib_mw *mw);
-
/**
* ib_alloc_fmr - Allocates a unmapped fast memory region.
* @pd: The protection domain associated with the unmapped region.
--- /dev/null
+/*
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ISCSI_ISER_H
+#define ISCSI_ISER_H
+
+#define ISER_ZBVA_NOT_SUP 0x80
+#define ISER_SEND_W_INV_NOT_SUP 0x40
+#define ISERT_ZBVA_NOT_USED 0x80
+#define ISERT_SEND_W_INV_NOT_USED 0x40
+
+#define ISCSI_CTRL 0x10
+#define ISER_HELLO 0x20
+#define ISER_HELLORPLY 0x30
+
+#define ISER_VER 0x10
+#define ISER_WSV 0x08
+#define ISER_RSV 0x04
+
+/**
+ * struct iser_cm_hdr - iSER CM header (from iSER Annex A12)
+ *
+ * @flags: flags support (zbva, send_w_inv)
+ * @rsvd: reserved
+ */
+struct iser_cm_hdr {
+ u8 flags;
+ u8 rsvd[3];
+} __packed;
+
+/**
+ * struct iser_ctrl - iSER header of iSCSI control PDU
+ *
+ * @flags: opcode and read/write valid bits
+ * @rsvd: reserved
+ * @write_stag: write rkey
+ * @write_va: write virtual address
+ * @read_stag: read rkey
+ * @read_va: read virtual address
+ */
+struct iser_ctrl {
+ u8 flags;
+ u8 rsvd[3];
+ __be32 write_stag;
+ __be64 write_va;
+ __be32 read_stag;
+ __be64 read_va;
+} __packed;
+
+#endif /* ISCSI_ISER_H */
int event,
struct timespec * tstamp,
unsigned long resolution);
+ void (*disconnect)(struct snd_timer_instance *timeri);
void *callback_data;
unsigned long ticks; /* auto-load ticks when expired */
unsigned long cticks; /* current ticks */
{ EXT4_GET_BLOCKS_METADATA_NOFAIL, "METADATA_NOFAIL" }, \
{ EXT4_GET_BLOCKS_NO_NORMALIZE, "NO_NORMALIZE" }, \
{ EXT4_GET_BLOCKS_KEEP_SIZE, "KEEP_SIZE" }, \
- { EXT4_GET_BLOCKS_NO_LOCK, "NO_LOCK" })
+ { EXT4_GET_BLOCKS_ZERO, "ZERO" })
#define show_mflags(flags) __print_flags(flags, "", \
{ EXT4_MAP_NEW, "N" }, \
TRACE_EVENT(mm_khugepaged_scan_pmd,
- TP_PROTO(struct mm_struct *mm, unsigned long pfn, bool writable,
+ TP_PROTO(struct mm_struct *mm, struct page *page, bool writable,
bool referenced, int none_or_zero, int status),
- TP_ARGS(mm, pfn, writable, referenced, none_or_zero, status),
+ TP_ARGS(mm, page, writable, referenced, none_or_zero, status),
TP_STRUCT__entry(
__field(struct mm_struct *, mm)
TP_fast_assign(
__entry->mm = mm;
- __entry->pfn = pfn;
+ __entry->pfn = page ? page_to_pfn(page) : -1;
__entry->writable = writable;
__entry->referenced = referenced;
__entry->none_or_zero = none_or_zero;
TRACE_EVENT(mm_collapse_huge_page_isolate,
- TP_PROTO(unsigned long pfn, int none_or_zero,
+ TP_PROTO(struct page *page, int none_or_zero,
bool referenced, bool writable, int status),
- TP_ARGS(pfn, none_or_zero, referenced, writable, status),
+ TP_ARGS(page, none_or_zero, referenced, writable, status),
TP_STRUCT__entry(
__field(unsigned long, pfn)
),
TP_fast_assign(
- __entry->pfn = pfn;
+ __entry->pfn = page ? page_to_pfn(page) : -1;
__entry->none_or_zero = none_or_zero;
__entry->referenced = referenced;
__entry->writable = writable;
softirq_name(NET_TX) \
softirq_name(NET_RX) \
softirq_name(BLOCK) \
- softirq_name(BLOCK_IOPOLL) \
+ softirq_name(IRQ_POLL) \
softirq_name(TASKLET) \
softirq_name(SCHED) \
softirq_name(HRTIMER) \
header-y += nl80211.h
header-y += n_r3964.h
header-y += nubus.h
-header-y += nvme.h
+header-y += nvme_ioctl.h
header-y += nvram.h
header-y += omap3isp.h
header-y += omapfb.h
#define _UAPI_LINUX_FS_H
/*
- * This file has definitions for some important file table
- * structures etc.
+ * This file has definitions for some important file table structures
+ * and constants and structures used by various generic file system
+ * ioctl's. Please do not make any changes in this file before
+ * sending patches for review to linux-fsdevel@vger.kernel.org and
+ * linux-api@vger.kernel.org.
*/
#include <linux/limits.h>
#define MS_MGC_VAL 0xC0ED0000
#define MS_MGC_MSK 0xffff0000
+/*
+ * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+ __u32 fsx_xflags; /* xflags field value (get/set) */
+ __u32 fsx_extsize; /* extsize field value (get/set)*/
+ __u32 fsx_nextents; /* nextents field value (get) */
+ __u32 fsx_projid; /* project identifier (get/set) */
+ unsigned char fsx_pad[12];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
+#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
+#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
+#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
+#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
+#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
+#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
+#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
+#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */
+#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
+
/* the read-only stuff doesn't really belong here, but any other place is
probably as bad and I don't want to create yet another include file. */
#define FS_IOC32_SETFLAGS _IOW('f', 2, int)
#define FS_IOC32_GETVERSION _IOR('v', 1, int)
#define FS_IOC32_SETVERSION _IOW('v', 2, int)
+#define FS_IOC_FSGETXATTR _IOR ('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR _IOW ('X', 32, struct fsxattr)
/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ *
+ * Note: for historical reasons, these flags were originally used and
+ * defined for use by ext2/ext3, and then other file systems started
+ * using these flags so they wouldn't need to write their own version
+ * of chattr/lsattr (which was shipped as part of e2fsprogs). You
+ * should think twice before trying to use these flags in new
+ * contexts, or trying to assign these flags, since they are used both
+ * as the UAPI and the on-disk encoding for ext2/3/4. Also, we are
+ * almost out of 32-bit flags. :-)
+ *
+ * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+ * XFS to the generic FS level interface. This uses a structure that
+ * has padding and hence has more room to grow, so it may be more
+ * appropriate for many new use cases.
+ *
+ * Please do not change these flags or interfaces before checking with
+ * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
*/
#define FS_SECRM_FL 0x00000001 /* Secure deletion */
#define FS_UNRM_FL 0x00000002 /* Undelete */
#define FS_DIRTY_FL 0x00000100
#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
#define FS_NOCOMP_FL 0x00000400 /* Don't compress */
-#define FS_ECOMPR_FL 0x00000800 /* Compression error */
/* End compression flags --- maybe not all used */
+#define FS_ENCRYPT_FL 0x00000800 /* Encrypted file */
#define FS_BTREE_FL 0x00001000 /* btree format dir */
#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */
#define FS_IMAGIC_FL 0x00002000 /* AFS directory */
#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */
#define FS_EXTENT_FL 0x00080000 /* Extents */
-#define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */
+#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
+#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
+#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */
#define NVM_TTYPE_NAME_MAX 48
#define NVM_TTYPE_MAX 63
+#define NVM_MMTYPE_LEN 8
#define NVM_CTRL_FILE "/dev/lightnvm/control"
__u32 flags;
};
+struct nvm_ioctl_dev_init {
+ char dev[DISK_NAME_LEN]; /* open-channel SSD device */
+ char mmtype[NVM_MMTYPE_LEN]; /* register to media manager */
+
+ __u32 flags;
+};
+
+enum {
+ NVM_FACTORY_ERASE_ONLY_USER = 1 << 0, /* erase only blocks used as
+ * host blks or grown blks */
+ NVM_FACTORY_RESET_HOST_BLKS = 1 << 1, /* remove host blk marks */
+ NVM_FACTORY_RESET_GRWN_BBLKS = 1 << 2, /* remove grown blk marks */
+ NVM_FACTORY_NR_BITS = 1 << 3, /* stops here */
+};
+
+struct nvm_ioctl_dev_factory {
+ char dev[DISK_NAME_LEN];
+
+ __u32 flags;
+};
/* The ioctl type, 'L', 0x20 - 0x2F documented in ioctl-number.txt */
enum {
/* device level cmds */
NVM_DEV_CREATE_CMD,
NVM_DEV_REMOVE_CMD,
+
+ /* Init a device to support LightNVM media managers */
+ NVM_DEV_INIT_CMD,
+
+ /* Factory reset device */
+ NVM_DEV_FACTORY_CMD,
};
#define NVM_IOCTL 'L' /* 0x4c */
struct nvm_ioctl_create)
#define NVM_DEV_REMOVE _IOW(NVM_IOCTL, NVM_DEV_REMOVE_CMD, \
struct nvm_ioctl_remove)
+#define NVM_DEV_INIT _IOW(NVM_IOCTL, NVM_DEV_INIT_CMD, \
+ struct nvm_ioctl_dev_init)
+#define NVM_DEV_FACTORY _IOW(NVM_IOCTL, NVM_DEV_FACTORY_CMD, \
+ struct nvm_ioctl_dev_factory)
#define NVM_VERSION_MAJOR 1
#define NVM_VERSION_MINOR 0
typedef uint16_t blkif_vdev_t;
typedef uint64_t blkif_sector_t;
+/*
+ * Multiple hardware queues/rings:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vbd, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues" with the number they wish to use, which must be
+ * greater than zero, and no more than the value reported by the backend in
+ * "multi-queue-max-queues".
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel and ring-ref keys, instead writing those keys under sub-keys
+ * having the name "queue-N" where N is the integer ID of the queue/ring for
+ * which those keys belong. Queues are indexed from zero.
+ * For example, a frontend with two queues must write the following set of
+ * queue-related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ * It is also possible to use multiple queues/rings together with
+ * the multi-page ring buffer feature.
+ * For example, a frontend that requests two queues/rings, each with a ring
+ * buffer of two pages, must write the following set of related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/ring-page-order = "1"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ */
+
/*
* REQUEST CODES.
*/
ro = mnt_want_write(mnt); /* we'll drop it in any case */
error = 0;
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
path.dentry = lookup_one_len(name->name, root, strlen(name->name));
if (IS_ERR(path.dentry)) {
error = PTR_ERR(path.dentry);
put_unused_fd(fd);
fd = error;
}
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
if (!ro)
mnt_drop_write(mnt);
out_putname:
err = mnt_want_write(mnt);
if (err)
goto out_name;
- mutex_lock_nested(&d_inode(mnt->mnt_root)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
dentry = lookup_one_len(name->name, mnt->mnt_root,
strlen(name->name));
if (IS_ERR(dentry)) {
dput(dentry);
out_unlock:
- mutex_unlock(&d_inode(mnt->mnt_root)->i_mutex);
+ inode_unlock(d_inode(mnt->mnt_root));
if (inode)
iput(inode);
mnt_drop_write(mnt);
wake_up_sem_queue_do(&tasks);
out_free:
if (sem_io != fast_sem_io)
- ipc_free(sem_io, sizeof(ushort)*nsems);
+ ipc_free(sem_io);
return err;
}
/**
* ipc_free - free ipc space
* @ptr: pointer returned by ipc_alloc
- * @size: size of block
*
- * Free a block created with ipc_alloc(). The caller must know the size
- * used in the allocation call.
+ * Free a block created with ipc_alloc().
*/
-void ipc_free(void *ptr, int size)
+void ipc_free(void *ptr)
{
- if (size > PAGE_SIZE)
- vfree(ptr);
- else
- kfree(ptr);
+ kvfree(ptr);
}
/**
* both function can sleep
*/
void *ipc_alloc(int size);
-void ipc_free(void *ptr, int size);
+void ipc_free(void *ptr);
/*
* For allocation that need to be freed by RCU.
if (IS_ERR(dentry))
return (void *)dentry; /* returning an error */
inode = path.dentry->d_inode;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
audit_mark = kzalloc(sizeof(*audit_mark), GFP_KERNEL);
if (unlikely(!audit_mark)) {
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
- mutex_unlock(&d_backing_inode(parent->dentry)->i_mutex);
+ inode_unlock(d_backing_inode(parent->dentry));
if (d_is_positive(d)) {
/* update watch filter fields */
watch->dev = d_backing_inode(d)->i_sb->s_dev;
struct perf_event *event = filp->private_data;
int retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = fasync_helper(fd, filp, on, &event->fasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (retval < 0)
return retval;
if (!desc->count)
return 0;
- mutex_lock(&file_inode(filp)->i_mutex);
+ inode_lock(file_inode(filp));
do {
if (!relay_file_read_avail(buf, *ppos))
break;
*ppos = relay_file_read_end_pos(buf, read_start, ret);
}
} while (desc->count && ret);
- mutex_unlock(&file_inode(filp)->i_mutex);
+ inode_unlock(file_inode(filp));
return desc->written;
}
/* Ensure the static_key remains in a consistent state */
inode = file_inode(filp);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i = sched_feat_set(cmp);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (i == __SCHED_FEAT_NR)
return -EINVAL;
.proc_handler = &pipe_proc_fn,
.extra1 = &pipe_min_size,
},
+ {
+ .procname = "pipe-user-pages-hard",
+ .data = &pipe_user_pages_hard,
+ .maxlen = sizeof(pipe_user_pages_hard),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
+ {
+ .procname = "pipe-user-pages-soft",
+ .data = &pipe_user_pages_soft,
+ .maxlen = sizeof(pipe_user_pages_soft),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ },
{ }
};
# compression support is select'ed if needed
#
config 842_COMPRESS
+ select CRC32
tristate
config 842_DECOMPRESS
+ select CRC32
tristate
config ZLIB_INFLATE
information. This data is useful for drivers handling
DDR SDRAM controllers.
+config IRQ_POLL
+ bool "IRQ polling library"
+ help
+ Helper library for interrupt mitigation using polling.
+
config MPILIB
tristate
select CLZ_TAB
obj-$(CONFIG_SG_SPLIT) += sg_split.o
obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
+obj-$(CONFIG_IRQ_POLL) += irq_poll.o
libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
fdt_empty_tree.o
--- /dev/null
+/*
+ * Functions related to interrupt-poll handling in the block layer. This
+ * is similar to NAPI for network devices.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/irq_poll.h>
+#include <linux/delay.h>
+
+static unsigned int irq_poll_budget __read_mostly = 256;
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
+
+/**
+ * irq_poll_sched - Schedule a run of the irq_poll handler
+ * @iop:      The parent irq_poll structure
+ *
+ * Description:
+ *     Add this irq_poll structure to the tail of the current CPU's pending
+ *     poll list and raise IRQ_POLL_SOFTIRQ. Nothing is done if @iop is
+ *     disabled (IRQ_POLL_F_DISABLE is set) or is already scheduled
+ *     (IRQ_POLL_F_SCHED is set).
+ **/
+void irq_poll_sched(struct irq_poll *iop)
+{
+	unsigned long flags;
+
+	if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
+		return;
+	if (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
+		return;
+
+	local_irq_save(flags);
+	list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
+	/*
+	 * Use raise_softirq_irqoff() rather than __raise_softirq_irqoff():
+	 * the latter only marks the softirq pending, which is not enough
+	 * when the caller is not in interrupt context. The former also
+	 * wakes ksoftirqd in that case, so the poll list gets processed
+	 * without waiting for an unrelated interrupt.
+	 */
+	raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(irq_poll_sched);
+
+/**
+ * __irq_poll_complete - Mark this @iop as un-polled again
+ * @iop: The parent irq_poll structure
+ *
+ * Description:
+ * See irq_poll_complete(). This function must be called with interrupts
+ * disabled. It unlinks @iop from the poll list it is queued on and then
+ * clears IRQ_POLL_F_SCHED, after which irq_poll_sched() can queue @iop
+ * again.
+ **/
+static void __irq_poll_complete(struct irq_poll *iop)
+{
+ list_del(&iop->list);
+ smp_mb__before_atomic();
+ clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
+}
+
+/**
+ * irq_poll_complete - Mark this @iop as un-polled again
+ * @iop: The parent irq_poll structure
+ *
+ * Description:
+ * If a driver consumes less than the assigned budget in its run of the
+ * poll handler, it'll end the polled mode by calling this function. The
+ * poll handler will not be invoked again before irq_poll_sched()
+ * is called. Local interrupts are disabled around the actual removal
+ * (see __irq_poll_complete()) and restored to their previous state
+ * afterwards.
+ **/
+void irq_poll_complete(struct irq_poll *iop)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __irq_poll_complete(iop);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(irq_poll_complete);
+
+static void irq_poll_softirq(struct softirq_action *h)
+{
+ struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
+ int rearm = 0, budget = irq_poll_budget;
+ unsigned long start_time = jiffies;
+
+ local_irq_disable();
+
+ while (!list_empty(list)) {
+ struct irq_poll *iop;
+ int work, weight;
+
+ /*
+ * Each iteration polls the instance at the head of this
+ * CPU's list. If the softirq window is exhausted (the
+ * overall budget is used up, or jiffies has moved past the
+ * value sampled on entry) then punt: leave the remaining
+ * entries queued and re-raise the softirq after the loop.
+ */
+ if (budget <= 0 || time_after(jiffies, start_time)) {
+ rearm = 1;
+ break;
+ }
+
+ local_irq_enable();
+
+ /* Even though interrupts have been re-enabled, this
+ * access is safe because interrupts can only add new
+ * entries to the tail of this list, and only ->poll()
+ * calls can remove this head entry from the list.
+ */
+ iop = list_entry(list->next, struct irq_poll, list);
+
+ weight = iop->weight;
+ work = 0;
+ if (test_bit(IRQ_POLL_F_SCHED, &iop->state))
+ work = iop->poll(iop, weight);
+
+ budget -= work;
+
+ local_irq_disable();
+
+ /*
+ * Drivers must not modify the irq_poll state, if they
+ * consume their assigned weight (or more, some drivers can't
+ * easily just stop processing, they have to complete an
+ * entire mask of commands). In such cases this code
+ * still "owns" the irq_poll instance and therefore can
+ * move the instance around on the list at-will: it is
+ * completed (taken off the list) if it has been disabled
+ * in the meantime, and otherwise rotated to the tail so
+ * that the next entry becomes the head.
+ */
+ if (work >= weight) {
+ if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
+ __irq_poll_complete(iop);
+ else
+ list_move_tail(&iop->list, list);
+ }
+ }
+
+ if (rearm)
+ __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+
+ local_irq_enable();
+}
+
+/**
+ * irq_poll_disable - Disable irq_poll on this @iop
+ * @iop: The parent irq_poll structure
+ *
+ * Description:
+ * Disable io polling and wait for any pending callbacks to have completed.
+ * IRQ_POLL_F_DISABLE is set first so that irq_poll_sched() ignores @iop,
+ * then this function retries, calling msleep(1) between attempts, until
+ * it manages to take IRQ_POLL_F_SCHED itself. Because it sleeps it must
+ * not be called from atomic context. IRQ_POLL_F_SCHED is still set on
+ * return, which keeps @iop from being scheduled until irq_poll_enable()
+ * clears it.
+ **/
+void irq_poll_disable(struct irq_poll *iop)
+{
+ set_bit(IRQ_POLL_F_DISABLE, &iop->state);
+ while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
+ msleep(1);
+ clear_bit(IRQ_POLL_F_DISABLE, &iop->state);
+}
+EXPORT_SYMBOL(irq_poll_disable);
+
+/**
+ * irq_poll_enable - Enable irq_poll on this @iop
+ * @iop: The parent irq_poll structure
+ *
+ * Description:
+ * Enable irq_poll on this @iop by clearing IRQ_POLL_F_SCHED. Note that
+ * the handler run will not be scheduled, it will only mark it as active.
+ * IRQ_POLL_F_SCHED must be set on entry, which is the state
+ * irq_poll_disable() leaves behind; calling this with the bit clear
+ * triggers the BUG_ON() below.
+ **/
+void irq_poll_enable(struct irq_poll *iop)
+{
+ BUG_ON(!test_bit(IRQ_POLL_F_SCHED, &iop->state));
+ smp_mb__before_atomic();
+ clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(irq_poll_enable);
+
+/**
+ * irq_poll_init - Initialize this @iop
+ * @iop: The parent irq_poll structure
+ * @weight: The default weight (or command completion budget)
+ * @poll_fn: The handler to invoke
+ *
+ * Description:
+ * Initialize and enable this irq_poll structure. @iop is zeroed first,
+ * so any previous contents (including its state bits) are discarded;
+ * its list head is then initialized and @weight and @poll_fn are
+ * recorded.
+ **/
+void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn)
+{
+ memset(iop, 0, sizeof(*iop));
+ INIT_LIST_HEAD(&iop->list);
+ iop->weight = weight;
+ iop->poll = poll_fn;
+}
+EXPORT_SYMBOL(irq_poll_init);
+
+/*
+ * CPU hotplug callback. When a CPU has gone away (CPU_DEAD or
+ * CPU_DEAD_FROZEN), whatever is still queued on its poll list is moved to
+ * the list of the CPU running this notifier and IRQ_POLL_SOFTIRQ is raised
+ * there. All other actions are ignored. Always returns NOTIFY_OK.
+ */
+static int irq_poll_cpu_notify(struct notifier_block *self,
+			       unsigned long action, void *hcpu)
+{
+	/*
+	 * If a CPU goes away, splice its entries to the current CPU
+	 * and trigger a run of the softirq
+	 */
+	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+		int cpu = (unsigned long) hcpu;
+
+		local_irq_disable();
+		list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+				 this_cpu_ptr(&blk_cpu_iopoll));
+		/*
+		 * This notifier is not expected to run in interrupt
+		 * context, where __raise_softirq_irqoff() would merely set
+		 * the pending bit and leave the spliced entries waiting for
+		 * some unrelated event to run softirqs.
+		 * raise_softirq_irqoff() additionally wakes ksoftirqd when
+		 * called outside of interrupt context.
+		 */
+		raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+		local_irq_enable();
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block irq_poll_cpu_notifier = {
+ .notifier_call = irq_poll_cpu_notify,
+};
+
+/*
+ * Boot-time setup: give every possible CPU an empty poll list, install
+ * irq_poll_softirq() as the IRQ_POLL_SOFTIRQ handler and register the CPU
+ * hotplug notifier. Always returns 0.
+ */
+static __init int irq_poll_setup(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct list_head *head = &per_cpu(blk_cpu_iopoll, cpu);
+
+		INIT_LIST_HEAD(head);
+	}
+
+	open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
+	register_hotcpu_notifier(&irq_poll_cpu_notifier);
+
+	return 0;
+}
+subsys_initcall(irq_poll_setup);
MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>");
MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations");
MODULE_LICENSE("GPL");
+MODULE_SOFTDEP("pre: crc32c");
* @seq: the seq_file to print into
* @lc: the lru cache to print statistics of
*/
-size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
+void lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
{
/* NOTE:
* total calls to lc_get are
seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
lc->name, lc->used, lc->nr_elements,
lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
-
- return 0;
}
static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
if (rs->missed)
printk(KERN_WARNING "%s: %d callbacks suppressed\n",
func, rs->missed);
- rs->begin = 0;
+ rs->begin = jiffies;
rs->printed = 0;
rs->missed = 0;
}
*/
#include <linux/export.h>
#include <linux/compiler.h>
+#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
__radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
if (shadow) {
- mapping->nrshadows++;
+ mapping->nrexceptional++;
/*
- * Make sure the nrshadows update is committed before
+ * Make sure the nrexceptional update is committed before
* the nrpages update so that final truncate racing
* with reclaim does not see both counters 0 at the
* same time and miss a shadow entry.
{
int err = 0;
+ if (dax_mapping(mapping) && mapping->nrexceptional) {
+ err = dax_writeback_mapping_range(mapping, lstart, lend);
+ if (err)
+ return err;
+ }
+
if (mapping->nrpages) {
err = __filemap_fdatawrite_range(mapping, lstart, lend,
WB_SYNC_ALL);
p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
if (!radix_tree_exceptional_entry(p))
return -EEXIST;
+
+ if (WARN_ON(dax_mapping(mapping)))
+ return -EINVAL;
+
if (shadowp)
*shadowp = p;
- mapping->nrshadows--;
+ mapping->nrexceptional--;
if (node)
workingset_node_shadows_dec(node);
}
if (radix_tree_deref_retry(page))
goto restart;
/*
- * A shadow entry of a recently evicted page,
- * or a swap entry from shmem/tmpfs. Return
- * it without attempting to raise page count.
+ * A shadow entry of a recently evicted page, a swap
+ * entry from shmem/tmpfs or a DAX entry. Return it
+ * without attempting to raise page count.
*/
goto export;
}
}
EXPORT_SYMBOL(find_get_pages_tag);
+/**
+ * find_get_entries_tag - find and return entries that match @tag
+ * @mapping: the address_space to search
+ * @start: the starting page cache index
+ * @tag: the tag index
+ * @nr_entries: the maximum number of entries
+ * @entries: where the resulting entries are placed
+ * @indices: the cache indices corresponding to the entries in @entries
+ *
+ * Like find_get_entries, except we only return entries which are tagged with
+ * @tag. The lookup walks @mapping->page_tree under rcu_read_lock().
+ * Exceptional entries (shadow, swap or DAX entries) are stored in @entries
+ * as found, without any attempt to raise a page count; for every other
+ * entry a speculative page reference is taken before it is stored.
+ *
+ * Return: the number of entries stored in @entries and @indices, which is
+ * at most @nr_entries (and 0 if @nr_entries is 0).
+ */
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+ int tag, unsigned int nr_entries,
+ struct page **entries, pgoff_t *indices)
+{
+ void **slot;
+ unsigned int ret = 0;
+ struct radix_tree_iter iter;
+
+ if (!nr_entries)
+ return 0;
+
+ rcu_read_lock();
+restart:
+ radix_tree_for_each_tagged(slot, &mapping->page_tree,
+ &iter, start, tag) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot(slot);
+ if (unlikely(!page))
+ continue;
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page)) {
+ /*
+ * Transient condition which can only trigger
+ * when entry at index 0 moves out of or back
+ * to root: none yet gotten, safe to restart.
+ */
+ goto restart;
+ }
+
+ /*
+ * A shadow entry of a recently evicted page, a swap
+ * entry from shmem/tmpfs or a DAX entry. Return it
+ * without attempting to raise page count.
+ */
+ goto export;
+ }
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *slot)) {
+ page_cache_release(page);
+ goto repeat;
+ }
+export:
+ indices[ret] = iter.index;
+ entries[ret] = page;
+ if (++ret == nr_entries)
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL(find_get_entries_tag);
+
/*
* CD/DVDs are error prone. When a medium error occurs, the driver may fail
* a _large_ part of the i/o request. Imagine the worst scenario:
struct inode *inode = file->f_mapping->host;
ssize_t ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0)
ret = __generic_file_write_iter(iocb, from);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret > 0) {
ssize_t err;
struct mm_struct *mm = tlb->mm;
int ret = 0;
- if (!pmd_trans_huge_lock(pmd, vma, &ptl))
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (!ptl)
goto out_unlocked;
orig_pmd = *pmd;
pmd_t orig_pmd;
spinlock_t *ptl;
- if (!__pmd_trans_huge_lock(pmd, vma, &ptl))
+ ptl = __pmd_trans_huge_lock(pmd, vma);
+ if (!ptl)
return 0;
/*
* For architectures like ppc64 we look at deposited pgtable
* We don't have to worry about the ordering of src and dst
* ptlocks because exclusive mmap_sem prevents deadlock.
*/
- if (__pmd_trans_huge_lock(old_pmd, vma, &old_ptl)) {
+ old_ptl = __pmd_trans_huge_lock(old_pmd, vma);
+ if (old_ptl) {
new_ptl = pmd_lockptr(mm, new_pmd);
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
spinlock_t *ptl;
int ret = 0;
- if (__pmd_trans_huge_lock(pmd, vma, &ptl)) {
+ ptl = __pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
pmd_t entry;
bool preserve_write = prot_numa && pmd_write(*pmd);
ret = 1;
* Note that if it returns true, this routine returns without unlocking page
* table lock. So callers must unlock it.
*/
-bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
- spinlock_t **ptl)
+spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
{
- *ptl = pmd_lock(vma->vm_mm, pmd);
+ spinlock_t *ptl;
+ ptl = pmd_lock(vma->vm_mm, pmd);
if (likely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
- return true;
- spin_unlock(*ptl);
- return false;
+ return ptl;
+ spin_unlock(ptl);
+ return NULL;
}
#define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
if (likely(writable)) {
if (likely(referenced)) {
result = SCAN_SUCCEED;
- trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero,
+ trace_mm_collapse_huge_page_isolate(page, none_or_zero,
referenced, writable, result);
return 1;
}
out:
release_pte_pages(pte, _pte);
- trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero,
+ trace_mm_collapse_huge_page_isolate(page, none_or_zero,
referenced, writable, result);
return 0;
}
collapse_huge_page(mm, address, hpage, vma, node);
}
out:
- trace_mm_khugepaged_scan_pmd(mm, page_to_pfn(page), writable, referenced,
+ trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
none_or_zero, result);
return ret;
}
pte_t *pte;
spinlock_t *ptl;
- if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
mc.precharge += HPAGE_PMD_NR;
spin_unlock(ptl);
union mc_target target;
struct page *page;
- if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
if (mc.precharge < HPAGE_PMD_NR) {
spin_unlock(ptl);
return 0;
unsigned char *vec = walk->private;
int nr = (end - addr) >> PAGE_SHIFT;
- if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+ ptl = pmd_trans_huge_lock(pmd, vma);
+ if (ptl) {
memset(vec, 1, nr);
spin_unlock(ptl);
goto out;
*/
unsigned int munlock_vma_page(struct page *page)
{
- unsigned int nr_pages;
+ int nr_pages;
struct zone *zone = page_zone(page);
/* For try_to_munlock() and to serialize with page migration */
/**
* pcpu_mem_free - free memory
* @ptr: memory to free
- * @size: size of the area
*
* Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc().
*/
-static void pcpu_mem_free(void *ptr, size_t size)
+static void pcpu_mem_free(void *ptr)
{
- if (size <= PAGE_SIZE)
- kfree(ptr);
- else
- vfree(ptr);
+ kvfree(ptr);
}
/**
* pcpu_mem_free() might end up calling vfree() which uses
* IRQ-unsafe lock and thus can't be called under pcpu_lock.
*/
- pcpu_mem_free(old, old_size);
- pcpu_mem_free(new, new_size);
+ pcpu_mem_free(old);
+ pcpu_mem_free(new);
return 0;
}
chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
sizeof(chunk->map[0]));
if (!chunk->map) {
- pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+ pcpu_mem_free(chunk);
return NULL;
}
{
if (!chunk)
return;
- pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
- pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+ pcpu_mem_free(chunk->map);
+ pcpu_mem_free(chunk);
}
/**
list_del_init(&info->swaplist);
mutex_unlock(&shmem_swaplist_mutex);
}
- } else
- kfree(info->symlink);
+ }
simple_xattrs_free(&info->xattrs);
WARN_ON(inode->i_blocks);
if (whence != SEEK_DATA && whence != SEEK_HOLE)
return generic_file_llseek_size(file, offset, whence,
MAX_LFS_FILESIZE, i_size_read(inode));
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* We're holding i_mutex so we can access i_size directly */
if (offset < 0)
if (offset >= 0)
offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return offset;
}
if (seals & ~(unsigned int)F_ALL_SEALS)
return -EINVAL;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (info->seals & F_SEAL_SEAL) {
error = -EPERM;
error = 0;
unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
EXPORT_SYMBOL_GPL(shmem_add_seals);
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (mode & FALLOC_FL_PUNCH_HOLE) {
struct address_space *mapping = file->f_mapping;
inode->i_private = NULL;
spin_unlock(&inode->i_lock);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
info = SHMEM_I(inode);
inode->i_size = len-1;
if (len <= SHORT_SYMLINK_LEN) {
- info->symlink = kmemdup(symname, len, GFP_KERNEL);
- if (!info->symlink) {
+ inode->i_link = kmemdup(symname, len, GFP_KERNEL);
+ if (!inode->i_link) {
iput(inode);
return -ENOMEM;
}
inode->i_op = &shmem_short_symlink_operations;
- inode->i_link = info->symlink;
} else {
inode_nohighmem(inode);
error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
static void shmem_destroy_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
+ kfree(inode->i_link);
kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}
set_blocksize(bdev, old_block_size);
blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
} else {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode->i_flags &= ~S_SWAPFILE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
filp_close(swap_file, NULL);
p->flags |= SWP_BLKDEV;
} else if (S_ISREG(inode->i_mode)) {
p->bdev = inode->i_sb->s_bdev;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (IS_SWAPFILE(inode))
return -EBUSY;
} else
mapping = swap_file->f_mapping;
inode = mapping->host;
- /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
+ /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */
error = claim_swapfile(p, inode);
if (unlikely(error))
goto bad_swap;
vfree(cluster_info);
if (swap_file) {
if (inode && S_ISREG(inode->i_mode)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
inode = NULL;
}
filp_close(swap_file, NULL);
if (name)
putname(name);
if (inode && S_ISREG(inode->i_mode))
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
#include <linux/kernel.h>
#include <linux/backing-dev.h>
+#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
return;
spin_lock_irq(&mapping->tree_lock);
- /*
- * Regular page slots are stabilized by the page lock even
- * without the tree itself locked. These unlocked entries
- * need verification under the tree lock.
- */
- if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
- goto unlock;
- if (*slot != entry)
- goto unlock;
- radix_tree_replace_slot(slot, NULL);
- mapping->nrshadows--;
- if (!node)
- goto unlock;
- workingset_node_shadows_dec(node);
- /*
- * Don't track node without shadow entries.
- *
- * Avoid acquiring the list_lru lock if already untracked.
- * The list_empty() test is safe as node->private_list is
- * protected by mapping->tree_lock.
- */
- if (!workingset_node_shadows(node) &&
- !list_empty(&node->private_list))
- list_lru_del(&workingset_shadow_nodes, &node->private_list);
- __radix_tree_delete_node(&mapping->page_tree, node);
+
+ if (dax_mapping(mapping)) {
+ if (radix_tree_delete_item(&mapping->page_tree, index, entry))
+ mapping->nrexceptional--;
+ } else {
+ /*
+ * Regular page slots are stabilized by the page lock even
+ * without the tree itself locked. These unlocked entries
+ * need verification under the tree lock.
+ */
+ if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
+ &slot))
+ goto unlock;
+ if (*slot != entry)
+ goto unlock;
+ radix_tree_replace_slot(slot, NULL);
+ mapping->nrexceptional--;
+ if (!node)
+ goto unlock;
+ workingset_node_shadows_dec(node);
+ /*
+ * Don't track node without shadow entries.
+ *
+ * Avoid acquiring the list_lru lock if already untracked.
+ * The list_empty() test is safe as node->private_list is
+ * protected by mapping->tree_lock.
+ */
+ if (!workingset_node_shadows(node) &&
+ !list_empty(&node->private_list))
+ list_lru_del(&workingset_shadow_nodes,
+ &node->private_list);
+ __radix_tree_delete_node(&mapping->page_tree, node);
+ }
unlock:
spin_unlock_irq(&mapping->tree_lock);
}
int i;
cleancache_invalidate_inode(mapping);
- if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+ if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
return;
/* Offsets within partial pages */
*/
void truncate_inode_pages_final(struct address_space *mapping)
{
- unsigned long nrshadows;
+ unsigned long nrexceptional;
unsigned long nrpages;
/*
/*
* When reclaim installs eviction entries, it increases
- * nrshadows first, then decreases nrpages. Make sure we see
+ * nrexceptional first, then decreases nrpages. Make sure we see
* this in the right order or we might miss an entry.
*/
nrpages = mapping->nrpages;
smp_rmb();
- nrshadows = mapping->nrshadows;
+ nrexceptional = mapping->nrexceptional;
- if (nrpages || nrshadows) {
+ if (nrpages || nrexceptional) {
/*
* As truncation uses a lockless tree lookup, cycle
* the tree lock to make sure any ongoing tree
#include <linux/oom.h>
#include <linux/prefetch.h>
#include <linux/printk.h>
+#include <linux/dax.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
* inode reclaim needs to empty out the radix tree or
* the nodes are lost. Don't plant shadows behind its
* back.
+ *
+ * We also don't store shadows for DAX mappings because the
+ * only page cache pages found in these are zero pages
+ * covering holes, and because we don't want to mix DAX
+ * exceptional entries and shadow exceptional entries in the
+ * same page_tree.
*/
if (reclaimed && page_is_file_cache(page) &&
- !mapping_exiting(mapping))
+ !mapping_exiting(mapping) && !dax_mapping(mapping))
shadow = workingset_eviction(mapping, page);
__delete_from_page_cache(page, shadow, memcg);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
* Defer the checking for differentials to the
* shepherd thread on a different processor.
*/
- int r;
- /*
- * Shepherd work thread does not race since it never
- * changes the bit if its zero but the cpu
- * online / off line code may race if
- * worker threads are still allowed during
- * shutdown / startup.
- */
- r = cpumask_test_and_set_cpu(smp_processor_id(),
- cpu_stat_off);
- VM_BUG_ON(r);
+ cpumask_set_cpu(smp_processor_id(), cpu_stat_off);
}
}
node->slots[i] = NULL;
BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
- BUG_ON(!mapping->nrshadows);
- mapping->nrshadows--;
+ BUG_ON(!mapping->nrexceptional);
+ mapping->nrexceptional--;
}
}
BUG_ON(node->count);
* @unsent_req_list: accounting for requests that haven't been sent
* @req: current request being processed (if any)
* @tmp_buf: temporary buffer to read in header
- * @rsize: amount to read for current frame
- * @rpos: read position in current frame
- * @rbuf: current read buffer
+ * @rc: temporary fcall for reading current frame
* @wpos: write position for current frame
* @wsize: amount of data to write for current frame
* @wbuf: current write buffer
struct list_head unsent_req_list;
struct p9_req_t *req;
char tmp_buf[7];
- int rsize;
- int rpos;
- char *rbuf;
+ struct p9_fcall rc;
int wpos;
int wsize;
char *wbuf;
if (m->err < 0)
return;
- p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
+ p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);
- if (!m->rbuf) {
- m->rbuf = m->tmp_buf;
- m->rpos = 0;
- m->rsize = 7; /* start by reading header */
+ if (!m->rc.sdata) {
+ m->rc.sdata = m->tmp_buf;
+ m->rc.offset = 0;
+ m->rc.capacity = 7; /* start by reading header */
}
clear_bit(Rpending, &m->wsched);
- p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n",
- m, m->rpos, m->rsize, m->rsize-m->rpos);
- err = p9_fd_read(m->client, m->rbuf + m->rpos,
- m->rsize - m->rpos);
+ p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
+ m, m->rc.offset, m->rc.capacity,
+ m->rc.capacity - m->rc.offset);
+ err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
+ m->rc.capacity - m->rc.offset);
p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
- if (err == -EAGAIN) {
+ if (err == -EAGAIN)
goto end_clear;
- }
if (err <= 0)
goto error;
- m->rpos += err;
+ m->rc.offset += err;
- if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
- u16 tag;
+ /* header read in */
+ if ((!m->req) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new header\n");
- n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
- if (n >= m->client->msize) {
+ err = p9_parse_header(&m->rc, NULL, NULL, NULL, 0);
+ if (err) {
+ p9_debug(P9_DEBUG_ERROR,
+ "error parsing header: %d\n", err);
+ goto error;
+ }
+
+ if (m->rc.size >= m->client->msize) {
p9_debug(P9_DEBUG_ERROR,
- "requested packet size too big: %d\n", n);
+ "requested packet size too big: %d\n",
+ m->rc.size);
err = -EIO;
goto error;
}
- tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
p9_debug(P9_DEBUG_TRANS,
- "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
+ "mux %p pkt: size: %d bytes tag: %d\n",
+ m, m->rc.size, m->rc.tag);
- m->req = p9_tag_lookup(m->client, tag);
+ m->req = p9_tag_lookup(m->client, m->rc.tag);
if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
- tag);
+ m->rc.tag);
err = -EIO;
goto error;
}
if (m->req->rc == NULL) {
- m->req->rc = kmalloc(sizeof(struct p9_fcall) +
- m->client->msize, GFP_NOFS);
- if (!m->req->rc) {
- m->req = NULL;
- err = -ENOMEM;
- goto error;
- }
+ p9_debug(P9_DEBUG_ERROR,
+ "No recv fcall for tag %d (req %p), disconnecting!\n",
+ m->rc.tag, m->req);
+ m->req = NULL;
+ err = -EIO;
+ goto error;
}
- m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
- memcpy(m->rbuf, m->tmp_buf, m->rsize);
- m->rsize = n;
+ m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+ memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
+ m->rc.capacity = m->rc.size;
}
- /* not an else because some packets (like clunk) have no payload */
- if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
+ /* packet is read in
+ * not an else because some packets (like clunk) have no payload
+ */
+ if ((m->req) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new packet\n");
spin_lock(&m->client->lock);
if (m->req->status != REQ_STATUS_ERROR)
list_del(&m->req->req_list);
spin_unlock(&m->client->lock);
p9_client_cb(m->client, m->req, status);
- m->rbuf = NULL;
- m->rpos = 0;
- m->rsize = 0;
+ m->rc.sdata = NULL;
+ m->rc.offset = 0;
+ m->rc.capacity = 0;
m->req = NULL;
}
mutex_unlock(&virtio_9p_lock);
if (!found) {
- pr_err("no channels available\n");
+ pr_err("no channels available for device %s\n", devname);
return ret;
}
void *ticket_buf = NULL;
void *tp, *tpend;
void **ptp;
- struct ceph_timespec new_validity;
struct ceph_crypto_key new_session_key;
struct ceph_buffer *new_ticket_blob;
unsigned long new_expires, new_renew_after;
if (ret)
goto out;
- ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
- ceph_decode_timespec(&validity, &new_validity);
+ ceph_decode_timespec(&validity, dp);
+ dp += sizeof(struct ceph_timespec);
new_expires = get_seconds() + validity.tv_sec;
new_renew_after = new_expires - (validity.tv_sec / 4);
dout(" expires=%lu renew_after=%lu\n", new_expires,
ceph_buffer_put(th->ticket_blob);
th->session_key = new_session_key;
th->ticket_blob = new_ticket_blob;
- th->validity = new_validity;
th->secret_id = new_secret_id;
th->expires = new_expires;
th->renew_after = new_renew_after;
+ th->have_key = true;
dout(" got ticket service %d (%s) secret_id %lld len %d\n",
type, ceph_entity_type_name(type), th->secret_id,
(int)th->ticket_blob->vec.iov_len);
return -ERANGE;
}
+/*
+ * Report whether a key should be requested for @th: either no key is
+ * currently held, or the current time has reached the renew_after mark.
+ */
+static bool need_key(struct ceph_x_ticket_handler *th)
+{
+	return !th->have_key || get_seconds() >= th->renew_after;
+}
+
+/*
+ * Report whether @th still holds a key. As a side effect, a held key whose
+ * expiry time has been reached is marked as no longer held.
+ */
+static bool have_key(struct ceph_x_ticket_handler *th)
+{
+	if (th->have_key && get_seconds() >= th->expires)
+		th->have_key = false;
+
+	return th->have_key;
+}
+
static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
{
int want = ac->want_keys;
continue;
th = get_ticket_handler(ac, service);
-
if (IS_ERR(th)) {
*pneed |= service;
continue;
}
- if (get_seconds() >= th->renew_after)
+ if (need_key(th))
*pneed |= service;
- if (get_seconds() >= th->expires)
+ if (!have_key(th))
xi->have_keys &= ~service;
}
}
-
static int ceph_x_build_request(struct ceph_auth_client *ac,
void *buf, void *end)
{
ac->private = NULL;
}
-static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
- int peer_type)
+static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type)
{
struct ceph_x_ticket_handler *th;
th = get_ticket_handler(ac, peer_type);
if (!IS_ERR(th))
- memset(&th->validity, 0, sizeof(th->validity));
+ th->have_key = false;
+}
+
+static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
+ int peer_type)
+{
+ /*
+ * We are to invalidate a service ticket in the hopes of
+ * getting a new, hopefully more valid, one. But, we won't get
+ * it unless our AUTH ticket is good, so invalidate AUTH ticket
+ * as well, just in case.
+ */
+ invalidate_ticket(ac, peer_type);
+ invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH);
}
static int calcu_signature(struct ceph_x_authorizer *au,
unsigned int service;
struct ceph_crypto_key session_key;
- struct ceph_timespec validity;
+ bool have_key;
u64 secret_id;
struct ceph_buffer *ticket_blob;
#include <linux/ceph/pagelist.h>
#include <linux/export.h>
-#define list_entry_next(pos, member) \
- list_entry(pos->member.next, typeof(*pos), member)
-
/*
* Ceph uses the messenger to exchange ceph_msg messages with other
* hosts in the system. The messenger provides ordered and reliable
}
con->in_seq = 0;
con->in_seq_acked = 0;
+
+ con->out_skip = 0;
}
/*
static void con_out_kvec_reset(struct ceph_connection *con)
{
+ BUG_ON(con->out_skip);
+
con->out_kvec_left = 0;
con->out_kvec_bytes = 0;
con->out_kvec_cur = &con->out_kvec[0];
static void con_out_kvec_add(struct ceph_connection *con,
size_t size, void *data)
{
- int index;
+ int index = con->out_kvec_left;
- index = con->out_kvec_left;
+ BUG_ON(con->out_skip);
BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
con->out_kvec[index].iov_len = size;
con->out_kvec_bytes += size;
}
+/*
+ * Chop off a kvec from the end. Return residual number of bytes for
+ * that kvec, i.e. how many bytes would have been written if the kvec
+ * hadn't been nuked.
+ *
+ * The kvec dropped is the last of the out_kvec_left entries starting at
+ * out_kvec_cur; its length is subtracted from out_kvec_bytes and
+ * out_kvec_left is decremented. If out_kvec_bytes is 0 nothing is
+ * changed and 0 is returned.
+ */
+static int con_out_kvec_skip(struct ceph_connection *con)
+{
+ int off = con->out_kvec_cur - con->out_kvec;
+ int skip = 0;
+
+ if (con->out_kvec_bytes > 0) {
+ skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len;
+ BUG_ON(con->out_kvec_bytes < skip);
+ BUG_ON(!con->out_kvec_left);
+ con->out_kvec_bytes -= skip;
+ con->out_kvec_left--;
+ }
+
+ return skip;
+}
+
#ifdef CONFIG_BLOCK
/*
/* Move on to the next page */
BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
- cursor->page = list_entry_next(cursor->page, lru);
+ cursor->page = list_next_entry(cursor->page, lru);
cursor->last_piece = cursor->resid <= PAGE_SIZE;
return true;
if (!cursor->resid && cursor->total_resid) {
WARN_ON(!cursor->last_piece);
BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
- cursor->data = list_entry_next(cursor->data, links);
+ cursor->data = list_next_entry(cursor->data, links);
__ceph_msg_data_cursor_init(cursor);
new_piece = true;
}
m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
dout("prepare_write_message_footer %p\n", con);
- con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
if (con->ops->sign_message)
u32 crc;
con_out_kvec_reset(con);
- con->out_kvec_is_msg = true;
con->out_msg_done = false;
/* Sneak an ack in there first? If we can get it into the same
/* tag + hdr + front + middle */
con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
- con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+ con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr);
con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
if (m->middle)
con_out_kvec_add(con, m->middle->vec.iov_len,
m->middle->vec.iov_base);
- /* fill in crc (except data pages), footer */
+ /* fill in hdr crc and finalize hdr */
crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
con->out_msg->hdr.crc = cpu_to_le32(crc);
- con->out_msg->footer.flags = 0;
+ memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr));
+ /* fill in front and middle crc, footer */
crc = crc32c(0, m->front.iov_base, m->front.iov_len);
con->out_msg->footer.front_crc = cpu_to_le32(crc);
if (m->middle) {
dout("%s front_crc %u middle_crc %u\n", __func__,
le32_to_cpu(con->out_msg->footer.front_crc),
le32_to_cpu(con->out_msg->footer.middle_crc));
+ con->out_msg->footer.flags = 0;
/* is there a data payload? */
con->out_msg->footer.data_crc = 0;
}
}
con->out_kvec_left = 0;
- con->out_kvec_is_msg = false;
ret = 1;
out:
dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
{
int ret;
+ dout("%s %p %d left\n", __func__, con, con->out_skip);
while (con->out_skip > 0) {
size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
more_kvec:
/* kvec data queued? */
- if (con->out_skip) {
- ret = write_partial_skip(con);
+ if (con->out_kvec_left) {
+ ret = write_partial_kvec(con);
if (ret <= 0)
goto out;
}
- if (con->out_kvec_left) {
- ret = write_partial_kvec(con);
+ if (con->out_skip) {
+ ret = write_partial_skip(con);
if (ret <= 0)
goto out;
}
static void con_fault_finish(struct ceph_connection *con)
{
+ dout("%s %p\n", __func__, con);
+
/*
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
*/
- if (con->auth_retry && con->ops->invalidate_authorizer) {
- dout("calling invalidate_authorizer()\n");
- con->ops->invalidate_authorizer(con);
+ if (con->auth_retry) {
+ dout("auth_retry %d, invalidating\n", con->auth_retry);
+ if (con->ops->invalidate_authorizer)
+ con->ops->invalidate_authorizer(con);
+ con->auth_retry = 0;
}
if (con->ops->fault)
ceph_msg_put(msg);
}
if (con->out_msg == msg) {
- dout("%s %p msg %p - was sending\n", __func__, con, msg);
- con->out_msg = NULL;
- if (con->out_kvec_is_msg) {
- con->out_skip = con->out_kvec_bytes;
- con->out_kvec_is_msg = false;
+ BUG_ON(con->out_skip);
+ /* footer */
+ if (con->out_msg_done) {
+ con->out_skip += con_out_kvec_skip(con);
+ } else {
+ BUG_ON(!msg->data_length);
+ if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
+ con->out_skip += sizeof(msg->footer);
+ else
+ con->out_skip += sizeof(msg->old_footer);
}
+ /* data, middle, front */
+ if (msg->data_length)
+ con->out_skip += msg->cursor.total_resid;
+ if (msg->middle)
+ con->out_skip += con_out_kvec_skip(con);
+ con->out_skip += con_out_kvec_skip(con);
+
+ dout("%s %p msg %p - was sending, will write %d skip %d\n",
+ __func__, con, msg, con->out_kvec_bytes, con->out_skip);
msg->hdr.seq = 0;
-
+ con->out_msg = NULL;
ceph_msg_put(msg);
}
+
mutex_unlock(&con->mutex);
}
static void ceph_msg_release(struct kref *kref)
{
struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
- LIST_HEAD(data);
- struct list_head *links;
- struct list_head *next;
+ struct ceph_msg_data *data, *next;
dout("%s %p\n", __func__, m);
WARN_ON(!list_empty(&m->list_head));
m->middle = NULL;
}
- list_splice_init(&m->data, &data);
- list_for_each_safe(links, next, &data) {
- struct ceph_msg_data *data;
-
- data = list_entry(links, struct ceph_msg_data, links);
- list_del_init(links);
+ list_for_each_entry_safe(data, next, &m->data, links) {
+ list_del_init(&data->links);
ceph_msg_data_destroy(data);
}
m->data_length = 0;
return monc->client->have_fsid && monc->auth->global_id > 0;
}
-/*
- * The monitor responds with mount ack indicate mount success. The
- * included client ticket allows the client to talk to MDSs and OSDs.
- */
static void ceph_monc_handle_map(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
if (!n->tn_bits)
kmem_cache_free(trie_leaf_kmem, n);
- else if (n->tn_bits <= TNODE_KMALLOC_MAX)
- kfree(n);
else
- vfree(n);
+ kvfree(n);
}
#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
static void rds_ib_add_one(struct ib_device *device)
{
struct rds_ib_device *rds_ibdev;
- struct ib_device_attr *dev_attr;
/* Only handle IB (no iWARP) devices */
if (device->node_type != RDMA_NODE_IB_CA)
return;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- rdsdebug("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
ibdev_to_node(device));
if (!rds_ibdev)
- goto free_attr;
+ return;
spin_lock_init(&rds_ibdev->spinlock);
atomic_set(&rds_ibdev->refcount, 1);
INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
- rds_ibdev->max_wrs = dev_attr->max_qp_wr;
- rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
+ rds_ibdev->max_wrs = device->attrs.max_qp_wr;
+ rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
- rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32;
- rds_ibdev->max_1m_fmrs = dev_attr->max_mr ?
- min_t(unsigned int, (dev_attr->max_mr / 2),
+ rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
+ rds_ibdev->max_1m_fmrs = device->attrs.max_mr ?
+ min_t(unsigned int, (device->attrs.max_mr / 2),
rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size;
- rds_ibdev->max_8k_fmrs = dev_attr->max_mr ?
- min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE),
+ rds_ibdev->max_8k_fmrs = device->attrs.max_mr ?
+ min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE),
rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size;
- rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
- rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
+ rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
+ rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
rds_ibdev->dev = device;
rds_ibdev->pd = ib_alloc_pd(device);
}
rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n",
- dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
+ device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs,
rds_ibdev->max_8k_fmrs);
put_dev:
rds_ib_dev_put(rds_ibdev);
-free_attr:
- kfree(dev_attr);
}
/*
static void rds_iw_add_one(struct ib_device *device)
{
struct rds_iw_device *rds_iwdev;
- struct ib_device_attr *dev_attr;
/* Only handle iwarp devices */
if (device->node_type != RDMA_NODE_RNIC)
return;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- rdsdebug("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
if (!rds_iwdev)
- goto free_attr;
+ return;
spin_lock_init(&rds_iwdev->spinlock);
- rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
- rds_iwdev->max_wrs = dev_attr->max_qp_wr;
- rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE);
+ rds_iwdev->dma_local_lkey = !!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
+ rds_iwdev->max_wrs = device->attrs.max_qp_wr;
+ rds_iwdev->max_sge = min(device->attrs.max_sge, RDS_IW_MAX_SGE);
rds_iwdev->dev = device;
rds_iwdev->pd = ib_alloc_pd(device);
list_add_tail(&rds_iwdev->list, &rds_iw_devices);
ib_set_client_data(device, &rds_iw_client, rds_iwdev);
-
- goto free_attr;
+ return;
err_mr:
if (rds_iwdev->mr)
ib_dealloc_pd(rds_iwdev->pd);
free_dev:
kfree(rds_iwdev);
-free_attr:
- kfree(dev_attr);
}
static void rds_iw_remove_one(struct ib_device *device, void *client_data)
if (count == 0)
return 0;
- mutex_lock(&inode->i_mutex); /* protect against multiple concurrent
+ inode_lock(inode); /* protect against multiple concurrent
* readers on this file */
again:
spin_lock(&queue_lock);
}
if (rp->q.list.next == &cd->queue) {
spin_unlock(&queue_lock);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
WARN_ON_ONCE(rp->offset);
return 0;
}
}
if (err == -EAGAIN)
goto again;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err ? err : count;
}
if (!cd->cache_parse)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = cache_downcall(mapping, buf, count, cd);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
return ret;
}
int need_release;
LIST_HEAD(free_list);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
spin_lock(&pipe->lock);
need_release = pipe->nreaders != 0 || pipe->nwriters != 0;
pipe->nreaders = 0;
cancel_delayed_work_sync(&pipe->queue_timeout);
rpc_inode_setowner(inode, NULL);
RPC_I(inode)->pipe = NULL;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
static struct inode *
int first_open;
int res = -ENXIO;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
pipe->nwriters++;
res = 0;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
struct rpc_pipe_msg *msg;
int last_close;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
if (last_close && pipe->ops->release_pipe)
pipe->ops->release_pipe(inode);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
struct rpc_pipe_msg *msg;
int res = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL) {
res = -EPIPE;
pipe->ops->destroy_msg(msg);
}
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
struct inode *inode = file_inode(filp);
int res;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
res = -EPIPE;
if (RPC_I(inode)->pipe != NULL)
res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
poll_wait(filp, &rpci->waitq, wait);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (rpci->pipe == NULL)
mask |= POLLERR | POLLHUP;
else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
mask |= POLLIN | POLLRDNORM;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return mask;
}
switch (cmd) {
case FIONREAD:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -EPIPE;
}
spin_lock(&pipe->lock);
len += msg->len - msg->copied;
}
spin_unlock(&pipe->lock);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return put_user(len, (int __user *)arg);
default:
return -EINVAL;
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
error = __rpc_rmdir(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
{
struct inode *dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(dir, I_MUTEX_CHILD);
__rpc_depopulate(parent, files, start, eof);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
}
static int rpc_populate(struct dentry *parent,
struct dentry *dentry;
int i, err;
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
for (i = start; i < eof; i++) {
dentry = __rpc_lookup_create_exclusive(parent, files[i].name);
err = PTR_ERR(dentry);
if (err != 0)
goto out_bad;
}
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return 0;
out_bad:
__rpc_depopulate(parent, files, start, eof);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
__FILE__, __func__, parent);
return err;
struct inode *dir = d_inode(parent);
int error;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
if (IS_ERR(dentry))
goto out;
goto err_rmdir;
}
out:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return dentry;
err_rmdir:
__rpc_rmdir(dir, dentry);
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
if (depopulate != NULL)
depopulate(dentry);
error = __rpc_rmdir(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
if (pipe->ops->downcall == NULL)
umode &= ~S_IWUGO;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
if (IS_ERR(dentry))
goto out;
if (err)
goto out_err;
out:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return dentry;
out_err:
dentry = ERR_PTR(err);
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
error = __rpc_rmpipe(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
if (atomic_dec_and_test(&xprt->count))
xprt_destroy(xprt);
}
+EXPORT_SYMBOL_GPL(xprt_put);
rpcrdma-y := transport.o rpc_rdma.o verbs.o \
fmr_ops.o frwr_ops.o physical_ops.o \
- svc_rdma.o svc_rdma_transport.o \
+ svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
module.o
rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
- struct ib_device_attr *devattr = &ia->ri_devattr;
int depth, delta;
ia->ri_max_frmr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- devattr->max_fast_reg_page_list_len);
+ ia->ri_device->attrs.max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n",
__func__, ia->ri_max_frmr_depth);
}
ep->rep_attr.cap.max_send_wr *= depth;
- if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
- cdata->max_requests = devattr->max_qp_wr / depth;
+ if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
+ cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests *
static unsigned int min_ord = 1;
static unsigned int max_ord = 4096;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
+unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
-/* Temporary NFS request map and context caches */
-struct kmem_cache *svc_rdma_map_cachep;
-struct kmem_cache *svc_rdma_ctxt_cachep;
-
struct workqueue_struct *svc_rdma_wq;
/*
svc_unreg_xprt_class(&svc_rdma_bc_class);
#endif
svc_unreg_xprt_class(&svc_rdma_class);
- kmem_cache_destroy(svc_rdma_map_cachep);
- kmem_cache_destroy(svc_rdma_ctxt_cachep);
}
int svc_rdma_init(void)
{
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
- dprintk("\tmax_requests : %d\n", svcrdma_max_requests);
- dprintk("\tsq_depth : %d\n",
+ dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
+ dprintk("\tsq_depth : %u\n",
svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
+ dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
- /* Create the temporary map cache */
- svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
- sizeof(struct svc_rdma_req_map),
- 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
- if (!svc_rdma_map_cachep) {
- printk(KERN_INFO "Could not allocate map cache.\n");
- goto err0;
- }
-
- /* Create the temporary context cache */
- svc_rdma_ctxt_cachep =
- kmem_cache_create("svc_rdma_ctxt_cache",
- sizeof(struct svc_rdma_op_ctxt),
- 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
- if (!svc_rdma_ctxt_cachep) {
- printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
- goto err1;
- }
-
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
svc_reg_xprt_class(&svc_rdma_bc_class);
#endif
return 0;
- err1:
- kmem_cache_destroy(svc_rdma_map_cachep);
- err0:
- unregister_sysctl_table(svcrdma_table_header);
- destroy_workqueue(svc_rdma_wq);
- return -ENOMEM;
}
--- /dev/null
+/*
+ * Copyright (c) 2015 Oracle. All rights reserved.
+ *
+ * Support for backward direction RPCs on RPC/RDMA (server-side).
+ */
+
+#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+
+#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+
+#undef SVCRDMA_BACKCHANNEL_DEBUG
+/* Deliver a received backchannel reply to the rpc_rqst waiting for it.
+ *
+ * The request is looked up by rmsgp->rm_xid while holding
+ * xprt->transport_lock. The head kvec of @rcvbuf is copied into the
+ * head of the request's rq_private_buf, xprt->cwnd is recomputed from
+ * the credit value in @rmsgp, and the request is completed with
+ * rcvbuf->len, which is then set to zero.
+ *
+ * Returns 0 on success, or -EAGAIN when the head kvec is shorter than
+ * 24 bytes, no request matches the XID, or the head of the request's
+ * receive buffer is smaller than the reply head.
+ */
+int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
+ struct xdr_buf *rcvbuf)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct kvec *dst, *src = &rcvbuf->head[0];
+ struct rpc_rqst *req;
+ unsigned long cwnd;
+ u32 credits;
+ size_t len;
+ __be32 xid;
+ __be32 *p;
+ int ret;
+
+ p = (__be32 *)src->iov_base;
+ len = src->iov_len;
+ xid = rmsgp->rm_xid;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+ pr_info("%s: xid=%08x, length=%zu\n",
+ __func__, be32_to_cpu(xid), len);
+ pr_info("%s: RPC/RDMA: %*ph\n",
+ __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
+ pr_info("%s: RPC: %*ph\n",
+ __func__, (int)len, p);
+#endif
+
+ ret = -EAGAIN; /* result for every early exit below */
+ if (src->iov_len < 24)
+ goto out_shortreply;
+
+ spin_lock_bh(&xprt->transport_lock);
+ req = xprt_lookup_rqst(xprt, xid);
+ if (!req)
+ goto out_notfound;
+
+ dst = &req->rq_private_buf.head[0];
+ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
+ if (dst->iov_len < len) /* reply head does not fit; ret is still -EAGAIN */
+ goto out_unlock;
+ memcpy(dst->iov_base, p, len);
+
+ credits = be32_to_cpu(rmsgp->rm_credit);
+ if (credits == 0)
+ credits = 1; /* don't deadlock */
+ else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
+ credits = r_xprt->rx_buf.rb_bc_max_requests;
+
+ cwnd = xprt->cwnd; /* remember the old window for the comparison below */
+ xprt->cwnd = credits << RPC_CWNDSHIFT;
+ if (xprt->cwnd > cwnd) /* only when the window grew */
+ xprt_release_rqst_cong(req->rq_task);
+
+ ret = 0;
+ xprt_complete_rqst(req->rq_task, rcvbuf->len);
+ rcvbuf->len = 0;
+
+out_unlock:
+ spin_unlock_bh(&xprt->transport_lock);
+out:
+ return ret;
+
+out_shortreply:
+ dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
+ xprt, src->iov_len);
+ goto out;
+
+out_notfound:
+ dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
+ xprt, be32_to_cpu(xid));
+
+ goto out_unlock;
+}
+
+/* Send a backwards direction RPC call.
+ *
+ * Caller holds the connection's mutex and has already marshaled
+ * the RPC/RDMA request.
+ *
+ * This is similar to svc_rdma_reply, but takes an rpc_rqst
+ * instead, does not support chunks, and avoids blocking memory
+ * allocation.
+ *
+ * XXX: There is still an opportunity to block in svc_rdma_send()
+ * if there are no SQ entries to post the Send. This may occur if
+ * the adapter has a small maximum SQ depth.
+ *
+ * Returns 0 once the Send has been posted. Otherwise returns the
+ * error from svc_rdma_map_xdr(), -ENOTCONN if no receive buffer
+ * could be posted (the transport is then also marked XPT_CLOSE),
+ * -ENOMEM if no send context could be obtained, or -EIO if DMA
+ * mapping or posting the Send fails.
+ */
+static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
+			      struct rpc_rqst *rqst)
+{
+	struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
+	struct svc_rdma_op_ctxt *ctxt;
+	struct svc_rdma_req_map *vec;
+	struct ib_send_wr send_wr;
+	int ret;
+
+	vec = svc_rdma_get_req_map(rdma);
+	ret = svc_rdma_map_xdr(rdma, sndbuf, vec);
+	if (ret)
+		goto out_err;
+
+	/* Post a recv buffer to handle the reply for this request. */
+	ret = svc_rdma_post_recv(rdma, GFP_NOIO);
+	if (ret) {
+		pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n",
+		       ret);
+		pr_err("svcrdma: closing transport %p.\n", rdma);
+		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+		ret = -ENOTCONN;
+		goto out_err;
+	}
+
+	/* svc_rdma_get_context() returns NULL when its free list is
+	 * empty and the fallback allocation fails; do not dereference
+	 * it in that case.
+	 */
+	ctxt = svc_rdma_get_context(rdma);
+	if (!ctxt) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+	ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
+	ctxt->count = 1;
+
+	ctxt->wr_op = IB_WR_SEND;
+	ctxt->direction = DMA_TO_DEVICE;
+	ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
+	ctxt->sge[0].length = sndbuf->len;
+	ctxt->sge[0].addr =
+	    ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
+			    sndbuf->len, DMA_TO_DEVICE);
+	if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
+		/* NOTE(review): this path reaches svc_rdma_unmap_dma()
+		 * although the mapping failed and sc_dma_used was not
+		 * incremented - confirm that helper tolerates it.
+		 */
+		ret = -EIO;
+		goto out_unmap;
+	}
+	atomic_inc(&rdma->sc_dma_used);
+
+	memset(&send_wr, 0, sizeof(send_wr));
+	send_wr.wr_id = (unsigned long)ctxt;
+	send_wr.sg_list = ctxt->sge;
+	send_wr.num_sge = 1;
+	send_wr.opcode = IB_WR_SEND;
+	send_wr.send_flags = IB_SEND_SIGNALED;
+
+	ret = svc_rdma_send(rdma, &send_wr);
+	if (ret) {
+		ret = -EIO;
+		goto out_unmap;
+	}
+
+	/* Success falls through: the req map is released on every path. */
+out_err:
+	svc_rdma_put_req_map(rdma, vec);
+	dprintk("svcrdma: %s returns %d\n", __func__, ret);
+	return ret;
+
+out_unmap:
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_context(ctxt, 1);
+	goto out_err;
+}
+
+/* Server-side transport endpoint wants a whole page for its send
+ * buffer. The client RPC code constructs the RPC header in this
+ * buffer before it invokes ->send_request.
+ *
+ * @task is not consulted; exactly one page is allocated regardless of
+ * @size. A @size larger than PAGE_SIZE only triggers a one-time
+ * warning and the single page is still returned.
+ *
+ * Returns the page's kernel address, or NULL if there was a temporary
+ * allocation failure.
+ */
+static void *
+xprt_rdma_bc_allocate(struct rpc_task *task, size_t size)
+{
+	struct page *page;
+
+	/* Prevent an infinite loop: try to make this case work */
+	WARN_ONCE(size > PAGE_SIZE,
+		  "svcrdma: large bc buffer request (size %zu)\n", size);
+
+	page = alloc_page(RPCRDMA_DEF_GFP);
+	if (!page)
+		return NULL;
+
+	return page_address(page);
+}
+
+static void
+xprt_rdma_bc_free(void *buffer)
+{
+ /* No-op: ctxt and page have already been freed.
+ * @buffer is deliberately not referenced here.
+ */
+}
+
+/* Write the RPC/RDMA header at the start of the request's send buffer
+ * and hand the request to svc_rdma_bc_sendto().
+ *
+ * Returns 0 on success. If the send fails, the rpc_xprt is passed to
+ * xprt_disconnect_done() and -ENOTCONN is returned.
+ */
+static int
+rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
+{
+	struct rpc_xprt *xprt = rqst->rq_xprt;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+	struct rpcrdma_msg *hdr = (struct rpcrdma_msg *)rqst->rq_buffer;
+	unsigned int i;
+
+	/* Space in the send buffer for an RPC/RDMA header is reserved
+	 * via xprt->tsh_size.
+	 */
+	hdr->rm_xid = rqst->rq_xid;
+	hdr->rm_vers = rpcrdma_version;
+	hdr->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
+	hdr->rm_type = rdma_msg;
+	/* the first three rm_chunks words are all zero */
+	for (i = 0; i < 3; i++)
+		hdr->rm_body.rm_chunks[i] = xdr_zero;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+	pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
+#endif
+
+	if (svc_rdma_bc_sendto(rdma, rqst) == 0)
+		return 0;
+
+	dprintk("svcrdma: failed to send bc call\n");
+	xprt_disconnect_done(xprt);
+	return -ENOTCONN;
+}
+
+/* Send an RPC call on the passive end of a transport
+ * connection.
+ *
+ * The send is performed while holding the svc_xprt's xpt_mutex. If the
+ * mutex cannot be taken immediately, the task is queued on
+ * xpt_bc_pending and the trylock is attempted once more.
+ *
+ * Returns 0 on success, -EAGAIN if the mutex is still unavailable
+ * after queuing, or -ENOTCONN if the svc_xprt has XPT_DEAD set or
+ * rpcrdma_bc_send_request() fails.
+ */
+static int
+xprt_rdma_bc_send_request(struct rpc_task *task)
+{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+ struct svcxprt_rdma *rdma;
+ int ret;
+
+ dprintk("svcrdma: sending bc call with xid: %08x\n",
+ be32_to_cpu(rqst->rq_xid));
+
+ if (!mutex_trylock(&sxprt->xpt_mutex)) {
+ rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
+ if (!mutex_trylock(&sxprt->xpt_mutex))
+ return -EAGAIN; /* task stays queued on xpt_bc_pending */
+ rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
+ }
+
+ ret = -ENOTCONN; /* result when XPT_DEAD is set */
+ rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+ if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
+ ret = rpcrdma_bc_send_request(rdma, rqst);
+
+ mutex_unlock(&sxprt->xpt_mutex);
+
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+/* ->close method of xprt_rdma_bc_procs: only emits a debug message;
+ * no state of @xprt is changed here.
+ */
+static void
+xprt_rdma_bc_close(struct rpc_xprt *xprt)
+{
+ dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+}
+/* ->destroy method of xprt_rdma_bc_procs: frees @xprt with xprt_free()
+ * and drops one reference on this module. xprt_setup_rdma_bc() takes a
+ * module reference with try_module_get() when it creates the xprt.
+ */
+static void
+xprt_rdma_bc_put(struct rpc_xprt *xprt)
+{
+ dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+
+ xprt_free(xprt);
+ module_put(THIS_MODULE);
+}
+/* Operations table installed as xprt->ops by xprt_setup_rdma_bc().
+ * The xprt_rdma_bc_* entries are the backchannel-specific methods
+ * defined above; the remaining entries are helpers defined elsewhere.
+ */
+static struct rpc_xprt_ops xprt_rdma_bc_procs = {
+ .reserve_xprt = xprt_reserve_xprt_cong,
+ .release_xprt = xprt_release_xprt_cong,
+ .alloc_slot = xprt_alloc_slot,
+ .release_request = xprt_release_rqst_cong,
+ .buf_alloc = xprt_rdma_bc_allocate,
+ .buf_free = xprt_rdma_bc_free,
+ .send_request = xprt_rdma_bc_send_request,
+ .set_retrans_timeout = xprt_set_retrans_timeout_def,
+ .close = xprt_rdma_bc_close,
+ .destroy = xprt_rdma_bc_put,
+ .print_stats = xprt_rdma_print_stats
+};
+/* Timeout parameters assigned to xprt->timeout by xprt_setup_rdma_bc():
+ * the initial and maximum values are both 60 * HZ; no other field is set.
+ */
+static const struct rpc_timeout xprt_rdma_bc_timeout = {
+ .to_initval = 60 * HZ,
+ .to_maxval = 60 * HZ,
+};
+
+/* It shouldn't matter if the number of backchannel session slots
+ * doesn't match the number of RPC/RDMA credits. That just means
+ * one or the other will have extra slots that aren't used.
+ *
+ * Creates the rpc_xprt used for backchannel calls and links it with
+ * args->bc_xprt in both directions (xpt_bc_xprt and bc_xprt). The new
+ * xprt is marked bound and connected, uses xprt_rdma_bc_procs and
+ * xprt_rdma_bc_timeout, and is allocated with RPCRDMA_MAX_BC_REQUESTS
+ * request slots.
+ *
+ * Returns the new xprt on success, or ERR_PTR(-EBADF) if
+ * args->addrlen exceeds the size of xprt->addr, ERR_PTR(-ENOMEM) if
+ * xprt_alloc() fails, or ERR_PTR(-EINVAL) if a reference on this
+ * module cannot be taken.
+ */
+static struct rpc_xprt *
+xprt_setup_rdma_bc(struct xprt_create *args)
+{
+ struct rpc_xprt *xprt;
+ struct rpcrdma_xprt *new_xprt;
+
+ if (args->addrlen > sizeof(xprt->addr)) { /* sizeof only: xprt is not yet assigned */
+ dprintk("RPC: %s: address too large\n", __func__);
+ return ERR_PTR(-EBADF);
+ }
+
+ xprt = xprt_alloc(args->net, sizeof(*new_xprt),
+ RPCRDMA_MAX_BC_REQUESTS,
+ RPCRDMA_MAX_BC_REQUESTS);
+ if (!xprt) {
+ dprintk("RPC: %s: couldn't allocate rpc_xprt\n",
+ __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ xprt->timeout = &xprt_rdma_bc_timeout;
+ xprt_set_bound(xprt);
+ xprt_set_connected(xprt);
+ xprt->bind_timeout = RPCRDMA_BIND_TO;
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+ xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
+
+ xprt->prot = XPRT_TRANSPORT_BC_RDMA;
+ xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
+ xprt->ops = &xprt_rdma_bc_procs;
+
+ memcpy(&xprt->addr, args->dstaddr, args->addrlen);
+ xprt->addrlen = args->addrlen;
+ xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr);
+ xprt->resvport = 0;
+
+ xprt->max_payload = xprt_rdma_max_inline_read;
+
+ new_xprt = rpcx_to_rdmax(xprt);
+ new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs;
+
+ xprt_get(xprt); /* reference paired with the xpt_bc_xprt link set on the next line */
+ args->bc_xprt->xpt_bc_xprt = xprt;
+ xprt->bc_xprt = args->bc_xprt;
+
+ if (!try_module_get(THIS_MODULE))
+ goto out_fail;
+
+ /* Final put for backchannel xprt is in __svc_rdma_free */
+ xprt_get(xprt);
+ return xprt;
+
+out_fail:
+ xprt_rdma_free_addresses(xprt);
+ args->bc_xprt->xpt_bc_xprt = NULL;
+ xprt_put(xprt); /* drops the reference taken by the first xprt_get() above */
+ xprt_free(xprt);
+ return ERR_PTR(-EINVAL);
+}
+/* Transport class descriptor for XPRT_TRANSPORT_BC_RDMA; its ->setup
+ * method, xprt_setup_rdma_bc(), creates the backchannel rpc_xprt.
+ */
+struct xprt_class xprt_rdma_bc = {
+ .list = LIST_HEAD_INIT(xprt_rdma_bc.list),
+ .name = "rdma backchannel",
+ .owner = THIS_MODULE,
+ .ident = XPRT_TRANSPORT_BC_RDMA,
+ .setup = xprt_setup_rdma_bc,
+};
head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
head->arg.page_len += len;
+
head->arg.len += len;
if (!pg_off)
head->count++;
goto err;
atomic_inc(&xprt->sc_dma_used);
- /* The lkey here is either a local dma lkey or a dma_mr lkey */
- ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->sge[pno].length = len;
ctxt->count++;
return ret;
}
+/* Decide whether a received message is a reply to a backchannel call.
+ *
+ * By convention such messages are of type rdma_msg and carry no chunk
+ * lists, so the RPC/RDMA header is small and fixed in size and the RPC
+ * header's XID and direction words can be read at fixed offsets
+ * (words 7 and 8 counted from the start of @rmsgp).
+ *
+ * Returns false when @xprt has no backchannel xprt, when the message
+ * is not a chunk-less rdma_msg, when word 7 differs from rm_xid, or
+ * when the direction word says RPC_CALL; true otherwise.
+ */
+static bool
+svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
+{
+	const __be32 *words = (__be32 *)rmsgp;
+	unsigned int i;
+
+	if (!xprt->xpt_bc_xprt || rmsgp->rm_type != rdma_msg)
+		return false;
+
+	/* all three chunk words must be zero */
+	for (i = 0; i < 3; i++) {
+		if (rmsgp->rm_body.rm_chunks[i] != xdr_zero)
+			return false;
+	}
+
+	/* sanity check on the XID, then the call direction */
+	return words[7] == rmsgp->rm_xid &&
+	       words[8] != cpu_to_be32(RPC_CALL);
+}
+
/*
* Set up the rqstp thread context to point to the RQ buffer. If
* necessary, pull additional data from the client with an RDMA_READ
goto close_out;
}
+ if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
+ ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
+ &rqstp->rq_arg);
+ svc_rdma_put_context(ctxt, 0);
+ if (ret)
+ goto repost;
+ return ret;
+ }
+
/* Read read-list data. */
ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
if (ret > 0) {
set_bit(XPT_CLOSE, &xprt->xpt_flags);
defer:
return 0;
+
+repost:
+ ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL);
+ if (ret) {
+ pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
+ ret);
+ pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
+ set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
+ ret = -ENOTCONN;
+ }
+ return ret;
}
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-static int map_xdr(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct svc_rdma_req_map *vec)
+int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
+ struct xdr_buf *xdr,
+ struct svc_rdma_req_map *vec)
{
int sge_no;
u32 sge_bytes;
if (xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
- pr_err("svcrdma: map_xdr: XDR buffer length error\n");
+ pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
return -EIO;
}
sge_no++;
}
- dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
+ dprintk("svcrdma: %s: sge_no %d page_no %d "
"page_base %u page_len %u head_len %zu tail_len %zu\n",
- sge_no, page_no, xdr->page_base, xdr->page_len,
+ __func__, sge_no, page_no, xdr->page_base, xdr->page_len,
xdr->head[0].iov_len, xdr->tail[0].iov_len);
vec->count = sge_no;
sge[sge_no].addr))
goto err;
atomic_inc(&xprt->sc_dma_used);
- sge[sge_no].lkey = xprt->sc_dma_lkey;
+ sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->count++;
sge_off = 0;
sge_no++;
int ret;
/* Post a recv buffer to handle another request. */
- ret = svc_rdma_post_recv(rdma);
+ ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
if (ret) {
printk(KERN_INFO
"svcrdma: could not post a receive buffer, err=%d."
ctxt->count = 1;
/* Prepare the SGE for the RPCRDMA Header */
- ctxt->sge[0].lkey = rdma->sc_dma_lkey;
+ ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
ctxt->sge[sge_no].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);
- ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+ ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
if (byte_count != 0) {
/* Build an req vec for the XDR */
ctxt = svc_rdma_get_context(rdma);
ctxt->direction = DMA_TO_DEVICE;
- vec = svc_rdma_get_req_map();
- ret = map_xdr(rdma, &rqstp->rq_res, vec);
+ vec = svc_rdma_get_req_map(rdma);
+ ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec);
if (ret)
goto err0;
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
- res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ ret = -ENOMEM;
+ res_page = alloc_page(GFP_KERNEL);
+ if (!res_page)
+ goto err0;
rdma_resp = page_address(res_page);
reply_ary = svc_rdma_get_reply_array(rdma_argp);
if (reply_ary)
ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
inline_bytes);
- svc_rdma_put_req_map(vec);
+ svc_rdma_put_req_map(rdma, vec);
dprintk("svcrdma: send_reply returns %d\n", ret);
return ret;
err1:
put_page(res_page);
err0:
- svc_rdma_put_req_map(vec);
+ svc_rdma_put_req_map(rdma, vec);
svc_rdma_put_context(ctxt, 0);
return ret;
}
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
+ gfp_t flags)
{
struct svc_rdma_op_ctxt *ctxt;
- ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
- GFP_KERNEL | __GFP_NOFAIL);
- ctxt->xprt = xprt;
- INIT_LIST_HEAD(&ctxt->dto_q);
+ ctxt = kmalloc(sizeof(*ctxt), flags);
+ if (ctxt) {
+ ctxt->xprt = xprt;
+ INIT_LIST_HEAD(&ctxt->free);
+ INIT_LIST_HEAD(&ctxt->dto_q);
+ }
+ return ctxt;
+}
+
+/*
+ * Pre-allocate the op contexts for a transport.
+ *
+ * Allocates sc_sq_depth + sc_rq_depth contexts with GFP_KERNEL and puts
+ * each one on xprt->sc_ctxts. sc_ctxt_lock is not taken here.
+ * NOTE(review): presumably nothing else can reach this transport's ctxt
+ * list yet when this runs - confirm against the caller.
+ *
+ * Returns true if every allocation succeeded. On failure returns false
+ * and leaves the contexts allocated so far on sc_ctxts; they are not
+ * freed here (svc_rdma_destroy_ctxts() walks and frees that list).
+ */
+static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
+{
+ unsigned int i;
+
+ /* Each RPC/RDMA credit can consume a number of send
+ * and receive WQEs. One ctxt is allocated for each.
+ */
+ i = xprt->sc_sq_depth + xprt->sc_rq_depth;
+
+ while (i--) {
+ struct svc_rdma_op_ctxt *ctxt;
+
+ ctxt = alloc_ctxt(xprt, GFP_KERNEL);
+ if (!ctxt) {
+ dprintk("svcrdma: No memory for RDMA ctxt\n");
+ return false;
+ }
+ list_add(&ctxt->free, &xprt->sc_ctxts);
+ }
+ return true;
+}
+
+/*
+ * Obtain an op context for @xprt.
+ *
+ * Takes the first entry off xprt->sc_ctxts under sc_ctxt_lock (BH
+ * disabled) and counts it in sc_ctxt_used. If the list is empty, the
+ * lock is dropped and a fresh context is allocated with GFP_NOIO.
+ * Either way the returned context has count and frmr reset.
+ *
+ * Returns NULL if the list was empty and the fallback allocation
+ * failed; in that case the sc_ctxt_used increment is undone and a
+ * one-time warning is emitted.
+ * NOTE(review): at least one caller visible in this patch dereferences
+ * the result without a NULL check - confirm that is acceptable.
+ */
+struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+{
+ struct svc_rdma_op_ctxt *ctxt = NULL;
+
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ /* Counted before the list check; undone below on total failure */
+ xprt->sc_ctxt_used++;
+ if (list_empty(&xprt->sc_ctxts))
+ goto out_empty;
+
+ ctxt = list_first_entry(&xprt->sc_ctxts,
+ struct svc_rdma_op_ctxt, free);
+ list_del_init(&ctxt->free);
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+out:
ctxt->count = 0;
ctxt->frmr = NULL;
- atomic_inc(&xprt->sc_ctxt_used);
return ctxt;
+
+out_empty:
+ /* Either pre-allocation missed the mark, or send
+ * queue accounting is broken.
+ */
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+ /* Allocate outside the lock */
+ ctxt = alloc_ctxt(xprt, GFP_NOIO);
+ if (ctxt)
+ goto out;
+
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ xprt->sc_ctxt_used--;
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+ WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
+ return NULL;
}
void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
/*
* Unmap the DMA addr in the SGE if the lkey matches
- * the sc_dma_lkey, otherwise, ignore it since it is
+ * the local_dma_lkey, otherwise, ignore it since it is
* an FRMR lkey and will be unmapped later when the
* last WR that uses it completes.
*/
- if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+ if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
atomic_dec(&xprt->sc_dma_used);
ib_dma_unmap_page(xprt->sc_cm_id->device,
ctxt->sge[i].addr,
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
{
- struct svcxprt_rdma *xprt;
+ struct svcxprt_rdma *xprt = ctxt->xprt;
int i;
- xprt = ctxt->xprt;
if (free_pages)
for (i = 0; i < ctxt->count; i++)
put_page(ctxt->pages[i]);
- kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
- atomic_dec(&xprt->sc_ctxt_used);
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ xprt->sc_ctxt_used--;
+ list_add(&ctxt->free, &xprt->sc_ctxts);
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
}
-/*
- * Temporary NFS req mappings are shared across all transport
- * instances. These are short lived and should be bounded by the number
- * of concurrent server threads * depth of the SQ.
- */
-struct svc_rdma_req_map *svc_rdma_get_req_map(void)
+/*
+ * Free every op context still sitting on xprt->sc_ctxts.
+ *
+ * Each entry is unlinked and kfree()d until the list is empty.
+ * sc_ctxt_lock is not taken.
+ * NOTE(review): presumably only called once no other user of the
+ * transport remains - confirm against the caller.
+ */
+static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
+{
+ while (!list_empty(&xprt->sc_ctxts)) {
+ struct svc_rdma_op_ctxt *ctxt;
+
+ ctxt = list_first_entry(&xprt->sc_ctxts,
+ struct svc_rdma_op_ctxt, free);
+ list_del(&ctxt->free);
+ kfree(ctxt);
+ }
+}
+
+static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
{
struct svc_rdma_req_map *map;
- map = kmem_cache_alloc(svc_rdma_map_cachep,
- GFP_KERNEL | __GFP_NOFAIL);
+
+ map = kmalloc(sizeof(*map), flags);
+ if (map)
+ INIT_LIST_HEAD(&map->free);
+ return map;
+}
+
+/*
+ * Pre-allocate the request maps for a transport.
+ *
+ * Allocates sc_max_requests maps with GFP_KERNEL and puts each one on
+ * xprt->sc_maps. sc_map_lock is not taken here.
+ *
+ * Returns true if every allocation succeeded. On failure returns false
+ * and leaves the maps allocated so far on sc_maps; they are not freed
+ * here (svc_rdma_destroy_maps() walks and frees that list).
+ */
+static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
+{
+ unsigned int i;
+
+ /* One for each receive buffer on this connection. */
+ i = xprt->sc_max_requests;
+
+ while (i--) {
+ struct svc_rdma_req_map *map;
+
+ map = alloc_req_map(GFP_KERNEL);
+ if (!map) {
+ dprintk("svcrdma: No memory for request map\n");
+ return false;
+ }
+ list_add(&map->free, &xprt->sc_maps);
+ }
+ return true;
+}
+
+/*
+ * Obtain a request map for @xprt.
+ *
+ * Takes the first entry off xprt->sc_maps under sc_map_lock. If the
+ * list is empty, the lock is dropped and a fresh map is allocated with
+ * GFP_NOIO. Either way the returned map has count reset to zero.
+ *
+ * Returns NULL, after a one-time warning, if the list was empty and
+ * the fallback allocation failed.
+ *
+ * This takes sc_map_lock with plain spin_lock(), not the _bh variant.
+ * NOTE(review): assumes sc_map_lock is never acquired from BH
+ * context - confirm.
+ */
+struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
+{
+ struct svc_rdma_req_map *map = NULL;
+
+ spin_lock(&xprt->sc_map_lock);
+ if (list_empty(&xprt->sc_maps))
+ goto out_empty;
+
+ map = list_first_entry(&xprt->sc_maps,
+ struct svc_rdma_req_map, free);
+ list_del_init(&map->free);
+ spin_unlock(&xprt->sc_map_lock);
+
+out:
map->count = 0;
return map;
+
+out_empty:
+ spin_unlock(&xprt->sc_map_lock);
+
+ /* Pre-allocation amount was incorrect */
+ map = alloc_req_map(GFP_NOIO);
+ if (map)
+ goto out;
+
+ WARN_ONCE(1, "svcrdma: empty request map list?\n");
+ return NULL;
+}
+
+/*
+ * Return @map to @xprt's list of free request maps.
+ *
+ * The map is added to the head of xprt->sc_maps under sc_map_lock; it
+ * is never freed here. That holds for any map passed in, so a map that
+ * svc_rdma_get_req_map() had to allocate on demand also ends up on the
+ * list.
+ */
+void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
+ struct svc_rdma_req_map *map)
+{
+ spin_lock(&xprt->sc_map_lock);
+ list_add(&map->free, &xprt->sc_maps);
+ spin_unlock(&xprt->sc_map_lock);
}
-void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
+static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
{
- kmem_cache_free(svc_rdma_map_cachep, map);
+ while (!list_empty(&xprt->sc_maps)) {
+ struct svc_rdma_req_map *map;
+
+ map = list_first_entry(&xprt->sc_maps,
+ struct svc_rdma_req_map, free);
+ list_del(&map->free);
+ kfree(map);
+ }
}
/* ib_cq event handler */
static void process_context(struct svcxprt_rdma *xprt,
struct svc_rdma_op_ctxt *ctxt)
{
+ struct svc_rdma_op_ctxt *read_hdr;
+ int free_pages = 0;
+
svc_rdma_unmap_dma(ctxt);
switch (ctxt->wr_op) {
case IB_WR_SEND:
- if (ctxt->frmr)
- pr_err("svcrdma: SEND: ctxt->frmr != NULL\n");
- svc_rdma_put_context(ctxt, 1);
+ free_pages = 1;
break;
case IB_WR_RDMA_WRITE:
- if (ctxt->frmr)
- pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n");
- svc_rdma_put_context(ctxt, 0);
break;
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
svc_rdma_put_frmr(xprt, ctxt->frmr);
- if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
- struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
- if (read_hdr) {
- spin_lock_bh(&xprt->sc_rq_dto_lock);
- set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- list_add_tail(&read_hdr->dto_q,
- &xprt->sc_read_complete_q);
- spin_unlock_bh(&xprt->sc_rq_dto_lock);
- } else {
- pr_err("svcrdma: ctxt->read_hdr == NULL\n");
- }
- svc_xprt_enqueue(&xprt->sc_xprt);
- }
+
+ if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags))
+ break;
+
+ read_hdr = ctxt->read_hdr;
svc_rdma_put_context(ctxt, 0);
- break;
+
+ spin_lock_bh(&xprt->sc_rq_dto_lock);
+ set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+ list_add_tail(&read_hdr->dto_q,
+ &xprt->sc_read_complete_q);
+ spin_unlock_bh(&xprt->sc_rq_dto_lock);
+ svc_xprt_enqueue(&xprt->sc_xprt);
+ return;
default:
- printk(KERN_ERR "svcrdma: unexpected completion type, "
- "opcode=%d\n",
- ctxt->wr_op);
+ dprintk("svcrdma: unexpected completion opcode=%d\n",
+ ctxt->wr_op);
break;
}
+
+ svc_rdma_put_context(ctxt, free_pages);
}
/*
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
+ INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
+ INIT_LIST_HEAD(&cma_xprt->sc_maps);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
spin_lock_init(&cma_xprt->sc_frmr_q_lock);
-
- cma_xprt->sc_ord = svcrdma_ord;
-
- cma_xprt->sc_max_req_size = svcrdma_max_req_size;
- cma_xprt->sc_max_requests = svcrdma_max_requests;
- cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
- atomic_set(&cma_xprt->sc_sq_count, 0);
- atomic_set(&cma_xprt->sc_ctxt_used, 0);
+ spin_lock_init(&cma_xprt->sc_ctxt_lock);
+ spin_lock_init(&cma_xprt->sc_map_lock);
if (listener)
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
return cma_xprt;
}
-int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
+int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
{
struct ib_recv_wr recv_wr, *bad_recv_wr;
struct svc_rdma_op_ctxt *ctxt;
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
goto err_put_ctxt;
}
- page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ page = alloc_page(flags);
+ if (!page)
+ goto err_put_ctxt;
ctxt->pages[sge_no] = page;
pa = ib_dma_map_page(xprt->sc_cm_id->device,
page, 0, PAGE_SIZE,
atomic_inc(&xprt->sc_dma_used);
ctxt->sge[sge_no].addr = pa;
ctxt->sge[sge_no].length = PAGE_SIZE;
- ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->count = sge_no + 1;
buflen += PAGE_SIZE;
}
struct rdma_conn_param conn_param;
struct ib_cq_init_attr cq_attr = {};
struct ib_qp_init_attr qp_attr;
- struct ib_device_attr devattr;
- int uninitialized_var(dma_mr_acc);
- int need_dma_mr = 0;
- int ret;
- int i;
+ struct ib_device *dev;
+ unsigned int i;
+ int ret = 0;
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
clear_bit(XPT_CONN, &xprt->xpt_flags);
dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
newxprt, newxprt->sc_cm_id);
- ret = ib_query_device(newxprt->sc_cm_id->device, &devattr);
- if (ret) {
- dprintk("svcrdma: could not query device attributes on "
- "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret);
- goto errout;
- }
+ dev = newxprt->sc_cm_id->device;
/* Qualify the transport resource defaults with the
* capabilities of this particular device */
- newxprt->sc_max_sge = min((size_t)devattr.max_sge,
+ newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
(size_t)RPCSVC_MAXPAGES);
- newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd,
+ newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
RPCSVC_MAXPAGES);
- newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr,
- (size_t)svcrdma_max_requests);
- newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
+ newxprt->sc_max_req_size = svcrdma_max_req_size;
+ newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
+ svcrdma_max_requests);
+ newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
+ svcrdma_max_bc_requests);
+ newxprt->sc_rq_depth = newxprt->sc_max_requests +
+ newxprt->sc_max_bc_requests;
+ newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+
+ if (!svc_rdma_prealloc_ctxts(newxprt))
+ goto errout;
+ if (!svc_rdma_prealloc_maps(newxprt))
+ goto errout;
/*
* Limit ORD based on client limit, local device limit, and
* configured svcrdma limit.
*/
- newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
+ newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
- newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
+ newxprt->sc_pd = ib_alloc_pd(dev);
if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n");
goto errout;
}
cq_attr.cqe = newxprt->sc_sq_depth;
- newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+ newxprt->sc_sq_cq = ib_create_cq(dev,
sq_comp_handler,
cq_event_handler,
newxprt,
dprintk("svcrdma: error creating SQ CQ for connect request\n");
goto errout;
}
- cq_attr.cqe = newxprt->sc_max_requests;
- newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+ cq_attr.cqe = newxprt->sc_rq_depth;
+ newxprt->sc_rq_cq = ib_create_cq(dev,
rq_comp_handler,
cq_event_handler,
newxprt,
qp_attr.event_handler = qp_event_handler;
qp_attr.qp_context = &newxprt->sc_xprt;
qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
- qp_attr.cap.max_recv_wr = newxprt->sc_max_requests;
+ qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
" cap.max_send_sge = %d\n"
" cap.max_recv_sge = %d\n",
newxprt->sc_cm_id, newxprt->sc_pd,
- newxprt->sc_cm_id->device, newxprt->sc_pd->device,
+ dev, newxprt->sc_pd->device,
qp_attr.cap.max_send_wr,
qp_attr.cap.max_recv_wr,
qp_attr.cap.max_send_sge,
* of an RDMA_READ. IB does not.
*/
newxprt->sc_reader = rdma_read_chunk_lcl;
- if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
newxprt->sc_frmr_pg_list_len =
- devattr.max_fast_reg_page_list_len;
+ dev->attrs.max_fast_reg_page_list_len;
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
newxprt->sc_reader = rdma_read_chunk_frmr;
}
/*
* Determine if a DMA MR is required and if so, what privs are required
*/
- if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num) &&
- !rdma_ib_or_roce(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num))
+ if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
+ !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
goto errout;
- if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ||
- !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
- need_dma_mr = 1;
- dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
- if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num) &&
- !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
- dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
- }
-
- if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num))
+ if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
- /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
- if (need_dma_mr) {
- /* Register all of physical memory */
- newxprt->sc_phys_mr =
- ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
- if (IS_ERR(newxprt->sc_phys_mr)) {
- dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
- ret);
- goto errout;
- }
- newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
- } else
- newxprt->sc_dma_lkey =
- newxprt->sc_cm_id->device->local_dma_lkey;
-
/* Post receive buffers */
- for (i = 0; i < newxprt->sc_max_requests; i++) {
- ret = svc_rdma_post_recv(newxprt);
+ for (i = 0; i < newxprt->sc_rq_depth; i++) {
+ ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
if (ret) {
dprintk("svcrdma: failure posting receive buffers\n");
goto errout;
{
struct svcxprt_rdma *rdma =
container_of(work, struct svcxprt_rdma, sc_work);
- dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
+ struct svc_xprt *xprt = &rdma->sc_xprt;
+
+ dprintk("svcrdma: %s(%p)\n", __func__, rdma);
/* We should only be called from kref_put */
- if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0)
+ if (atomic_read(&xprt->xpt_ref.refcount) != 0)
pr_err("svcrdma: sc_xprt still in use? (%d)\n",
- atomic_read(&rdma->sc_xprt.xpt_ref.refcount));
+ atomic_read(&xprt->xpt_ref.refcount));
/*
* Destroy queued, but not processed read completions. Note
}
/* Warn if we leaked a resource or under-referenced */
- if (atomic_read(&rdma->sc_ctxt_used) != 0)
+ if (rdma->sc_ctxt_used != 0)
pr_err("svcrdma: ctxt still in use? (%d)\n",
- atomic_read(&rdma->sc_ctxt_used));
+ rdma->sc_ctxt_used);
if (atomic_read(&rdma->sc_dma_used) != 0)
pr_err("svcrdma: dma still in use? (%d)\n",
atomic_read(&rdma->sc_dma_used));
- /* De-allocate fastreg mr */
+ /* Final put of backchannel client transport */
+ if (xprt->xpt_bc_xprt) {
+ xprt_put(xprt->xpt_bc_xprt);
+ xprt->xpt_bc_xprt = NULL;
+ }
+
rdma_dealloc_frmr_q(rdma);
+ svc_rdma_destroy_ctxts(rdma);
+ svc_rdma_destroy_maps(rdma);
/* Destroy the QP if present (not a listener) */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
ib_destroy_cq(rdma->sc_rq_cq);
- if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
- ib_dereg_mr(rdma->sc_phys_mr);
-
if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
ib_dealloc_pd(rdma->sc_pd);
int length;
int ret;
- p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ p = alloc_page(GFP_KERNEL);
+ if (!p)
+ return;
va = page_address(p);
/* XDR encode error */
return;
}
atomic_inc(&xprt->sc_dma_used);
- ctxt->sge[0].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->sge[0].length = length;
/* Prepare SEND WR */
*/
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
-static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
+unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
#endif
-#define RPCRDMA_BIND_TO (60U * HZ)
-#define RPCRDMA_INIT_REEST_TO (5U * HZ)
-#define RPCRDMA_MAX_REEST_TO (30U * HZ)
-#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
-
-static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
+static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */
static void
xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
}
-static void
+void
xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
{
char buf[128];
xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
}
-static void
+void
xprt_rdma_free_addresses(struct rpc_xprt *xprt)
{
unsigned int i;
if (req == NULL)
return NULL;
- flags = GFP_NOIO | __GFP_NOWARN;
+ flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
return -ENOTCONN; /* implies disconnect */
}
-static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
long idle_time = 0;
rpcrdma_destroy_wq();
frwr_destroy_recovery_wq();
+
+ rc = xprt_unregister_transport(&xprt_rdma_bc);
+ if (rc)
+ dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
+ __func__, rc);
}
int xprt_rdma_init(void)
return rc;
}
+ rc = xprt_register_transport(&xprt_rdma_bc);
+ if (rc) {
+ xprt_unregister_transport(&xprt_rdma);
+ rpcrdma_destroy_wq();
+ frwr_destroy_recovery_wq();
+ return rc;
+ }
+
dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
dprintk("Defaults:\n");
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
struct rpcrdma_ia *ia = &xprt->rx_ia;
- struct ib_device_attr *devattr = &ia->ri_devattr;
int rc;
ia->ri_dma_mr = NULL;
goto out2;
}
- rc = ib_query_device(ia->ri_device, devattr);
- if (rc) {
- dprintk("RPC: %s: ib_query_device failed %d\n",
- __func__, rc);
- goto out3;
- }
-
if (memreg == RPCRDMA_FRMR) {
- if (!(devattr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||
- (devattr->max_fast_reg_page_list_len == 0)) {
+ if (!(ia->ri_device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS) ||
+ (ia->ri_device->attrs.max_fast_reg_page_list_len == 0)) {
dprintk("RPC: %s: FRMR registration "
"not supported by HCA\n", __func__);
memreg = RPCRDMA_MTHCAFMR;
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
{
- struct ib_device_attr *devattr = &ia->ri_devattr;
struct ib_cq *sendcq, *recvcq;
struct ib_cq_init_attr cq_attr = {};
unsigned int max_qp_wr;
int rc, err;
- if (devattr->max_sge < RPCRDMA_MAX_IOVS) {
+ if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
dprintk("RPC: %s: insufficient sge's available\n",
__func__);
return -ENOMEM;
}
- if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
+ if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
dprintk("RPC: %s: insufficient wqe's available\n",
__func__);
return -ENOMEM;
}
- max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS;
+ max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS;
/* check provider's send/recv wr limits */
if (cdata->max_requests > max_qp_wr)
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
- if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */
+ if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
ep->rep_remote_cma.responder_resources = 32;
else
ep->rep_remote_cma.responder_resources =
- devattr->max_qp_rd_atom;
+ ia->ri_device->attrs.max_qp_rd_atom;
ep->rep_remote_cma.retry_count = 7;
ep->rep_remote_cma.flow_control = 0;
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
+#define RPCRDMA_BIND_TO (60U * HZ)
+#define RPCRDMA_INIT_REEST_TO (5U * HZ)
+#define RPCRDMA_MAX_REEST_TO (30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
+
/*
* Interface Adapter -- one per transport instance
*/
struct completion ri_done;
int ri_async_rc;
unsigned int ri_max_frmr_depth;
- struct ib_device_attr ri_devattr;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
};
return (struct rpcrdma_msg *)rb->rg_base;
}
+#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
+
/*
* struct rpcrdma_rep -- this structure encapsulates state required to recv
* and complete a reply, asychronously. It needs several pieces of
u32 rb_bc_srv_max_requests;
spinlock_t rb_reqslock; /* protect rb_allreqs */
struct list_head rb_allreqs;
+
+ u32 rb_bc_max_requests;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
/* RPC/RDMA module init - xprtrdma/transport.c
*/
+extern unsigned int xprt_rdma_max_inline_read;
+void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
+void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-/* Temporary NFS request map cache. Created in svc_rdma.c */
-extern struct kmem_cache *svc_rdma_map_cachep;
-/* WR context cache. Created in svc_rdma.c */
-extern struct kmem_cache *svc_rdma_ctxt_cachep;
-/* Workqueue created in svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
+extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
dir = d_inode(parent);
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
dentry = lookup_one_len(name, parent, strlen(name));
if (IS_ERR(dentry))
goto out;
}
d_instantiate(dentry, inode);
dget(dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return dentry;
out1:
dput(dentry);
dentry = ERR_PTR(error);
out:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
simple_release_fs(&mount, &mount_count);
return dentry;
}
if (!parent || d_really_is_negative(parent))
return;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
if (simple_positive(dentry)) {
if (d_is_dir(dentry))
simple_rmdir(d_inode(parent), dentry);
simple_unlink(d_inode(parent), dentry);
dput(dentry);
}
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
simple_release_fs(&mount, &mount_count);
}
EXPORT_SYMBOL_GPL(securityfs_remove);
if (!(mode & FMODE_WRITE))
return;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (atomic_read(&inode->i_writecount) == 1) {
if ((iint->version != inode->i_version) ||
(iint->flags & IMA_NEW_FILE)) {
ima_update_xattr(iint, file);
}
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
/**
if (action & IMA_FILE_APPRAISE)
function = FILE_CHECK;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (action) {
iint = integrity_inode_get(inode);
if (pathbuf)
__putname(pathbuf);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE))
return -EACCES;
return 0;
goto err;
if (i_size_read(inode) != security_policydb_len()) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_size_write(inode, security_policydb_len());
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
rc = security_read_policy(&plm->data, &plm->len);
return -EFAULT;
if (tlv.length < sizeof(unsigned int) * 2)
return -EINVAL;
+ if (!tlv.numid)
+ return -EINVAL;
down_read(&card->controls_rwsem);
kctl = snd_ctl_find_numid(card, tlv.numid);
if (kctl == NULL) {
struct snd_hrtimer *stime = t->private_data;
atomic_set(&stime->running, 0);
- hrtimer_cancel(&stime->hrt);
+ hrtimer_try_to_cancel(&stime->hrt);
hrtimer_start(&stime->hrt, ns_to_ktime(t->sticks * resolution),
HRTIMER_MODE_REL);
atomic_set(&stime->running, 1);
{
struct snd_hrtimer *stime = t->private_data;
atomic_set(&stime->running, 0);
+ hrtimer_try_to_cancel(&stime->hrt);
return 0;
}
if (! (runtime = substream->runtime))
return -ENOTTY;
- /* only fifo_size is different, so just copy all */
- data = memdup_user(data32, sizeof(*data32));
- if (IS_ERR(data))
- return PTR_ERR(data);
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* only fifo_size (RO from userspace) is different, so just copy all */
+ if (copy_from_user(data, data32, sizeof(*data32))) {
+ err = -EFAULT;
+ goto error;
+ }
if (refine)
err = snd_pcm_hw_refine(substream, data);
struct snd_seq_port_info *data;
mm_segment_t fs;
- data = memdup_user(data32, sizeof(*data32));
- if (IS_ERR(data))
- return PTR_ERR(data);
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
- if (get_user(data->flags, &data32->flags) ||
+ if (copy_from_user(data, data32, sizeof(*data32)) ||
+ get_user(data->flags, &data32->flags) ||
get_user(data->time_queue, &data32->time_queue))
goto error;
data->kernel = NULL;
int qtail;
int qused;
int queue_size;
+ bool disconnected;
struct snd_timer_read *queue;
struct snd_timer_tread *tqueue;
spinlock_t qlock;
mutex_unlock(®ister_mutex);
return -ENOMEM;
}
+ /* take a card refcount for safe disconnection */
+ if (timer->card)
+ get_device(&timer->card->card_dev);
timeri->slave_class = tid->dev_sclass;
timeri->slave_id = slave_id;
if (list_empty(&timer->open_list_head) && timer->hw.open)
}
spin_unlock(&timer->lock);
spin_unlock_irq(&slave_active_lock);
+ /* release a card refcount for safe disconnection */
+ if (timer->card)
+ put_device(&timer->card->card_dev);
mutex_unlock(®ister_mutex);
}
out:
timer = timeri->timer;
if (timer == NULL)
return -EINVAL;
+ if (timer->card && timer->card->shutdown)
+ return -ENODEV;
spin_lock_irqsave(&timer->lock, flags);
timeri->ticks = timeri->cticks = ticks;
timeri->pticks = 0;
spin_lock_irqsave(&timer->lock, flags);
list_del_init(&timeri->ack_list);
list_del_init(&timeri->active_list);
+ if (timer->card && timer->card->shutdown) {
+ spin_unlock_irqrestore(&timer->lock, flags);
+ return 0;
+ }
if ((timeri->flags & SNDRV_TIMER_IFLG_RUNNING) &&
!(--timer->running)) {
timer->hw.stop(timer);
timer = timeri->timer;
if (! timer)
return -EINVAL;
+ if (timer->card && timer->card->shutdown)
+ return -ENODEV;
spin_lock_irqsave(&timer->lock, flags);
if (!timeri->cticks)
timeri->cticks = 1;
unsigned long resolution, ticks;
unsigned long flags;
+ if (timer->card && timer->card->shutdown)
+ return;
+
spin_lock_irqsave(&timer->lock, flags);
/* now process all callbacks */
while (!list_empty(&timer->sack_list_head)) {
if (timer == NULL)
return;
+ if (timer->card && timer->card->shutdown)
+ return;
+
spin_lock_irqsave(&timer->lock, flags);
/* remember the current resolution */
static int snd_timer_dev_disconnect(struct snd_device *device)
{
struct snd_timer *timer = device->device_data;
+ struct snd_timer_instance *ti;
+
mutex_lock(®ister_mutex);
list_del_init(&timer->device_list);
+ /* wake up pending sleepers */
+ list_for_each_entry(ti, &timer->open_list_head, open_list) {
+ if (ti->disconnect)
+ ti->disconnect(ti);
+ }
mutex_unlock(®ister_mutex);
return 0;
}
unsigned long resolution = 0;
struct snd_timer_instance *ti, *ts;
+ if (timer->card && timer->card->shutdown)
+ return;
if (! (timer->hw.flags & SNDRV_TIMER_HW_SLAVE))
return;
if (snd_BUG_ON(event < SNDRV_TIMER_EVENT_MSTART ||
mutex_lock(®ister_mutex);
list_for_each_entry(timer, &snd_timer_list, device_list) {
+ if (timer->card && timer->card->shutdown)
+ continue;
switch (timer->tmr_class) {
case SNDRV_TIMER_CLASS_GLOBAL:
snd_iprintf(buffer, "G%i: ", timer->tmr_device);
wake_up(&tu->qchange_sleep);
}
+/*
+ * Disconnect callback for a user-space timer instance.
+ *
+ * Marks the snd_timer_user stored in timeri->callback_data as
+ * disconnected and wakes anything sleeping on its qchange_sleep wait
+ * queue, so that sleepers can re-check the flag.
+ */
+static void snd_timer_user_disconnect(struct snd_timer_instance *timeri)
+{
+ struct snd_timer_user *tu = timeri->callback_data;
+
+ tu->disconnected = true;
+ wake_up(&tu->qchange_sleep);
+}
+
static void snd_timer_user_tinterrupt(struct snd_timer_instance *timeri,
unsigned long resolution,
unsigned long ticks)
? snd_timer_user_tinterrupt : snd_timer_user_interrupt;
tu->timeri->ccallback = snd_timer_user_ccallback;
tu->timeri->callback_data = (void *)tu;
+ tu->timeri->disconnect = snd_timer_user_disconnect;
}
__err:
remove_wait_queue(&tu->qchange_sleep, &wait);
+ if (tu->disconnected) {
+ err = -ENODEV;
+ break;
+ }
if (signal_pending(current)) {
err = -ERESTARTSYS;
break;
mask = 0;
if (tu->qused)
mask |= POLLIN | POLLRDNORM;
+ if (tu->disconnected)
+ mask |= POLLERR;
return mask;
}
out_err:
kfree(acomp);
bus->audio_component = NULL;
- dev_err(dev, "failed to add i915 component master (%d)\n", ret);
+ dev_info(dev, "failed to add i915 component master (%d)\n", ret);
return ret;
}
return device_attach(hda_codec_dev(codec)) > 0 && codec->preset;
}
-/* try to auto-load and bind the codec module */
-static void codec_bind_module(struct hda_codec *codec)
+/*
+ * try to auto-load codec module
+ *
+ * The module requested depends on codec->probe_id:
+ * - HDA_CODEC_ID_GENERIC_HDMI: "snd-hda-codec-hdmi", only when
+ *   CONFIG_SND_HDA_CODEC_HDMI is built as a module
+ * - HDA_CODEC_ID_GENERIC: "snd-hda-codec-generic", only when
+ *   CONFIG_SND_HDA_GENERIC is built as a module
+ * - anything else: the modalias string built by
+ *   snd_hdac_codec_modalias()
+ *
+ * Nothing is requested when the matching generic driver is not a
+ * module, and the whole body compiles away when MODULE is not defined.
+ */
+static void request_codec_module(struct hda_codec *codec)
{
#ifdef MODULE
char modalias[32];
+ const char *mod = NULL;
+
+ switch (codec->probe_id) {
+ case HDA_CODEC_ID_GENERIC_HDMI:
+#if IS_MODULE(CONFIG_SND_HDA_CODEC_HDMI)
+ mod = "snd-hda-codec-hdmi";
+#endif
+ break;
+ case HDA_CODEC_ID_GENERIC:
+#if IS_MODULE(CONFIG_SND_HDA_GENERIC)
+ mod = "snd-hda-codec-generic";
+#endif
+ break;
+ default:
+ snd_hdac_codec_modalias(&codec->core, modalias, sizeof(modalias));
+ mod = modalias;
+ break;
+ }
+
+ /* mod stays NULL when the generic driver is not built as a module */
+ if (mod)
+ request_module(mod);
+#endif /* MODULE */
+}
- snd_hdac_codec_modalias(&codec->core, modalias, sizeof(modalias));
- request_module(modalias);
+/* try to auto-load and bind the codec module */
+static void codec_bind_module(struct hda_codec *codec)
+{
+#ifdef MODULE
+ request_codec_module(codec);
if (codec_probed(codec))
return;
#endif
if (is_likely_hdmi_codec(codec)) {
codec->probe_id = HDA_CODEC_ID_GENERIC_HDMI;
-#if IS_MODULE(CONFIG_SND_HDA_CODEC_HDMI)
- request_module("snd-hda-codec-hdmi");
-#endif
+ request_codec_module(codec);
if (codec_probed(codec))
return 0;
}
codec->probe_id = HDA_CODEC_ID_GENERIC;
-#if IS_MODULE(CONFIG_SND_HDA_GENERIC)
- request_module("snd-hda-codec-generic");
-#endif
+ request_codec_module(codec);
if (codec_probed(codec))
return 0;
return -ENODEV;
* for other chips, still continue probing as other
* codecs can be on the same link.
*/
- if (CONTROLLER_IN_GPU(pci))
+ if (CONTROLLER_IN_GPU(pci)) {
+ dev_err(chip->card->dev,
+ "HSW/BDW HD-audio HDMI/DP requires binding with gfx driver\n");
goto out_free;
- else
+ } else
goto skip_i915;
}
static void azx_remove(struct pci_dev *pci)
{
struct snd_card *card = pci_get_drvdata(pci);
+ struct azx *chip;
+ struct hda_intel *hda;
+
+ if (card) {
+ /* flush the pending probing work */
+ chip = card->private_data;
+ hda = container_of(chip, struct hda_intel, chip);
+ flush_work(&hda->probe_work);
- if (card)
snd_card_free(card);
+ }
}
static void azx_shutdown(struct pci_dev *pci)
SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_BASS_1A),
+ SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A),
SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),
SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16),
SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16),
runtime->hw = snd_at73c213_playback_hw;
chip->substream = substream;
+ clk_enable(chip->ssc->clk);
+
return 0;
}
{
struct snd_at73c213 *chip = snd_pcm_substream_chip(substream);
chip->substream = NULL;
+ clk_disable(chip->ssc->clk);
return 0;
}
chip->card = card;
chip->irq = -1;
+ clk_enable(chip->ssc->clk);
+
retval = request_irq(irq, snd_at73c213_interrupt, 0, "at73c213", chip);
if (retval) {
dev_dbg(&chip->spi->dev, "unable to request irq %d\n", irq);
free_irq(chip->irq, chip);
chip->irq = -1;
out:
+ clk_disable(chip->ssc->clk);
+
return retval;
}
int retval;
/* Stop playback. */
+ clk_enable(chip->ssc->clk);
ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS));
+ clk_disable(chip->ssc->clk);
/* Mute sound. */
retval = snd_at73c213_write_reg(chip, DAC_LMPG, 0x3f);
struct snd_at73c213 *chip = card->private_data;
ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXDIS));
+ clk_disable(chip->ssc->clk);
clk_disable(chip->board->dac_clk);
return 0;
struct snd_at73c213 *chip = card->private_data;
clk_enable(chip->board->dac_clk);
+ clk_enable(chip->ssc->clk);
ssc_writel(chip->ssc->regs, CR, SSC_BIT(CR_TXEN));
return 0;
{ "NET_TX_SOFTIRQ", 2 },
{ "NET_RX_SOFTIRQ", 3 },
{ "BLOCK_SOFTIRQ", 4 },
- { "BLOCK_IOPOLL_SOFTIRQ", 5 },
+ { "IRQ_POLL_SOFTIRQ", 5 },
{ "TASKLET_SOFTIRQ", 6 },
{ "SCHED_SOFTIRQ", 7 },
{ "HRTIMER_SOFTIRQ", 8 },
{ "NET_TX_SOFTIRQ", 2 },
{ "NET_RX_SOFTIRQ", 3 },
{ "BLOCK_SOFTIRQ", 4 },
- { "BLOCK_IOPOLL_SOFTIRQ", 5 },
+ { "IRQ_POLL_SOFTIRQ", 5 },
{ "TASKLET_SOFTIRQ", 6 },
{ "SCHED_SOFTIRQ", 7 },
{ "HRTIMER_SOFTIRQ", 8 },