Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
author	Linus Torvalds <torvalds@linux-foundation.org>
Sat, 12 Apr 2014 21:49:50 +0000 (14:49 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
Sat, 12 Apr 2014 21:49:50 +0000 (14:49 -0700)
Pull vfs updates from Al Viro:
 "The first vfs pile, with deep apologies for being very late in this
  window.

  Assorted cleanups and fixes, plus a large preparatory part of iov_iter
  work.  There's a lot more of that, but it'll probably go into the next
  merge window - it *does* shape up nicely, removes a lot of
  boilerplate, gets rid of locking inconsistencies between aio_write and
  splice_write and I hope to get Kent's direct-io rewrite merged into
  the same queue, but some of the stuff after this point is having
  (mostly trivial) conflicts with the things already merged into
  mainline, and for some of it I want more testing.

  This one passes LTP and xfstests without regressions, in addition to
  the usual beating.  BTW, readahead02 in the LTP syscalls testsuite has started
  giving failures since "mm/readahead.c: fix readahead failure for
  memoryless NUMA nodes and limit readahead pages" - might be a false
  positive, might be a real regression..."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (63 commits)
  missing bits of "splice: fix racy pipe->buffers uses"
  cifs: fix the race in cifs_writev()
  ceph_sync_{,direct_}write: fix an oops on ceph_osdc_new_request() failure
  kill generic_file_buffered_write()
  ocfs2_file_aio_write(): switch to generic_perform_write()
  ceph_aio_write(): switch to generic_perform_write()
  xfs_file_buffered_aio_write(): switch to generic_perform_write()
  export generic_perform_write(), start getting rid of generic_file_buffer_write()
  generic_file_direct_write(): get rid of ppos argument
  btrfs_file_aio_write(): get rid of ppos
  kill the 5th argument of generic_file_buffered_write()
  kill the 4th argument of __generic_file_aio_write()
  lustre: don't open-code kernel_recvmsg()
  ocfs2: don't open-code kernel_recvmsg()
  drbd: don't open-code kernel_recvmsg()
  constify blk_rq_map_user_iov() and friends
  lustre: switch to kernel_sendmsg()
  ocfs2: don't open-code kernel_sendmsg()
  take iov_iter stuff to mm/iov_iter.c
  process_vm_access: tidy up a bit
  ...
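
Several of the commits above replace open-coded set_fs()/sock_recvmsg()
sequences with the kernel_recvmsg()/kernel_sendmsg() helpers; the drbd
hunk further down shows the conversion in situ.  As a minimal sketch of
the resulting idiom (the wrapper function and its name are hypothetical;
kernel_recvmsg() is the real net/socket.c helper):

	static int recv_into_kernel_buf(struct socket *sock, void *buf, size_t size)
	{
		struct kvec iov = {
			.iov_base = buf,
			.iov_len  = size,
		};
		struct msghdr msg = {
			.msg_flags = MSG_WAITALL | MSG_NOSIGNAL,
		};

		/* kernel_recvmsg() handles kernel-space addressing itself,
		 * so the set_fs(KERNEL_DS)/set_fs(oldfs) dance goes away. */
		return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
	}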

46 files changed:
Documentation/filesystems/Locking
Documentation/filesystems/vfs.txt
arch/s390/configs/default_defconfig
arch/xtensa/configs/iss_defconfig
arch/xtensa/configs/s6105_defconfig
block/blk-map.c
drivers/block/drbd/drbd_receiver.c
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c
drivers/staging/usbip/stub_dev.c
drivers/staging/usbip/usbip_common.c
drivers/staging/usbip/usbip_common.h
drivers/staging/usbip/vhci_hcd.c
drivers/staging/usbip/vhci_sysfs.c
fs/bio.c
fs/block_dev.c
fs/btrfs/file.c
fs/buffer.c
fs/cachefiles/namei.c
fs/ceph/file.c
fs/cifs/cifsfs.c
fs/cifs/file.c
fs/exec.c
fs/ext4/file.c
fs/file.c
fs/file_table.c
fs/fuse/file.c
fs/namei.c
fs/ncpfs/inode.c
fs/ntfs/inode.c
fs/ocfs2/cluster/tcp.c
fs/ocfs2/file.c
fs/open.c
fs/xfs/xfs_file.c
include/linux/bio.h
include/linux/blkdev.h
include/linux/fs.h
include/linux/pipe_fs_i.h
kernel/relay.c
kernel/trace/trace.c
lib/Kconfig.debug
mm/Makefile
mm/filemap.c
mm/process_vm_access.c
mm/shmem.c
security/integrity/evm/evm_crypto.c
security/integrity/evm/evm_main.c

index efca5c1bbb1028245a6d75e846e96a6ecb4150bd,bb2534bc0b0301d7493defb6ee078ecd91efc693..eba7901342531d2dc089c9a39d990aa924b86526
@@@ -47,8 -47,6 +47,8 @@@ prototypes
        int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
 +      int (*rename2) (struct inode *, struct dentry *,
 +                      struct inode *, struct dentry *, unsigned int);
        int (*readlink) (struct dentry *, char __user *,int);
        void * (*follow_link) (struct dentry *, struct nameidata *);
        void (*put_link) (struct dentry *, struct nameidata *, void *);
@@@ -80,7 -78,6 +80,7 @@@ mkdir:                ye
  unlink:               yes (both)
  rmdir:                yes (both)      (see below)
  rename:               yes (all)       (see below)
 +rename2:      yes (all)       (see below)
  readlink:     no
  follow_link:  no
  put_link:     no
@@@ -99,8 -96,7 +99,8 @@@ tmpfile:      n
  
        Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
  victim.
 -      cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
 +      cross-directory ->rename() and rename2() has (per-superblock)
 +->s_vfs_rename_sem.
  
  See Documentation/filesystems/directory-locking for more detailed discussion
  of the locking scheme for directory operations.
@@@ -202,7 -198,7 +202,7 @@@ prototypes
                                unsigned long *);
        int (*migratepage)(struct address_space *, struct page *, struct page *);
        int (*launder_page)(struct page *);
-       int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long);
+       int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
        int (*error_remove_page)(struct address_space *, struct page *);
        int (*swap_activate)(struct file *);
        int (*swap_deactivate)(struct file *);
@@@ -529,7 -525,6 +529,7 @@@ locking rules
  open:         yes
  close:                yes
  fault:                yes             can return with page locked
 +map_pages:    yes
  page_mkwrite: yes             can return with page locked
  access:               yes
  
@@@ -541,15 -536,6 +541,15 @@@ the page, then ensure it is not alread
  subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
  locked. The VM will unlock the page.
  
 +      ->map_pages() is called when VM asks to map easy accessible pages.
 +Filesystem should find and map pages associated with offsets from "pgoff"
 +till "max_pgoff". ->map_pages() is called with page table locked and must
 +not block.  If it's not possible to reach a page without blocking,
 +filesystem should skip it. Filesystem should use do_set_pte() to setup
 +page table entry. Pointer to entry associated with offset "pgoff" is
 +passed in "pte" field in vm_fault structure. Pointers to entries for other
 +offsets should be calculated relative to "pte".
 +
        ->page_mkwrite() is called when a previously read-only pte is
  about to become writeable. The filesystem again must ensure that there are
  no truncate/invalidate races, and then return with the page locked. If
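
For the ->map_pages() hook documented above, a filesystem that uses the
generic page cache can normally just wire up the generic helpers.  A
minimal sketch, assuming a hypothetical filesystem "myfs" (filemap_fault,
filemap_map_pages and filemap_page_mkwrite are the generic mm/filemap.c
implementations):

	static const struct vm_operations_struct myfs_file_vm_ops = {
		.fault		= filemap_fault,
		/* called with the page table lock held; maps the already
		 * uptodate pages around the faulting offset, never blocks */
		.map_pages	= filemap_map_pages,
		.page_mkwrite	= filemap_page_mkwrite,
	};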
index 94eb86287bcb08f3ebc0fa826438fed1af8ded1a,419e7348c481679da09a5721711cc56a32279abd..617f6d70c0778ce37716d25fde6f0c158f492707
@@@ -347,8 -347,6 +347,8 @@@ struct inode_operations 
        int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
 +      int (*rename2) (struct inode *, struct dentry *,
 +                      struct inode *, struct dentry *, unsigned int);
        int (*readlink) (struct dentry *, char __user *,int);
          void * (*follow_link) (struct dentry *, struct nameidata *);
          void (*put_link) (struct dentry *, struct nameidata *, void *);
@@@ -416,20 -414,6 +416,20 @@@ otherwise noted
    rename: called by the rename(2) system call to rename the object to
        have the parent and name given by the second inode and dentry.
  
 +  rename2: this has an additional flags argument compared to rename.
 +      If no flags are supported by the filesystem then this method
 +      need not be implemented.  If some flags are supported then the
 +      filesystem must return -EINVAL for any unsupported or unknown
 +      flags.  Currently the following flags are implemented:
 +      (1) RENAME_NOREPLACE: this flag indicates that if the target
 +      of the rename exists the rename should fail with -EEXIST
 +      instead of replacing the target.  The VFS already checks for
 +      existence, so for local filesystems the RENAME_NOREPLACE
 +      implementation is equivalent to plain rename.
 +      (2) RENAME_EXCHANGE: exchange source and target.  Both must
 +      exist; this is checked by the VFS.  Unlike plain rename,
 +      source and target may be of different type.
 +
    readlink: called by the readlink(2) system call. Only required if
        you want to support reading symbolic links
  
@@@ -596,7 -580,7 +596,7 @@@ struct address_space_operations 
        /* migrate the contents of a page to the specified target */
        int (*migratepage) (struct page *, struct page *);
        int (*launder_page) (struct page *);
-       int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
+       int (*is_partially_uptodate) (struct page *, unsigned long,
                                        unsigned long);
        void (*is_dirty_writeback) (struct page *, bool *, bool *);
        int (*error_remove_page) (struct mapping *mapping, struct page *page);
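
A sketch of what the ->rename2() contract described above means for a
simple local filesystem (the myfs_* names are hypothetical; the flag
handling follows the text):

	static int myfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
				struct inode *new_dir, struct dentry *new_dentry,
				unsigned int flags)
	{
		/* any unsupported or unknown flag must fail with -EINVAL */
		if (flags & ~RENAME_NOREPLACE)
			return -EINVAL;

		/* RENAME_NOREPLACE: the VFS has already checked that the
		 * target does not exist, so this degenerates to a plain
		 * rename for a local filesystem. */
		return myfs_rename(old_dir, old_dentry, new_dir, new_dentry);
	}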
index ddaae2f5c9137d0155ef5d5e943b40d097471881,3f538468a86e4c5a7561d819e967930ae25ffa6f..8df022c43af7e7db7afb8d803b19fe4a162c18ca
@@@ -46,7 -46,6 +46,7 @@@ CONFIG_UNIXWARE_DISKLABEL=
  CONFIG_CFQ_GROUP_IOSCHED=y
  CONFIG_DEFAULT_DEADLINE=y
  CONFIG_MARCH_Z9_109=y
 +CONFIG_NR_CPUS=256
  CONFIG_PREEMPT=y
  CONFIG_HZ_100=y
  CONFIG_MEMORY_HOTPLUG=y
@@@ -59,6 -58,7 +59,6 @@@ CONFIG_HOTPLUG_PCI=
  CONFIG_HOTPLUG_PCI_S390=y
  CONFIG_CHSC_SCH=y
  CONFIG_CRASH_DUMP=y
 -CONFIG_ZFCPDUMP=y
  # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
  CONFIG_BINFMT_MISC=m
  CONFIG_HIBERNATION=y
@@@ -101,6 -101,7 +101,6 @@@ CONFIG_TCP_CONG_VENO=
  CONFIG_TCP_CONG_YEAH=m
  CONFIG_TCP_CONG_ILLINOIS=m
  CONFIG_IPV6=y
 -CONFIG_IPV6_PRIVACY=y
  CONFIG_IPV6_ROUTER_PREF=y
  CONFIG_INET6_AH=m
  CONFIG_INET6_ESP=m
@@@ -110,7 -111,6 +110,7 @@@ CONFIG_INET6_XFRM_MODE_TRANSPORT=
  CONFIG_INET6_XFRM_MODE_TUNNEL=m
  CONFIG_INET6_XFRM_MODE_BEET=m
  CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
 +CONFIG_IPV6_VTI=m
  CONFIG_IPV6_SIT=m
  CONFIG_IPV6_GRE=m
  CONFIG_IPV6_MULTIPLE_TABLES=y
@@@ -135,17 -135,7 +135,17 @@@ CONFIG_NF_CONNTRACK_SIP=
  CONFIG_NF_CONNTRACK_TFTP=m
  CONFIG_NF_CT_NETLINK=m
  CONFIG_NF_CT_NETLINK_TIMEOUT=m
 -CONFIG_NETFILTER_TPROXY=m
 +CONFIG_NF_TABLES=m
 +CONFIG_NFT_EXTHDR=m
 +CONFIG_NFT_META=m
 +CONFIG_NFT_CT=m
 +CONFIG_NFT_RBTREE=m
 +CONFIG_NFT_HASH=m
 +CONFIG_NFT_COUNTER=m
 +CONFIG_NFT_LOG=m
 +CONFIG_NFT_LIMIT=m
 +CONFIG_NFT_NAT=m
 +CONFIG_NFT_COMPAT=m
  CONFIG_NETFILTER_XT_SET=m
  CONFIG_NETFILTER_XT_TARGET_AUDIT=m
  CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
@@@ -214,9 -204,7 +214,9 @@@ CONFIG_IP_SET_HASH_IP=
  CONFIG_IP_SET_HASH_IPPORT=m
  CONFIG_IP_SET_HASH_IPPORTIP=m
  CONFIG_IP_SET_HASH_IPPORTNET=m
 +CONFIG_IP_SET_HASH_NETPORTNET=m
  CONFIG_IP_SET_HASH_NET=m
 +CONFIG_IP_SET_HASH_NETNET=m
  CONFIG_IP_SET_HASH_NETPORT=m
  CONFIG_IP_SET_HASH_NETIFACE=m
  CONFIG_IP_SET_LIST_SET=m
@@@ -239,11 -227,6 +239,11 @@@ CONFIG_IP_VS_FTP=
  CONFIG_IP_VS_PE_SIP=m
  CONFIG_NF_CONNTRACK_IPV4=m
  # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
 +CONFIG_NF_TABLES_IPV4=m
 +CONFIG_NFT_REJECT_IPV4=m
 +CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 +CONFIG_NFT_CHAIN_NAT_IPV4=m
 +CONFIG_NF_TABLES_ARP=m
  CONFIG_IP_NF_IPTABLES=m
  CONFIG_IP_NF_MATCH_AH=m
  CONFIG_IP_NF_MATCH_ECN=m
@@@ -266,9 -249,6 +266,9 @@@ CONFIG_IP_NF_ARPTABLES=
  CONFIG_IP_NF_ARPFILTER=m
  CONFIG_IP_NF_ARP_MANGLE=m
  CONFIG_NF_CONNTRACK_IPV6=m
 +CONFIG_NF_TABLES_IPV6=m
 +CONFIG_NFT_CHAIN_ROUTE_IPV6=m
 +CONFIG_NFT_CHAIN_NAT_IPV6=m
  CONFIG_IP6_NF_IPTABLES=m
  CONFIG_IP6_NF_MATCH_AH=m
  CONFIG_IP6_NF_MATCH_EUI64=m
@@@ -288,7 -268,6 +288,7 @@@ CONFIG_IP6_NF_SECURITY=
  CONFIG_NF_NAT_IPV6=m
  CONFIG_IP6_NF_TARGET_MASQUERADE=m
  CONFIG_IP6_NF_TARGET_NPT=m
 +CONFIG_NF_TABLES_BRIDGE=m
  CONFIG_NET_SCTPPROBE=m
  CONFIG_RDS=m
  CONFIG_RDS_RDMA=m
@@@ -335,7 -314,6 +335,7 @@@ CONFIG_NET_CLS_RSVP=
  CONFIG_NET_CLS_RSVP6=m
  CONFIG_NET_CLS_FLOW=m
  CONFIG_NET_CLS_CGROUP=y
 +CONFIG_NET_CLS_BPF=m
  CONFIG_NET_CLS_ACT=y
  CONFIG_NET_ACT_POLICE=m
  CONFIG_NET_ACT_GACT=m
@@@ -403,8 -381,8 +403,8 @@@ CONFIG_BLK_DEV_DM=
  CONFIG_DM_CRYPT=m
  CONFIG_DM_SNAPSHOT=m
  CONFIG_DM_MIRROR=m
 -CONFIG_DM_RAID=m
  CONFIG_DM_LOG_USERSPACE=m
 +CONFIG_DM_RAID=m
  CONFIG_DM_ZERO=m
  CONFIG_DM_MULTIPATH=m
  CONFIG_DM_MULTIPATH_QL=m
@@@ -456,6 -434,7 +456,6 @@@ CONFIG_TN3270_FS=
  CONFIG_WATCHDOG=y
  CONFIG_WATCHDOG_NOWAYOUT=y
  CONFIG_SOFT_WATCHDOG=m
 -CONFIG_ZVM_WATCHDOG=m
  # CONFIG_HID is not set
  # CONFIG_USB_SUPPORT is not set
  CONFIG_INFINIBAND=m
@@@ -555,23 -534,13 +555,23 @@@ CONFIG_UNUSED_SYMBOLS=
  CONFIG_MAGIC_SYSRQ=y
  CONFIG_DEBUG_KERNEL=y
  CONFIG_DEBUG_PAGEALLOC=y
 +CONFIG_DEBUG_OBJECTS=y
 +CONFIG_DEBUG_OBJECTS_SELFTEST=y
 +CONFIG_DEBUG_OBJECTS_FREE=y
 +CONFIG_DEBUG_OBJECTS_TIMERS=y
 +CONFIG_DEBUG_OBJECTS_WORK=y
 +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
 +CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
  CONFIG_SLUB_DEBUG_ON=y
  CONFIG_SLUB_STATS=y
 +CONFIG_DEBUG_KMEMLEAK=y
  CONFIG_DEBUG_STACK_USAGE=y
  CONFIG_DEBUG_VM=y
  CONFIG_DEBUG_VM_RB=y
  CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
  CONFIG_DEBUG_PER_CPU_MAPS=y
 +CONFIG_DEBUG_SHIRQ=y
 +CONFIG_DETECT_HUNG_TASK=y
  CONFIG_TIMER_STATS=y
  CONFIG_DEBUG_RT_MUTEXES=y
  CONFIG_RT_MUTEX_TESTER=y
@@@ -581,7 -550,6 +581,6 @@@ CONFIG_LOCK_STAT=
  CONFIG_DEBUG_LOCKDEP=y
  CONFIG_DEBUG_ATOMIC_SLEEP=y
  CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
- CONFIG_DEBUG_WRITECOUNT=y
  CONFIG_DEBUG_LIST=y
  CONFIG_DEBUG_SG=y
  CONFIG_DEBUG_NOTIFIERS=y
@@@ -604,11 -572,9 +603,11 @@@ CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=
  CONFIG_BLK_DEV_IO_TRACE=y
  # CONFIG_KPROBE_EVENT is not set
  CONFIG_LKDTM=m
 +CONFIG_TEST_LIST_SORT=y
  CONFIG_KPROBES_SANITY_TEST=y
 -CONFIG_RBTREE_TEST=m
 +CONFIG_RBTREE_TEST=y
  CONFIG_INTERVAL_TREE_TEST=m
 +CONFIG_PERCPU_TEST=m
  CONFIG_ATOMIC64_SELFTEST=y
  CONFIG_DMA_API_DEBUG=y
  # CONFIG_STRICT_DEVMEM is not set
@@@ -671,6 -637,7 +670,6 @@@ CONFIG_CRYPTO_AES_S390=
  CONFIG_CRYPTO_GHASH_S390=m
  CONFIG_ASYMMETRIC_KEY_TYPE=m
  CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 -CONFIG_PUBLIC_KEY_ALGO_RSA=m
  CONFIG_X509_CERTIFICATE_PARSER=m
  CONFIG_CRC7=m
  CONFIG_CRC8=m
index d57d917ff2406c308b6b12b4790bf6654f4d900a,711f8aa14743f79c59d8d7db7735aec192076ed9..1493c68352d11454a50a7fdfb5f11f49ab54de88
@@@ -11,7 -11,7 +11,7 @@@ CONFIG_GENERIC_FIND_NEXT_BIT=
  CONFIG_GENERIC_HWEIGHT=y
  # CONFIG_ARCH_HAS_ILOG2_U32 is not set
  # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 -CONFIG_NO_IOPORT=y
 +CONFIG_NO_IOPORT_MAP=y
  CONFIG_HZ=100
  CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
  CONFIG_CONSTRUCTORS=y
@@@ -627,7 -627,6 +627,6 @@@ CONFIG_SCHED_DEBUG=
  # CONFIG_DEBUG_KOBJECT is not set
  # CONFIG_DEBUG_INFO is not set
  # CONFIG_DEBUG_VM is not set
- # CONFIG_DEBUG_WRITECOUNT is not set
  # CONFIG_DEBUG_MEMORY_INIT is not set
  # CONFIG_DEBUG_LIST is not set
  # CONFIG_DEBUG_SG is not set
index 583c2b0974cab79dfb08e7381836702688c2ed52,78318a76fa162ffc35e6a738b38ac6d7af4bedaa..12a492ab6d17f9fbf74dedd1a9723a269ee7cbef
@@@ -11,7 -11,7 +11,7 @@@ CONFIG_GENERIC_FIND_NEXT_BIT=
  CONFIG_GENERIC_HWEIGHT=y
  # CONFIG_ARCH_HAS_ILOG2_U32 is not set
  # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 -CONFIG_NO_IOPORT=y
 +CONFIG_NO_IOPORT_MAP=y
  CONFIG_HZ=100
  CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
  
@@@ -569,7 -569,6 +569,6 @@@ CONFIG_DEBUG_SPINLOCK_SLEEP=
  # CONFIG_DEBUG_INFO is not set
  # CONFIG_DEBUG_VM is not set
  CONFIG_DEBUG_NOMMU_REGIONS=y
- # CONFIG_DEBUG_WRITECOUNT is not set
  # CONFIG_DEBUG_MEMORY_INIT is not set
  # CONFIG_DEBUG_LIST is not set
  # CONFIG_DEBUG_SG is not set
diff --combined block/blk-map.c
index cca6356d216d13977665e17a846aef31ab1e4a87,86d93779c066ae8e5e3f05ca283cb69b457f90e7..f7b22bc215180d4b7f467135faeaf52975a77013
@@@ -188,7 -188,7 +188,7 @@@ EXPORT_SYMBOL(blk_rq_map_user)
   *    unmapping.
   */
  int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
-                       struct rq_map_data *map_data, struct sg_iovec *iov,
+                       struct rq_map_data *map_data, const struct sg_iovec *iov,
                        int iov_count, unsigned int len, gfp_t gfp_mask)
  {
        struct bio *bio;
@@@ -285,7 -285,7 +285,7 @@@ EXPORT_SYMBOL(blk_rq_unmap_user)
   *
   * Description:
   *    Data will be mapped directly if possible. Otherwise a bounce
 - *    buffer is used. Can be called multple times to append multple
 + *    buffer is used. Can be called multiple times to append multiple
   *    buffers.
   */
  int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
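
With the constified prototype above, a caller holding a read-only iovec
no longer needs to cast away const.  A minimal sketch (function name and
parameters are hypothetical; the blk_rq_map_user_iov() signature is the
one from the hunk):

	static int map_one_segment(struct request_queue *q, struct request *rq,
				   void __user *ubuf, unsigned int len)
	{
		const struct sg_iovec iov = {
			.iov_base = ubuf,
			.iov_len  = len,
		};

		/* const here documents that the vector is only read */
		return blk_rq_map_user_iov(q, rq, NULL, &iov, 1, len, GFP_KERNEL);
	}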
index 18c76e84d54085c0819d46f3af1f1ef3aa6fcb3b,1385714eccb705f30c3cbe29850701981732a330..68e3992e88381cd4974ebfa2da3400708ab4afa0
@@@ -44,7 -44,6 +44,7 @@@
  #include <linux/string.h>
  #include <linux/scatterlist.h>
  #include "drbd_int.h"
 +#include "drbd_protocol.h"
  #include "drbd_req.h"
  
  #include "drbd_vli.h"
@@@ -62,11 -61,11 +62,11 @@@ enum finish_epoch 
        FE_RECYCLED,
  };
  
 -static int drbd_do_features(struct drbd_tconn *tconn);
 -static int drbd_do_auth(struct drbd_tconn *tconn);
 -static int drbd_disconnected(struct drbd_conf *mdev);
 +static int drbd_do_features(struct drbd_connection *connection);
 +static int drbd_do_auth(struct drbd_connection *connection);
 +static int drbd_disconnected(struct drbd_peer_device *);
  
 -static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event);
 +static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
  static int e_end_block(struct drbd_work *, int);
  
  
@@@ -151,7 -150,7 +151,7 @@@ static void page_chain_add(struct page 
        *head = chain_first;
  }
  
 -static struct page *__drbd_alloc_pages(struct drbd_conf *mdev,
 +static struct page *__drbd_alloc_pages(struct drbd_device *device,
                                       unsigned int number)
  {
        struct page *page = NULL;
        return NULL;
  }
  
 -static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev,
 +static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
                                           struct list_head *to_be_freed)
  {
 -      struct drbd_peer_request *peer_req;
 -      struct list_head *le, *tle;
 +      struct drbd_peer_request *peer_req, *tmp;
  
        /* The EEs are always appended to the end of the list. Since
           they are sent in order over the wire, they have to finish
           in order. As soon as we see the first not finished we can
           stop to examine the list... */
  
 -      list_for_each_safe(le, tle, &mdev->net_ee) {
 -              peer_req = list_entry(le, struct drbd_peer_request, w.list);
 +      list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
                if (drbd_peer_req_has_active_page(peer_req))
                        break;
 -              list_move(le, to_be_freed);
 +              list_move(&peer_req->w.list, to_be_freed);
        }
  }
  
 -static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
 +static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
  {
        LIST_HEAD(reclaimed);
        struct drbd_peer_request *peer_req, *t;
  
 -      spin_lock_irq(&mdev->tconn->req_lock);
 -      reclaim_finished_net_peer_reqs(mdev, &reclaimed);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      spin_lock_irq(&device->resource->req_lock);
 +      reclaim_finished_net_peer_reqs(device, &reclaimed);
 +      spin_unlock_irq(&device->resource->req_lock);
  
        list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
 -              drbd_free_net_peer_req(mdev, peer_req);
 +              drbd_free_net_peer_req(device, peer_req);
  }
  
  /**
   * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 - * @mdev:     DRBD device.
 + * @device:   DRBD device.
   * @number:   number of pages requested
   * @retry:    whether to retry, if not enough pages are available right now
   *
   *
   * Returns a page chain linked via page->private.
   */
 -struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number,
 +struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
                              bool retry)
  {
 +      struct drbd_device *device = peer_device->device;
        struct page *page = NULL;
        struct net_conf *nc;
        DEFINE_WAIT(wait);
        /* Yes, we may run up to @number over max_buffers. If we
         * follow it strictly, the admin will get it wrong anyways. */
        rcu_read_lock();
 -      nc = rcu_dereference(mdev->tconn->net_conf);
 +      nc = rcu_dereference(peer_device->connection->net_conf);
        mxb = nc ? nc->max_buffers : 1000000;
        rcu_read_unlock();
  
 -      if (atomic_read(&mdev->pp_in_use) < mxb)
 -              page = __drbd_alloc_pages(mdev, number);
 +      if (atomic_read(&device->pp_in_use) < mxb)
 +              page = __drbd_alloc_pages(device, number);
  
        while (page == NULL) {
                prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
  
 -              drbd_kick_lo_and_reclaim_net(mdev);
 +              drbd_kick_lo_and_reclaim_net(device);
  
 -              if (atomic_read(&mdev->pp_in_use) < mxb) {
 -                      page = __drbd_alloc_pages(mdev, number);
 +              if (atomic_read(&device->pp_in_use) < mxb) {
 +                      page = __drbd_alloc_pages(device, number);
                        if (page)
                                break;
                }
                        break;
  
                if (signal_pending(current)) {
 -                      dev_warn(DEV, "drbd_alloc_pages interrupted!\n");
 +                      drbd_warn(device, "drbd_alloc_pages interrupted!\n");
                        break;
                }
  
        finish_wait(&drbd_pp_wait, &wait);
  
        if (page)
 -              atomic_add(number, &mdev->pp_in_use);
 +              atomic_add(number, &device->pp_in_use);
        return page;
  }
  
  /* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 - * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
 + * Is also used from inside an other spin_lock_irq(&resource->req_lock);
   * Either links the page chain back to the global pool,
   * or returns all pages to the system. */
 -static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net)
 +static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
  {
 -      atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
 +      atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
        int i;
  
        if (page == NULL)
        }
        i = atomic_sub_return(i, a);
        if (i < 0)
 -              dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
 +              drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
                        is_net ? "pp_in_use_by_net" : "pp_in_use", i);
        wake_up(&drbd_pp_wait);
  }
@@@ -330,26 -330,25 +330,26 @@@ You must not have the req_lock
  */
  
  struct drbd_peer_request *
 -drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector,
 +drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
                    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
  {
 +      struct drbd_device *device = peer_device->device;
        struct drbd_peer_request *peer_req;
        struct page *page = NULL;
        unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
  
 -      if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
 +      if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
                return NULL;
  
        peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
        if (!peer_req) {
                if (!(gfp_mask & __GFP_NOWARN))
 -                      dev_err(DEV, "%s: allocation failed\n", __func__);
 +                      drbd_err(device, "%s: allocation failed\n", __func__);
                return NULL;
        }
  
        if (data_size) {
 -              page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
 +              page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
                if (!page)
                        goto fail;
        }
        peer_req->i.waiting = false;
  
        peer_req->epoch = NULL;
 -      peer_req->w.mdev = mdev;
 +      peer_req->peer_device = peer_device;
        peer_req->pages = page;
        atomic_set(&peer_req->pending_bios, 0);
        peer_req->flags = 0;
        return NULL;
  }
  
 -void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
 +void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
                       int is_net)
  {
        if (peer_req->flags & EE_HAS_DIGEST)
                kfree(peer_req->digest);
 -      drbd_free_pages(mdev, peer_req->pages, is_net);
 -      D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
 -      D_ASSERT(drbd_interval_empty(&peer_req->i));
 +      drbd_free_pages(device, peer_req->pages, is_net);
 +      D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
 +      D_ASSERT(device, drbd_interval_empty(&peer_req->i));
        mempool_free(peer_req, drbd_ee_mempool);
  }
  
 -int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list)
 +int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
  {
        LIST_HEAD(work_list);
        struct drbd_peer_request *peer_req, *t;
        int count = 0;
 -      int is_net = list == &mdev->net_ee;
 +      int is_net = list == &device->net_ee;
  
 -      spin_lock_irq(&mdev->tconn->req_lock);
 +      spin_lock_irq(&device->resource->req_lock);
        list_splice_init(list, &work_list);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      spin_unlock_irq(&device->resource->req_lock);
  
        list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
 -              __drbd_free_peer_req(mdev, peer_req, is_net);
 +              __drbd_free_peer_req(device, peer_req, is_net);
                count++;
        }
        return count;
  /*
   * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
   */
 -static int drbd_finish_peer_reqs(struct drbd_conf *mdev)
 +static int drbd_finish_peer_reqs(struct drbd_device *device)
  {
        LIST_HEAD(work_list);
        LIST_HEAD(reclaimed);
        struct drbd_peer_request *peer_req, *t;
        int err = 0;
  
 -      spin_lock_irq(&mdev->tconn->req_lock);
 -      reclaim_finished_net_peer_reqs(mdev, &reclaimed);
 -      list_splice_init(&mdev->done_ee, &work_list);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      spin_lock_irq(&device->resource->req_lock);
 +      reclaim_finished_net_peer_reqs(device, &reclaimed);
 +      list_splice_init(&device->done_ee, &work_list);
 +      spin_unlock_irq(&device->resource->req_lock);
  
        list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
 -              drbd_free_net_peer_req(mdev, peer_req);
 +              drbd_free_net_peer_req(device, peer_req);
  
        /* possible callbacks here:
         * e_end_block, and e_end_resync_block, e_send_superseded.
                err2 = peer_req->w.cb(&peer_req->w, !!err);
                if (!err)
                        err = err2;
 -              drbd_free_peer_req(mdev, peer_req);
 +              drbd_free_peer_req(device, peer_req);
        }
 -      wake_up(&mdev->ee_wait);
 +      wake_up(&device->ee_wait);
  
        return err;
  }
  
 -static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
 +static void _drbd_wait_ee_list_empty(struct drbd_device *device,
                                     struct list_head *head)
  {
        DEFINE_WAIT(wait);
        /* avoids spin_lock/unlock
         * and calling prepare_to_wait in the fast path */
        while (!list_empty(head)) {
 -              prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
 -              spin_unlock_irq(&mdev->tconn->req_lock);
 +              prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
 +              spin_unlock_irq(&device->resource->req_lock);
                io_schedule();
 -              finish_wait(&mdev->ee_wait, &wait);
 -              spin_lock_irq(&mdev->tconn->req_lock);
 +              finish_wait(&device->ee_wait, &wait);
 +              spin_lock_irq(&device->resource->req_lock);
        }
  }
  
 -static void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
 +static void drbd_wait_ee_list_empty(struct drbd_device *device,
                                    struct list_head *head)
  {
 -      spin_lock_irq(&mdev->tconn->req_lock);
 -      _drbd_wait_ee_list_empty(mdev, head);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      spin_lock_irq(&device->resource->req_lock);
 +      _drbd_wait_ee_list_empty(device, head);
 +      spin_unlock_irq(&device->resource->req_lock);
  }
  
  static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
  {
-       mm_segment_t oldfs;
        struct kvec iov = {
                .iov_base = buf,
                .iov_len = size,
        };
        struct msghdr msg = {
-               .msg_iovlen = 1,
-               .msg_iov = (struct iovec *)&iov,
                .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
        };
-       int rv;
-       oldfs = get_fs();
-       set_fs(KERNEL_DS);
-       rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
-       set_fs(oldfs);
-       return rv;
+       return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
  }
  
 -static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
 +static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
  {
        int rv;
  
 -      rv = drbd_recv_short(tconn->data.socket, buf, size, 0);
 +      rv = drbd_recv_short(connection->data.socket, buf, size, 0);
  
        if (rv < 0) {
                if (rv == -ECONNRESET)
 -                      conn_info(tconn, "sock was reset by peer\n");
 +                      drbd_info(connection, "sock was reset by peer\n");
                else if (rv != -ERESTARTSYS)
 -                      conn_err(tconn, "sock_recvmsg returned %d\n", rv);
 +                      drbd_err(connection, "sock_recvmsg returned %d\n", rv);
        } else if (rv == 0) {
 -              if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
 +              if (test_bit(DISCONNECT_SENT, &connection->flags)) {
                        long t;
                        rcu_read_lock();
 -                      t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
 +                      t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
                        rcu_read_unlock();
  
 -                      t = wait_event_timeout(tconn->ping_wait, tconn->cstate < C_WF_REPORT_PARAMS, t);
 +                      t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
  
                        if (t)
                                goto out;
                }
 -              conn_info(tconn, "sock was shut down by peer\n");
 +              drbd_info(connection, "sock was shut down by peer\n");
        }
  
        if (rv != size)
 -              conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
 +              conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
  
  out:
        return rv;
  }
  
 -static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
 +static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
  {
        int err;
  
 -      err = drbd_recv(tconn, buf, size);
 +      err = drbd_recv(connection, buf, size);
        if (err != size) {
                if (err >= 0)
                        err = -EIO;
        return err;
  }
  
 -static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
 +static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
  {
        int err;
  
 -      err = drbd_recv_all(tconn, buf, size);
 +      err = drbd_recv_all(connection, buf, size);
        if (err && !signal_pending(current))
 -              conn_warn(tconn, "short read (expected size %d)\n", (int)size);
 +              drbd_warn(connection, "short read (expected size %d)\n", (int)size);
        return err;
  }
  
@@@ -564,7 -553,7 +554,7 @@@ static void drbd_setbufsize(struct sock
        }
  }
  
 -static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
 +static struct socket *drbd_try_connect(struct drbd_connection *connection)
  {
        const char *what;
        struct socket *sock;
        int disconnect_on_error = 1;
  
        rcu_read_lock();
 -      nc = rcu_dereference(tconn->net_conf);
 +      nc = rcu_dereference(connection->net_conf);
        if (!nc) {
                rcu_read_unlock();
                return NULL;
        connect_int = nc->connect_int;
        rcu_read_unlock();
  
 -      my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
 -      memcpy(&src_in6, &tconn->my_addr, my_addr_len);
 +      my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
 +      memcpy(&src_in6, &connection->my_addr, my_addr_len);
  
 -      if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
 +      if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
                src_in6.sin6_port = 0;
        else
                ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
  
 -      peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6));
 -      memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);
 +      peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
 +      memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);
  
        what = "sock_create_kern";
        err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
@@@ -643,17 -632,17 +633,17 @@@ out
                        disconnect_on_error = 0;
                        break;
                default:
 -                      conn_err(tconn, "%s failed, err = %d\n", what, err);
 +                      drbd_err(connection, "%s failed, err = %d\n", what, err);
                }
                if (disconnect_on_error)
 -                      conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 +                      conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
        }
  
        return sock;
  }
  
  struct accept_wait_data {
 -      struct drbd_tconn *tconn;
 +      struct drbd_connection *connection;
        struct socket *s_listen;
        struct completion door_bell;
        void (*original_sk_state_change)(struct sock *sk);
@@@ -671,7 -660,7 +661,7 @@@ static void drbd_incoming_connection(st
        state_change(sk);
  }
  
 -static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_data *ad)
 +static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
  {
        int err, sndbuf_size, rcvbuf_size, my_addr_len;
        struct sockaddr_in6 my_addr;
        const char *what;
  
        rcu_read_lock();
 -      nc = rcu_dereference(tconn->net_conf);
 +      nc = rcu_dereference(connection->net_conf);
        if (!nc) {
                rcu_read_unlock();
                return -EIO;
        rcvbuf_size = nc->rcvbuf_size;
        rcu_read_unlock();
  
 -      my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
 -      memcpy(&my_addr, &tconn->my_addr, my_addr_len);
 +      my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
 +      memcpy(&my_addr, &connection->my_addr, my_addr_len);
  
        what = "sock_create_kern";
        err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
@@@ -726,8 -715,8 +716,8 @@@ out
                sock_release(s_listen);
        if (err < 0) {
                if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
 -                      conn_err(tconn, "%s failed, err = %d\n", what, err);
 -                      conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 +                      drbd_err(connection, "%s failed, err = %d\n", what, err);
 +                      conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
                }
        }
  
@@@ -742,14 -731,14 +732,14 @@@ static void unregister_state_change(str
        write_unlock_bh(&sk->sk_callback_lock);
  }
  
 -static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct accept_wait_data *ad)
 +static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
  {
        int timeo, connect_int, err = 0;
        struct socket *s_estab = NULL;
        struct net_conf *nc;
  
        rcu_read_lock();
 -      nc = rcu_dereference(tconn->net_conf);
 +      nc = rcu_dereference(connection->net_conf);
        if (!nc) {
                rcu_read_unlock();
                return NULL;
        err = kernel_accept(ad->s_listen, &s_estab, 0);
        if (err < 0) {
                if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
 -                      conn_err(tconn, "accept failed, err = %d\n", err);
 -                      conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 +                      drbd_err(connection, "accept failed, err = %d\n", err);
 +                      conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
                }
        }
  
        return s_estab;
  }
  
 -static int decode_header(struct drbd_tconn *, void *, struct packet_info *);
 +static int decode_header(struct drbd_connection *, void *, struct packet_info *);
  
 -static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock,
 +static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
                             enum drbd_packet cmd)
  {
 -      if (!conn_prepare_command(tconn, sock))
 +      if (!conn_prepare_command(connection, sock))
                return -EIO;
 -      return conn_send_command(tconn, sock, cmd, 0, NULL, 0);
 +      return conn_send_command(connection, sock, cmd, 0, NULL, 0);
  }
  
 -static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock)
 +static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
  {
 -      unsigned int header_size = drbd_header_size(tconn);
 +      unsigned int header_size = drbd_header_size(connection);
        struct packet_info pi;
        int err;
  
 -      err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0);
 +      err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
        if (err != header_size) {
                if (err >= 0)
                        err = -EIO;
                return err;
        }
 -      err = decode_header(tconn, tconn->data.rbuf, &pi);
 +      err = decode_header(connection, connection->data.rbuf, &pi);
        if (err)
                return err;
        return pi.cmd;
@@@ -831,29 -820,28 +821,29 @@@ static int drbd_socket_okay(struct sock
  }
  /* Gets called if a connection is established, or if a new minor gets created
     in a connection */
 -int drbd_connected(struct drbd_conf *mdev)
 +int drbd_connected(struct drbd_peer_device *peer_device)
  {
 +      struct drbd_device *device = peer_device->device;
        int err;
  
 -      atomic_set(&mdev->packet_seq, 0);
 -      mdev->peer_seq = 0;
 +      atomic_set(&device->packet_seq, 0);
 +      device->peer_seq = 0;
  
 -      mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
 -              &mdev->tconn->cstate_mutex :
 -              &mdev->own_state_mutex;
 +      device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
 +              &peer_device->connection->cstate_mutex :
 +              &device->own_state_mutex;
  
 -      err = drbd_send_sync_param(mdev);
 +      err = drbd_send_sync_param(peer_device);
        if (!err)
 -              err = drbd_send_sizes(mdev, 0, 0);
 +              err = drbd_send_sizes(peer_device, 0, 0);
        if (!err)
 -              err = drbd_send_uuids(mdev);
 +              err = drbd_send_uuids(peer_device);
        if (!err)
 -              err = drbd_send_current_state(mdev);
 -      clear_bit(USE_DEGR_WFC_T, &mdev->flags);
 -      clear_bit(RESIZE_PENDING, &mdev->flags);
 -      atomic_set(&mdev->ap_in_flight, 0);
 -      mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
 +              err = drbd_send_current_state(peer_device);
 +      clear_bit(USE_DEGR_WFC_T, &device->flags);
 +      clear_bit(RESIZE_PENDING, &device->flags);
 +      atomic_set(&device->ap_in_flight, 0);
 +      mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
        return err;
  }
  
   *     no point in trying again, please go standalone.
   *  -2 We do not have a network config...
   */
 -static int conn_connect(struct drbd_tconn *tconn)
 +static int conn_connect(struct drbd_connection *connection)
  {
        struct drbd_socket sock, msock;
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
        struct net_conf *nc;
        int vnr, timeout, h, ok;
        bool discard_my_data;
        enum drbd_state_rv rv;
        struct accept_wait_data ad = {
 -              .tconn = tconn,
 +              .connection = connection,
                .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
        };
  
 -      clear_bit(DISCONNECT_SENT, &tconn->flags);
 -      if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
 +      clear_bit(DISCONNECT_SENT, &connection->flags);
 +      if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
                return -2;
  
        mutex_init(&sock.mutex);
 -      sock.sbuf = tconn->data.sbuf;
 -      sock.rbuf = tconn->data.rbuf;
 +      sock.sbuf = connection->data.sbuf;
 +      sock.rbuf = connection->data.rbuf;
        sock.socket = NULL;
        mutex_init(&msock.mutex);
 -      msock.sbuf = tconn->meta.sbuf;
 -      msock.rbuf = tconn->meta.rbuf;
 +      msock.sbuf = connection->meta.sbuf;
 +      msock.rbuf = connection->meta.rbuf;
        msock.socket = NULL;
  
        /* Assume that the peer only understands protocol 80 until we know better.  */
 -      tconn->agreed_pro_version = 80;
 +      connection->agreed_pro_version = 80;
  
 -      if (prepare_listen_socket(tconn, &ad))
 +      if (prepare_listen_socket(connection, &ad))
                return 0;
  
        do {
                struct socket *s;
  
 -              s = drbd_try_connect(tconn);
 +              s = drbd_try_connect(connection);
                if (s) {
                        if (!sock.socket) {
                                sock.socket = s;
 -                              send_first_packet(tconn, &sock, P_INITIAL_DATA);
 +                              send_first_packet(connection, &sock, P_INITIAL_DATA);
                        } else if (!msock.socket) {
 -                              clear_bit(RESOLVE_CONFLICTS, &tconn->flags);
 +                              clear_bit(RESOLVE_CONFLICTS, &connection->flags);
                                msock.socket = s;
 -                              send_first_packet(tconn, &msock, P_INITIAL_META);
 +                              send_first_packet(connection, &msock, P_INITIAL_META);
                        } else {
 -                              conn_err(tconn, "Logic error in conn_connect()\n");
 +                              drbd_err(connection, "Logic error in conn_connect()\n");
                                goto out_release_sockets;
                        }
                }
  
                if (sock.socket && msock.socket) {
                        rcu_read_lock();
 -                      nc = rcu_dereference(tconn->net_conf);
 +                      nc = rcu_dereference(connection->net_conf);
                        timeout = nc->ping_timeo * HZ / 10;
                        rcu_read_unlock();
                        schedule_timeout_interruptible(timeout);
                }
  
  retry:
 -              s = drbd_wait_for_connect(tconn, &ad);
 +              s = drbd_wait_for_connect(connection, &ad);
                if (s) {
 -                      int fp = receive_first_packet(tconn, s);
 +                      int fp = receive_first_packet(connection, s);
                        drbd_socket_okay(&sock.socket);
                        drbd_socket_okay(&msock.socket);
                        switch (fp) {
                        case P_INITIAL_DATA:
                                if (sock.socket) {
 -                                      conn_warn(tconn, "initial packet S crossed\n");
 +                                      drbd_warn(connection, "initial packet S crossed\n");
                                        sock_release(sock.socket);
                                        sock.socket = s;
                                        goto randomize;
                                sock.socket = s;
                                break;
                        case P_INITIAL_META:
 -                              set_bit(RESOLVE_CONFLICTS, &tconn->flags);
 +                              set_bit(RESOLVE_CONFLICTS, &connection->flags);
                                if (msock.socket) {
 -                                      conn_warn(tconn, "initial packet M crossed\n");
 +                                      drbd_warn(connection, "initial packet M crossed\n");
                                        sock_release(msock.socket);
                                        msock.socket = s;
                                        goto randomize;
                                msock.socket = s;
                                break;
                        default:
 -                              conn_warn(tconn, "Error receiving initial packet\n");
 +                              drbd_warn(connection, "Error receiving initial packet\n");
                                sock_release(s);
  randomize:
                                if (prandom_u32() & 1)
                        }
                }
  
 -              if (tconn->cstate <= C_DISCONNECTING)
 +              if (connection->cstate <= C_DISCONNECTING)
                        goto out_release_sockets;
                if (signal_pending(current)) {
                        flush_signals(current);
                        smp_rmb();
 -                      if (get_t_state(&tconn->receiver) == EXITING)
 +                      if (get_t_state(&connection->receiver) == EXITING)
                                goto out_release_sockets;
                }
  
        msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
  
        /* NOT YET ...
 -       * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
 +       * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
         * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
         * first set it to the P_CONNECTION_FEATURES timeout,
         * which we set to 4x the configured ping_timeout. */
        rcu_read_lock();
 -      nc = rcu_dereference(tconn->net_conf);
 +      nc = rcu_dereference(connection->net_conf);
  
        sock.socket->sk->sk_sndtimeo =
        sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
        drbd_tcp_nodelay(sock.socket);
        drbd_tcp_nodelay(msock.socket);
  
 -      tconn->data.socket = sock.socket;
 -      tconn->meta.socket = msock.socket;
 -      tconn->last_received = jiffies;
 +      connection->data.socket = sock.socket;
 +      connection->meta.socket = msock.socket;
 +      connection->last_received = jiffies;
  
 -      h = drbd_do_features(tconn);
 +      h = drbd_do_features(connection);
        if (h <= 0)
                return h;
  
 -      if (tconn->cram_hmac_tfm) {
 -              /* drbd_request_state(mdev, NS(conn, WFAuth)); */
 -              switch (drbd_do_auth(tconn)) {
 +      if (connection->cram_hmac_tfm) {
 +              /* drbd_request_state(device, NS(conn, WFAuth)); */
 +              switch (drbd_do_auth(connection)) {
                case -1:
 -                      conn_err(tconn, "Authentication of peer failed\n");
 +                      drbd_err(connection, "Authentication of peer failed\n");
                        return -1;
                case 0:
 -                      conn_err(tconn, "Authentication of peer failed, trying again.\n");
 +                      drbd_err(connection, "Authentication of peer failed, trying again.\n");
                        return 0;
                }
        }
  
 -      tconn->data.socket->sk->sk_sndtimeo = timeout;
 -      tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
 +      connection->data.socket->sk->sk_sndtimeo = timeout;
 +      connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
  
 -      if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
 +      if (drbd_send_protocol(connection) == -EOPNOTSUPP)
                return -1;
  
 -      set_bit(STATE_SENT, &tconn->flags);
 +      set_bit(STATE_SENT, &connection->flags);
  
        rcu_read_lock();
 -      idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 -              kref_get(&mdev->kref);
 +      idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 +              struct drbd_device *device = peer_device->device;
 +              kref_get(&device->kref);
                rcu_read_unlock();
  
                /* Prevent a race between resync-handshake and
                 * drbd_set_role() is finished, and any incoming drbd_set_role
                 * will see the STATE_SENT flag, and wait for it to be cleared.
                 */
 -              mutex_lock(mdev->state_mutex);
 -              mutex_unlock(mdev->state_mutex);
 +              mutex_lock(device->state_mutex);
 +              mutex_unlock(device->state_mutex);
  
                if (discard_my_data)
 -                      set_bit(DISCARD_MY_DATA, &mdev->flags);
 +                      set_bit(DISCARD_MY_DATA, &device->flags);
                else
 -                      clear_bit(DISCARD_MY_DATA, &mdev->flags);
 +                      clear_bit(DISCARD_MY_DATA, &device->flags);
  
 -              drbd_connected(mdev);
 -              kref_put(&mdev->kref, &drbd_minor_destroy);
 +              drbd_connected(peer_device);
 +              kref_put(&device->kref, drbd_destroy_device);
                rcu_read_lock();
        }
        rcu_read_unlock();
  
 -      rv = conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
 -      if (rv < SS_SUCCESS || tconn->cstate != C_WF_REPORT_PARAMS) {
 -              clear_bit(STATE_SENT, &tconn->flags);
 +      rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
 +      if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
 +              clear_bit(STATE_SENT, &connection->flags);
                return 0;
        }
  
 -      drbd_thread_start(&tconn->asender);
 +      drbd_thread_start(&connection->asender);
  
 -      mutex_lock(&tconn->conf_update);
 +      mutex_lock(&connection->resource->conf_update);
        /* The discard_my_data flag is a single-shot modifier to the next
         * connection attempt, the handshake of which is now well underway.
         * No need for rcu style copying of the whole struct
         * just to clear a single value. */
 -      tconn->net_conf->discard_my_data = 0;
 -      mutex_unlock(&tconn->conf_update);
 +      connection->net_conf->discard_my_data = 0;
 +      mutex_unlock(&connection->resource->conf_update);
  
        return h;
  
@@@ -1093,15 -1080,15 +1083,15 @@@ out_release_sockets
        return -1;
  }
  
 -static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi)
 +static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
  {
 -      unsigned int header_size = drbd_header_size(tconn);
 +      unsigned int header_size = drbd_header_size(connection);
  
        if (header_size == sizeof(struct p_header100) &&
            *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
                struct p_header100 *h = header;
                if (h->pad != 0) {
 -                      conn_err(tconn, "Header padding is not zero\n");
 +                      drbd_err(connection, "Header padding is not zero\n");
                        return -EINVAL;
                }
                pi->vnr = be16_to_cpu(h->volume);
                pi->size = be16_to_cpu(h->length);
                pi->vnr = 0;
        } else {
 -              conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n",
 +              drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
                         be32_to_cpu(*(__be32 *)header),
 -                       tconn->agreed_pro_version);
 +                       connection->agreed_pro_version);
                return -EINVAL;
        }
        pi->data = header + header_size;
        return 0;
  }
  
 -static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      void *buffer = tconn->data.rbuf;
 +      void *buffer = connection->data.rbuf;
        int err;
  
 -      err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn));
 +      err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
        if (err)
                return err;
  
 -      err = decode_header(tconn, buffer, pi);
 -      tconn->last_received = jiffies;
 +      err = decode_header(connection, buffer, pi);
 +      connection->last_received = jiffies;
  
        return err;
  }
  
 -static void drbd_flush(struct drbd_tconn *tconn)
 +static void drbd_flush(struct drbd_connection *connection)
  {
        int rv;
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
        int vnr;
  
 -      if (tconn->write_ordering >= WO_bdev_flush) {
 +      if (connection->write_ordering >= WO_bdev_flush) {
                rcu_read_lock();
 -              idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 -                      if (!get_ldev(mdev))
 +              idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 +                      struct drbd_device *device = peer_device->device;
 +
 +                      if (!get_ldev(device))
                                continue;
 -                      kref_get(&mdev->kref);
 +                      kref_get(&device->kref);
                        rcu_read_unlock();
  
 -                      rv = blkdev_issue_flush(mdev->ldev->backing_bdev,
 +                      rv = blkdev_issue_flush(device->ldev->backing_bdev,
                                        GFP_NOIO, NULL);
                        if (rv) {
 -                              dev_info(DEV, "local disk flush failed with status %d\n", rv);
 +                              drbd_info(device, "local disk flush failed with status %d\n", rv);
                                /* would rather check on EOPNOTSUPP, but that is not reliable.
                                 * don't try again for ANY return value != 0
                                 * if (rv == -EOPNOTSUPP) */
 -                              drbd_bump_write_ordering(tconn, WO_drain_io);
 +                              drbd_bump_write_ordering(connection, WO_drain_io);
                        }
 -                      put_ldev(mdev);
 -                      kref_put(&mdev->kref, &drbd_minor_destroy);
 +                      put_ldev(device);
 +                      kref_put(&device->kref, drbd_destroy_device);
  
                        rcu_read_lock();
                        if (rv)
  
  /**
   * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 - * @mdev:     DRBD device.
 + * @device:   DRBD device.
   * @epoch:    Epoch object.
   * @ev:               Epoch event.
   */
 -static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn,
 +static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
                                               struct drbd_epoch *epoch,
                                               enum epoch_event ev)
  {
        struct drbd_epoch *next_epoch;
        enum finish_epoch rv = FE_STILL_LIVE;
  
 -      spin_lock(&tconn->epoch_lock);
 +      spin_lock(&connection->epoch_lock);
        do {
                next_epoch = NULL;
  
                    atomic_read(&epoch->active) == 0 &&
                    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
                        if (!(ev & EV_CLEANUP)) {
 -                              spin_unlock(&tconn->epoch_lock);
 -                              drbd_send_b_ack(epoch->tconn, epoch->barrier_nr, epoch_size);
 -                              spin_lock(&tconn->epoch_lock);
 +                              spin_unlock(&connection->epoch_lock);
 +                              drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
 +                              spin_lock(&connection->epoch_lock);
                        }
  #if 0
                        /* FIXME: dec unacked on connection, once we have
                         * something to count pending connection packets in. */
                        if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
 -                              dec_unacked(epoch->tconn);
 +                              dec_unacked(epoch->connection);
  #endif
  
 -                      if (tconn->current_epoch != epoch) {
 +                      if (connection->current_epoch != epoch) {
                                next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
                                list_del(&epoch->list);
                                ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
 -                              tconn->epochs--;
 +                              connection->epochs--;
                                kfree(epoch);
  
                                if (rv == FE_STILL_LIVE)
                epoch = next_epoch;
        } while (1);
  
 -      spin_unlock(&tconn->epoch_lock);
 +      spin_unlock(&connection->epoch_lock);
  
        return rv;
  }
  
  /**
   * drbd_bump_write_ordering() - Fall back to another write ordering method
 - * @tconn:    DRBD connection.
 + * @connection:       DRBD connection.
   * @wo:               Write ordering method to try.
   */
 -void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo)
 +void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
  {
        struct disk_conf *dc;
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
        enum write_ordering_e pwo;
        int vnr;
        static char *write_ordering_str[] = {
                [WO_bdev_flush] = "flush",
        };
  
 -      pwo = tconn->write_ordering;
 +      pwo = connection->write_ordering;
        wo = min(pwo, wo);
        rcu_read_lock();
 -      idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 -              if (!get_ldev_if_state(mdev, D_ATTACHING))
 +      idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 +              struct drbd_device *device = peer_device->device;
 +
 +              if (!get_ldev_if_state(device, D_ATTACHING))
                        continue;
 -              dc = rcu_dereference(mdev->ldev->disk_conf);
 +              dc = rcu_dereference(device->ldev->disk_conf);
  
                if (wo == WO_bdev_flush && !dc->disk_flushes)
                        wo = WO_drain_io;
                if (wo == WO_drain_io && !dc->disk_drain)
                        wo = WO_none;
 -              put_ldev(mdev);
 +              put_ldev(device);
        }
        rcu_read_unlock();
 -      tconn->write_ordering = wo;
 -      if (pwo != tconn->write_ordering || wo == WO_bdev_flush)
 -              conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]);
 +      connection->write_ordering = wo;
 +      if (pwo != connection->write_ordering || wo == WO_bdev_flush)
 +              drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
  }
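
The chain above only ever falls back, never upgrades: wo = min(pwo, wo) picks the weaker of the current and requested method, and every attached disk can veto flushes or draining via its disk_conf. The same decision, extracted into a compilable stand-alone sketch with hypothetical names:

    /* strength-ordered like the driver's enum: none < drain < flush */
    enum demo_wo { DEMO_WO_NONE, DEMO_WO_DRAIN_IO, DEMO_WO_BDEV_FLUSH };

    struct demo_disk_conf { int disk_flushes, disk_drain; };

    static enum demo_wo demo_bump_write_ordering(enum demo_wo pwo, enum demo_wo wo,
                                                 const struct demo_disk_conf *dc,
                                                 int ndisks)
    {
            int i;

            if (pwo < wo)
                    wo = pwo;               /* wo = min(pwo, wo): never upgrade */
            for (i = 0; i < ndisks; i++) {
                    if (wo == DEMO_WO_BDEV_FLUSH && !dc[i].disk_flushes)
                            wo = DEMO_WO_DRAIN_IO;
                    if (wo == DEMO_WO_DRAIN_IO && !dc[i].disk_drain)
                            wo = DEMO_WO_NONE;
            }
            return wo;
    }
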
  
  /**
   * drbd_submit_peer_request()
 - * @mdev:     DRBD device.
 + * @device:   DRBD device.
   * @peer_req: peer request
   * @rw:               flag field, see bio->bi_rw
   *
   *  on certain Xen deployments.
   */
  /* TODO allocate from our own bio_set. */
 -int drbd_submit_peer_request(struct drbd_conf *mdev,
 +int drbd_submit_peer_request(struct drbd_device *device,
                             struct drbd_peer_request *peer_req,
                             const unsigned rw, const int fault_type)
  {
  next_bio:
        bio = bio_alloc(GFP_NOIO, nr_pages);
        if (!bio) {
 -              dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
 +              drbd_err(device, "submit_ee: Allocation of a bio failed\n");
                goto fail;
        }
        /* > peer_req->i.sector, unless this is the first bio */
        bio->bi_iter.bi_sector = sector;
 -      bio->bi_bdev = mdev->ldev->backing_bdev;
 +      bio->bi_bdev = device->ldev->backing_bdev;
        bio->bi_rw = rw;
        bio->bi_private = peer_req;
        bio->bi_end_io = drbd_peer_request_endio;
                         * But in case it fails anyways,
                         * we deal with it, and complain (below). */
                        if (bio->bi_vcnt == 0) {
 -                              dev_err(DEV,
 +                              drbd_err(device,
                                        "bio_add_page failed for len=%u, "
                                        "bi_vcnt=0 (bi_sector=%llu)\n",
                                        len, (uint64_t)bio->bi_iter.bi_sector);
                sector += len >> 9;
                --nr_pages;
        }
 -      D_ASSERT(page == NULL);
 -      D_ASSERT(ds == 0);
 +      D_ASSERT(device, page == NULL);
 +      D_ASSERT(device, ds == 0);
  
        atomic_set(&peer_req->pending_bios, n_bios);
        do {
                bios = bios->bi_next;
                bio->bi_next = NULL;
  
 -              drbd_generic_make_request(mdev, fault_type, bio);
 +              drbd_generic_make_request(device, fault_type, bio);
        } while (bios);
        return 0;
  
@@@ -1392,44 -1375,36 +1382,44 @@@ fail
        return err;
  }
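
The submit path splits one peer request across as many bios as bio_add_page() demands, advancing the start sector by each chunk's length in 512-byte sectors until the page chain and the byte count are both exhausted (hence the two assertions above). The per-chunk arithmetic, isolated into a stand-alone sketch (names hypothetical, page size assumed 4KiB):

    #include <stdio.h>

    #define DEMO_PAGE_SIZE 4096u

    static void demo_walk_chunks(unsigned long long sector, unsigned int ds)
    {
            while (ds) {
                    unsigned int len = ds < DEMO_PAGE_SIZE ? ds : DEMO_PAGE_SIZE;

                    printf("chunk at sector %llu, %u bytes\n", sector, len);
                    sector += len >> 9;     /* 512-byte sectors per chunk */
                    ds -= len;
            }
    }
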
  
 -static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
 +static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
                                             struct drbd_peer_request *peer_req)
  {
        struct drbd_interval *i = &peer_req->i;
  
 -      drbd_remove_interval(&mdev->write_requests, i);
 +      drbd_remove_interval(&device->write_requests, i);
        drbd_clear_interval(i);
  
        /* Wake up any processes waiting for this peer request to complete.  */
        if (i->waiting)
 -              wake_up(&mdev->misc_wait);
 +              wake_up(&device->misc_wait);
  }
  
 -void conn_wait_active_ee_empty(struct drbd_tconn *tconn)
 +static void conn_wait_active_ee_empty(struct drbd_connection *connection)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
        int vnr;
  
        rcu_read_lock();
 -      idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 -              kref_get(&mdev->kref);
 +      idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 +              struct drbd_device *device = peer_device->device;
 +
 +              kref_get(&device->kref);
                rcu_read_unlock();
 -              drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
 -              kref_put(&mdev->kref, &drbd_minor_destroy);
 +              drbd_wait_ee_list_empty(device, &device->active_ee);
 +              kref_put(&device->kref, drbd_destroy_device);
                rcu_read_lock();
        }
        rcu_read_unlock();
  }
  
 -static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
 +static struct drbd_peer_device *
 +conn_peer_device(struct drbd_connection *connection, int volume_number)
 +{
 +      return idr_find(&connection->peer_devices, volume_number);
 +}
 +
 +static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
  {
        int rv;
        struct p_barrier *p = pi->data;
        /* FIXME these are unacked on connection,
         * not a specific (peer)device.
         */
 -      tconn->current_epoch->barrier_nr = p->barrier;
 -      tconn->current_epoch->tconn = tconn;
 -      rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR);
 +      connection->current_epoch->barrier_nr = p->barrier;
 +      connection->current_epoch->connection = connection;
 +      rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
  
        /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
         * the activity log, which means it would not be resynced in case the
         * R_PRIMARY crashes now.
         * Therefore we must send the barrier_ack after the barrier request was
         * completed. */
 -      switch (tconn->write_ordering) {
 +      switch (connection->write_ordering) {
        case WO_none:
                if (rv == FE_RECYCLED)
                        return 0;
                if (epoch)
                        break;
                else
 -                      conn_warn(tconn, "Allocation of an epoch failed, slowing down\n");
 +                      drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
                        /* Fall through */
  
        case WO_bdev_flush:
        case WO_drain_io:
 -              conn_wait_active_ee_empty(tconn);
 -              drbd_flush(tconn);
 +              conn_wait_active_ee_empty(connection);
 +              drbd_flush(connection);
  
 -              if (atomic_read(&tconn->current_epoch->epoch_size)) {
 +              if (atomic_read(&connection->current_epoch->epoch_size)) {
                        epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
                        if (epoch)
                                break;
  
                return 0;
        default:
 -              conn_err(tconn, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering);
 +              drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
                return -EIO;
        }
  
        atomic_set(&epoch->epoch_size, 0);
        atomic_set(&epoch->active, 0);
  
 -      spin_lock(&tconn->epoch_lock);
 -      if (atomic_read(&tconn->current_epoch->epoch_size)) {
 -              list_add(&epoch->list, &tconn->current_epoch->list);
 -              tconn->current_epoch = epoch;
 -              tconn->epochs++;
 +      spin_lock(&connection->epoch_lock);
 +      if (atomic_read(&connection->current_epoch->epoch_size)) {
 +              list_add(&epoch->list, &connection->current_epoch->list);
 +              connection->current_epoch = epoch;
 +              connection->epochs++;
        } else {
                /* The current_epoch got recycled while we allocated this one... */
                kfree(epoch);
        }
 -      spin_unlock(&tconn->epoch_lock);
 +      spin_unlock(&connection->epoch_lock);
  
        return 0;
  }
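
Note the allocation pattern in the tail of receive_Barrier(): the new epoch is allocated while epoch_lock is dropped, then the epoch_size condition is re-checked under the lock and the allocation is freed if the current epoch got recycled in the meantime. The same idiom in a stand-alone userspace sketch, with a pthread mutex standing in for the spinlock and all names hypothetical:

    #include <pthread.h>
    #include <stdlib.h>

    struct demo_epoch { int epoch_size; };

    static pthread_mutex_t demo_epoch_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct demo_epoch *demo_current_epoch;

    static void demo_install_new_epoch(void)
    {
            /* allocate without the lock held... */
            struct demo_epoch *epoch = calloc(1, sizeof(*epoch));

            if (!epoch)
                    return;
            pthread_mutex_lock(&demo_epoch_lock);
            if (demo_current_epoch && demo_current_epoch->epoch_size)
                    demo_current_epoch = epoch;     /* still needed: install
                                                     * (the driver keeps the old
                                                     * epoch on its list) */
            else
                    free(epoch);    /* current epoch got recycled meanwhile */
            pthread_mutex_unlock(&demo_epoch_lock);
    }
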
  /* used from receive_RSDataReply (recv_resync_read)
   * and from receive_Data */
  static struct drbd_peer_request *
 -read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
 +read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
              int data_size) __must_hold(local)
  {
 -      const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
 +      struct drbd_device *device = peer_device->device;
 +      const sector_t capacity = drbd_get_capacity(device->this_bdev);
        struct drbd_peer_request *peer_req;
        struct page *page;
        int dgs, ds, err;
 -      void *dig_in = mdev->tconn->int_dig_in;
 -      void *dig_vv = mdev->tconn->int_dig_vv;
 +      void *dig_in = peer_device->connection->int_dig_in;
 +      void *dig_vv = peer_device->connection->int_dig_vv;
        unsigned long *data;
  
        dgs = 0;
 -      if (mdev->tconn->peer_integrity_tfm) {
 -              dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
 +      if (peer_device->connection->peer_integrity_tfm) {
 +              dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
                /*
                 * FIXME: Receive the incoming digest into the receive buffer
                 *        here, together with its struct p_data?
                 */
 -              err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
 +              err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
                if (err)
                        return NULL;
                data_size -= dgs;
        /* even though we trust our peer,
         * we sometimes have to double check. */
        if (sector + (data_size>>9) > capacity) {
 -              dev_err(DEV, "request from peer beyond end of local disk: "
 +              drbd_err(device, "request from peer beyond end of local disk: "
                        "capacity: %llus < sector: %llus + size: %u\n",
                        (unsigned long long)capacity,
                        (unsigned long long)sector, data_size);
        /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
         * "criss-cross" setup, that might cause write-out on some other DRBD,
         * which in turn might block on the other node at this very place.  */
 -      peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO);
 +      peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, GFP_NOIO);
        if (!peer_req)
                return NULL;
  
        page_chain_for_each(page) {
                unsigned len = min_t(int, ds, PAGE_SIZE);
                data = kmap(page);
 -              err = drbd_recv_all_warn(mdev->tconn, data, len);
 -              if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
 -                      dev_err(DEV, "Fault injection: Corrupting data on receive\n");
 +              err = drbd_recv_all_warn(peer_device->connection, data, len);
 +              if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
 +                      drbd_err(device, "Fault injection: Corrupting data on receive\n");
                        data[0] = data[0] ^ (unsigned long)-1;
                }
                kunmap(page);
                if (err) {
 -                      drbd_free_peer_req(mdev, peer_req);
 +                      drbd_free_peer_req(device, peer_req);
                        return NULL;
                }
                ds -= len;
        }
  
        if (dgs) {
 -              drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv);
 +              drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
                if (memcmp(dig_in, dig_vv, dgs)) {
 -                      dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
 +                      drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
                                (unsigned long long)sector, data_size);
 -                      drbd_free_peer_req(mdev, peer_req);
 +                      drbd_free_peer_req(device, peer_req);
                        return NULL;
                }
        }
 -      mdev->recv_cnt += data_size>>9;
 +      device->recv_cnt += data_size>>9;
        return peer_req;
  }
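
When an integrity transform is configured, the digest travels ahead of the payload: receive dgs digest bytes, receive the data pages, recompute the checksum over what arrived, and compare. A compilable sketch of that final comparison, with a toy XOR digest standing in for the crypto_hash API:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* toy 4-byte XOR digest, a stand-in for crypto_hash; illustration only */
    static void demo_digest(const uint8_t *data, size_t len, uint8_t out[4])
    {
            size_t i;

            memset(out, 0, 4);
            for (i = 0; i < len; i++)
                    out[i % 4] ^= data[i];
    }

    /* mirrors the tail of read_in_block(): recompute, then memcmp */
    static int demo_verify_payload(const uint8_t dig_in[4],
                                   const uint8_t *payload, size_t size)
    {
            uint8_t dig_vv[4];

            demo_digest(payload, size, dig_vv);
            if (memcmp(dig_in, dig_vv, 4) != 0) {
                    fprintf(stderr, "digest integrity check failed\n");
                    return -1;      /* caller drops the peer request */
            }
            return 0;
    }
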
  
  /* drbd_drain_block() just takes a data block
   * out of the socket input buffer, and discards it.
   */
 -static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
 +static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
  {
        struct page *page;
        int err = 0;
        if (!data_size)
                return 0;
  
 -      page = drbd_alloc_pages(mdev, 1, 1);
 +      page = drbd_alloc_pages(peer_device, 1, 1);
  
        data = kmap(page);
        while (data_size) {
                unsigned int len = min_t(int, data_size, PAGE_SIZE);
  
 -              err = drbd_recv_all_warn(mdev->tconn, data, len);
 +              err = drbd_recv_all_warn(peer_device->connection, data, len);
                if (err)
                        break;
                data_size -= len;
        }
        kunmap(page);
 -      drbd_free_pages(mdev, page, 0);
 +      drbd_free_pages(peer_device->device, page, 0);
        return err;
  }
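
Draining exists because the payload must still be pulled off the socket even when it is only going to be thrown away, so a single scratch page is reused until data_size is consumed. A userspace equivalent, assuming a plain file descriptor in place of the DRBD transport:

    #include <unistd.h>

    #define DEMO_PAGE_SIZE 4096

    static int demo_drain_block(int fd, int data_size)
    {
            char page[DEMO_PAGE_SIZE];      /* one reusable scratch page */

            while (data_size > 0) {
                    int len = data_size < DEMO_PAGE_SIZE ? data_size
                                                         : DEMO_PAGE_SIZE;
                    ssize_t n = read(fd, page, len);

                    if (n <= 0)
                            return -1;      /* error or EOF */
                    data_size -= n;
            }
            return 0;
    }
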
  
 -static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
 +static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
                           sector_t sector, int data_size)
  {
        struct bio_vec bvec;
        struct bvec_iter iter;
        struct bio *bio;
        int dgs, err, expect;
 -      void *dig_in = mdev->tconn->int_dig_in;
 -      void *dig_vv = mdev->tconn->int_dig_vv;
 +      void *dig_in = peer_device->connection->int_dig_in;
 +      void *dig_vv = peer_device->connection->int_dig_vv;
  
        dgs = 0;
 -      if (mdev->tconn->peer_integrity_tfm) {
 -              dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
 -              err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
 +      if (peer_device->connection->peer_integrity_tfm) {
 +              dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
 +              err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
                if (err)
                        return err;
                data_size -= dgs;
  
        /* optimistically update recv_cnt.  if receiving fails below,
         * we disconnect anyways, and counters will be reset. */
 -      mdev->recv_cnt += data_size>>9;
 +      peer_device->device->recv_cnt += data_size>>9;
  
        bio = req->master_bio;
 -      D_ASSERT(sector == bio->bi_iter.bi_sector);
 +      D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
  
        bio_for_each_segment(bvec, bio, iter) {
                void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
                expect = min_t(int, data_size, bvec.bv_len);
 -              err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
 +              err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
                kunmap(bvec.bv_page);
                if (err)
                        return err;
        }
  
        if (dgs) {
 -              drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv);
 +              drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
                if (memcmp(dig_in, dig_vv, dgs)) {
 -                      dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
 +                      drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
                        return -EINVAL;
                }
        }
  
 -      D_ASSERT(data_size == 0);
 +      D_ASSERT(peer_device->device, data_size == 0);
        return 0;
  }
  
@@@ -1664,67 -1638,64 +1654,67 @@@ static int e_end_resync_block(struct dr
  {
        struct drbd_peer_request *peer_req =
                container_of(w, struct drbd_peer_request, w);
 -      struct drbd_conf *mdev = w->mdev;
 +      struct drbd_peer_device *peer_device = peer_req->peer_device;
 +      struct drbd_device *device = peer_device->device;
        sector_t sector = peer_req->i.sector;
        int err;
  
 -      D_ASSERT(drbd_interval_empty(&peer_req->i));
 +      D_ASSERT(device, drbd_interval_empty(&peer_req->i));
  
        if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
 -              drbd_set_in_sync(mdev, sector, peer_req->i.size);
 -              err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
 +              drbd_set_in_sync(device, sector, peer_req->i.size);
 +              err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
        } else {
                /* Record failure to sync */
 -              drbd_rs_failed_io(mdev, sector, peer_req->i.size);
 +              drbd_rs_failed_io(device, sector, peer_req->i.size);
  
 -              err  = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
 +              err  = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
        }
 -      dec_unacked(mdev);
 +      dec_unacked(device);
  
        return err;
  }
  
 -static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
 +static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
 +                          int data_size) __releases(local)
  {
 +      struct drbd_device *device = peer_device->device;
        struct drbd_peer_request *peer_req;
  
 -      peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
 +      peer_req = read_in_block(peer_device, ID_SYNCER, sector, data_size);
        if (!peer_req)
                goto fail;
  
 -      dec_rs_pending(mdev);
 +      dec_rs_pending(device);
  
 -      inc_unacked(mdev);
 +      inc_unacked(device);
        /* corresponding dec_unacked() in e_end_resync_block()
         * respective _drbd_clear_done_ee */
  
        peer_req->w.cb = e_end_resync_block;
  
 -      spin_lock_irq(&mdev->tconn->req_lock);
 -      list_add(&peer_req->w.list, &mdev->sync_ee);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      spin_lock_irq(&device->resource->req_lock);
 +      list_add(&peer_req->w.list, &device->sync_ee);
 +      spin_unlock_irq(&device->resource->req_lock);
  
 -      atomic_add(data_size >> 9, &mdev->rs_sect_ev);
 -      if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
 +      atomic_add(data_size >> 9, &device->rs_sect_ev);
 +      if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
                return 0;
  
        /* don't care for the reason here */
 -      dev_err(DEV, "submit failed, triggering re-connect\n");
 -      spin_lock_irq(&mdev->tconn->req_lock);
 +      drbd_err(device, "submit failed, triggering re-connect\n");
 +      spin_lock_irq(&device->resource->req_lock);
        list_del(&peer_req->w.list);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      spin_unlock_irq(&device->resource->req_lock);
  
 -      drbd_free_peer_req(mdev, peer_req);
 +      drbd_free_peer_req(device, peer_req);
  fail:
 -      put_ldev(mdev);
 +      put_ldev(device);
        return -EIO;
  }
  
  static struct drbd_request *
 -find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
 +find_request(struct drbd_device *device, struct rb_root *root, u64 id,
             sector_t sector, bool missing_ok, const char *func)
  {
        struct drbd_request *req;
        if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
                return req;
        if (!missing_ok) {
 -              dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func,
 +              drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
                        (unsigned long)id, (unsigned long long)sector);
        }
        return NULL;
  }
  
 -static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct drbd_request *req;
        sector_t sector;
        int err;
        struct p_data *p = pi->data;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
        sector = be64_to_cpu(p->sector);
  
 -      spin_lock_irq(&mdev->tconn->req_lock);
 -      req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      spin_lock_irq(&device->resource->req_lock);
 +      req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
 +      spin_unlock_irq(&device->resource->req_lock);
        if (unlikely(!req))
                return -EIO;
  
        /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
         * special casing it there for the various failure cases.
         * still no race with drbd_fail_pending_reads */
 -      err = recv_dless_read(mdev, req, sector, pi->size);
 +      err = recv_dless_read(peer_device, req, sector, pi->size);
        if (!err)
                req_mod(req, DATA_RECEIVED);
        /* else: nothing. handled from drbd_disconnect...
        return err;
  }
  
 -static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        sector_t sector;
        int err;
        struct p_data *p = pi->data;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
        sector = be64_to_cpu(p->sector);
 -      D_ASSERT(p->block_id == ID_SYNCER);
 +      D_ASSERT(device, p->block_id == ID_SYNCER);
  
 -      if (get_ldev(mdev)) {
 +      if (get_ldev(device)) {
                /* data is submitted to disk within recv_resync_read.
                 * corresponding put_ldev done below on error,
                 * or in drbd_peer_request_endio. */
 -              err = recv_resync_read(mdev, sector, pi->size);
 +              err = recv_resync_read(peer_device, sector, pi->size);
        } else {
                if (__ratelimit(&drbd_ratelimit_state))
 -                      dev_err(DEV, "Can not write resync data to local disk.\n");
 +                      drbd_err(device, "Can not write resync data to local disk.\n");
  
 -              err = drbd_drain_block(mdev, pi->size);
 +              err = drbd_drain_block(peer_device, pi->size);
  
 -              drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
 +              drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
        }
  
 -      atomic_add(pi->size >> 9, &mdev->rs_sect_in);
 +      atomic_add(pi->size >> 9, &device->rs_sect_in);
  
        return err;
  }
  
 -static void restart_conflicting_writes(struct drbd_conf *mdev,
 +static void restart_conflicting_writes(struct drbd_device *device,
                                       sector_t sector, int size)
  {
        struct drbd_interval *i;
        struct drbd_request *req;
  
 -      drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
 +      drbd_for_each_overlap(i, &device->write_requests, sector, size) {
                if (!i->local)
                        continue;
                req = container_of(i, struct drbd_request, i);
@@@ -1836,53 -1803,52 +1826,53 @@@ static int e_end_block(struct drbd_wor
  {
        struct drbd_peer_request *peer_req =
                container_of(w, struct drbd_peer_request, w);
 -      struct drbd_conf *mdev = w->mdev;
 +      struct drbd_peer_device *peer_device = peer_req->peer_device;
 +      struct drbd_device *device = peer_device->device;
        sector_t sector = peer_req->i.sector;
        int err = 0, pcmd;
  
        if (peer_req->flags & EE_SEND_WRITE_ACK) {
                if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
 -                      pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
 -                              mdev->state.conn <= C_PAUSED_SYNC_T &&
 +                      pcmd = (device->state.conn >= C_SYNC_SOURCE &&
 +                              device->state.conn <= C_PAUSED_SYNC_T &&
                                peer_req->flags & EE_MAY_SET_IN_SYNC) ?
                                P_RS_WRITE_ACK : P_WRITE_ACK;
 -                      err = drbd_send_ack(mdev, pcmd, peer_req);
 +                      err = drbd_send_ack(peer_device, pcmd, peer_req);
                        if (pcmd == P_RS_WRITE_ACK)
 -                              drbd_set_in_sync(mdev, sector, peer_req->i.size);
 +                              drbd_set_in_sync(device, sector, peer_req->i.size);
                } else {
 -                      err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
 +                      err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
                        /* we expect it to be marked out of sync anyways...
                         * maybe assert this?  */
                }
 -              dec_unacked(mdev);
 +              dec_unacked(device);
        }
        /* we delete from the conflict detection hash _after_ we sent out the
         * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
        if (peer_req->flags & EE_IN_INTERVAL_TREE) {
 -              spin_lock_irq(&mdev->tconn->req_lock);
 -              D_ASSERT(!drbd_interval_empty(&peer_req->i));
 -              drbd_remove_epoch_entry_interval(mdev, peer_req);
 +              spin_lock_irq(&device->resource->req_lock);
 +              D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
 +              drbd_remove_epoch_entry_interval(device, peer_req);
                if (peer_req->flags & EE_RESTART_REQUESTS)
 -                      restart_conflicting_writes(mdev, sector, peer_req->i.size);
 -              spin_unlock_irq(&mdev->tconn->req_lock);
 +                      restart_conflicting_writes(device, sector, peer_req->i.size);
 +              spin_unlock_irq(&device->resource->req_lock);
        } else
 -              D_ASSERT(drbd_interval_empty(&peer_req->i));
 +              D_ASSERT(device, drbd_interval_empty(&peer_req->i));
  
 -      drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
 +      drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
  
        return err;
  }
  
  static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
  {
 -      struct drbd_conf *mdev = w->mdev;
        struct drbd_peer_request *peer_req =
                container_of(w, struct drbd_peer_request, w);
 +      struct drbd_peer_device *peer_device = peer_req->peer_device;
        int err;
  
 -      err = drbd_send_ack(mdev, ack, peer_req);
 -      dec_unacked(mdev);
 +      err = drbd_send_ack(peer_device, ack, peer_req);
 +      dec_unacked(peer_device->device);
  
        return err;
  }
@@@ -1894,11 -1860,9 +1884,11 @@@ static int e_send_superseded(struct drb
  
  static int e_send_retry_write(struct drbd_work *w, int unused)
  {
 -      struct drbd_tconn *tconn = w->mdev->tconn;
 +      struct drbd_peer_request *peer_req =
 +              container_of(w, struct drbd_peer_request, w);
 +      struct drbd_connection *connection = peer_req->peer_device->connection;
  
 -      return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
 +      return e_send_ack(w, connection->agreed_pro_version >= 100 ?
                             P_RETRY_WRITE : P_SUPERSEDED);
  }
  
@@@ -1917,19 -1881,18 +1907,19 @@@ static u32 seq_max(u32 a, u32 b
        return seq_greater(a, b) ? a : b;
  }
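
seq_greater() (its body lies outside this hunk) and seq_max() implement serial-number arithmetic so the comparison stays correct across 32-bit wraparound. The usual formulation as a self-contained sketch, with the wrap case spelled out; the signed-cast trick is an assumption about seq_greater's implementation, not something shown in this diff:

    #include <stdint.h>
    #include <assert.h>

    /* a is "greater" if it is at most 2^31 - 1 ahead of b, mod 2^32 */
    static int demo_seq_greater(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b) > 0;
    }

    int main(void)
    {
            assert(demo_seq_greater(2, 1));
            /* across the 32-bit wrap, 1 is logically newer than 0xffffffff */
            assert(demo_seq_greater(1, 0xffffffffu));
            assert(!demo_seq_greater(0xffffffffu, 1));
            return 0;
    }
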
  
 -static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
 +static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
  {
 +      struct drbd_device *device = peer_device->device;
        unsigned int newest_peer_seq;
  
 -      if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)) {
 -              spin_lock(&mdev->peer_seq_lock);
 -              newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
 -              mdev->peer_seq = newest_peer_seq;
 -              spin_unlock(&mdev->peer_seq_lock);
 -              /* wake up only if we actually changed mdev->peer_seq */
 +      if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
 +              spin_lock(&device->peer_seq_lock);
 +              newest_peer_seq = seq_max(device->peer_seq, peer_seq);
 +              device->peer_seq = newest_peer_seq;
 +              spin_unlock(&device->peer_seq_lock);
 +              /* wake up only if we actually changed device->peer_seq */
                if (peer_seq == newest_peer_seq)
 -                      wake_up(&mdev->seq_wait);
 +                      wake_up(&device->seq_wait);
        }
  }
  
@@@ -1939,20 -1902,20 +1929,20 @@@ static inline int overlaps(sector_t s1
  }
  
  /* maybe change sync_ee into interval trees as well? */
 -static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
 +static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
  {
        struct drbd_peer_request *rs_req;
        bool rv = 0;
  
 -      spin_lock_irq(&mdev->tconn->req_lock);
 -      list_for_each_entry(rs_req, &mdev->sync_ee, w.list) {
 +      spin_lock_irq(&device->resource->req_lock);
 +      list_for_each_entry(rs_req, &device->sync_ee, w.list) {
                if (overlaps(peer_req->i.sector, peer_req->i.size,
                             rs_req->i.sector, rs_req->i.size)) {
                        rv = 1;
                        break;
                }
        }
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      spin_unlock_irq(&device->resource->req_lock);
  
        return rv;
  }
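
The scan above leans on an overlaps() predicate over (sector, size-in-bytes) pairs; its body is not part of this hunk, but the natural reading is that two requests overlap iff neither ends at or before the other's start. A stand-alone sketch under that assumption:

    /* sectors are 512-byte units, sizes are in bytes, end = s + (l >> 9);
     * assumed formulation, the real overlaps() is defined outside this hunk */
    static int demo_overlaps(unsigned long long s1, int l1,
                             unsigned long long s2, int l2)
    {
            return !(s1 + (l1 >> 9) <= s2 || s2 + (l2 >> 9) <= s1);
    }
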
   *
   * Note: we don't care for Ack packets overtaking P_DATA packets.
   *
 - * In case packet_seq is larger than mdev->peer_seq number, there are
  + * In case packet_seq is larger than device->peer_seq, there are
   * outstanding packets on the msock. We wait for them to arrive.
 - * In case we are the logically next packet, we update mdev->peer_seq
  + * In case this is the logically next packet, we update device->peer_seq
   * ourselves. Correctly handles 32bit wrap around.
   *
   * Assume we have a 10 GBit connection, which is about 1<<30 bytes per second,
   *
   * returns 0 if we may process the packet,
   * -ERESTARTSYS if we were interrupted (by disconnect signal). */
 -static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
 +static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
  {
 +      struct drbd_device *device = peer_device->device;
        DEFINE_WAIT(wait);
        long timeout;
        int ret = 0, tp;
  
 -      if (!test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags))
 +      if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
                return 0;
  
 -      spin_lock(&mdev->peer_seq_lock);
 +      spin_lock(&device->peer_seq_lock);
        for (;;) {
 -              if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
 -                      mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
 +              if (!seq_greater(peer_seq - 1, device->peer_seq)) {
 +                      device->peer_seq = seq_max(device->peer_seq, peer_seq);
                        break;
                }
  
                }
  
                rcu_read_lock();
 -              tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
 +              tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
                rcu_read_unlock();
  
                if (!tp)
                        break;
  
                /* Only need to wait if two_primaries is enabled */
 -              prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
 -              spin_unlock(&mdev->peer_seq_lock);
 +              prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
 +              spin_unlock(&device->peer_seq_lock);
                rcu_read_lock();
 -              timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10;
 +              timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
                rcu_read_unlock();
                timeout = schedule_timeout(timeout);
 -              spin_lock(&mdev->peer_seq_lock);
 +              spin_lock(&device->peer_seq_lock);
                if (!timeout) {
                        ret = -ETIMEDOUT;
 -                      dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
 +                      drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
                        break;
                }
        }
 -      spin_unlock(&mdev->peer_seq_lock);
 -      finish_wait(&mdev->seq_wait, &wait);
 +      spin_unlock(&device->peer_seq_lock);
 +      finish_wait(&device->seq_wait, &wait);
        return ret;
  }
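
The admission test inside the loop reduces to: the packet with sequence number peer_seq may be processed once its predecessor, peer_seq - 1, is no longer ahead of the newest sequence number seen so far. Isolated into a self-contained helper (hypothetical name; the wraparound-safe cast is the same assumption as in the seq_greater sketch above):

    #include <stdint.h>

    static int demo_may_process(uint32_t peer_seq, uint32_t newest_seen)
    {
            /* true once (peer_seq - 1) is not greater than newest_seen,
             * with 32-bit wraparound handled by the signed cast */
            return (int32_t)((peer_seq - 1) - newest_seen) <= 0;
    }
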
  
  /* see also bio_flags_to_wire()
   * DRBD_REQ_*, because we need to semantically map the flags to data packet
   * flags and back. We may replicate to other kernel versions. */
 -static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
 +static unsigned long wire_flags_to_bio(u32 dpf)
  {
        return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
                (dpf & DP_FUA ? REQ_FUA : 0) |
                (dpf & DP_DISCARD ? REQ_DISCARD : 0);
  }
  
 -static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
 +static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
                                    unsigned int size)
  {
        struct drbd_interval *i;
  
      repeat:
 -      drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
 +      drbd_for_each_overlap(i, &device->write_requests, sector, size) {
                struct drbd_request *req;
                struct bio_and_error m;
  
                        continue;
                req->rq_state &= ~RQ_POSTPONED;
                __req_mod(req, NEG_ACKED, &m);
 -              spin_unlock_irq(&mdev->tconn->req_lock);
 +              spin_unlock_irq(&device->resource->req_lock);
                if (m.bio)
 -                      complete_master_bio(mdev, &m);
 -              spin_lock_irq(&mdev->tconn->req_lock);
 +                      complete_master_bio(device, &m);
 +              spin_lock_irq(&device->resource->req_lock);
                goto repeat;
        }
  }
  
 -static int handle_write_conflicts(struct drbd_conf *mdev,
 +static int handle_write_conflicts(struct drbd_device *device,
                                  struct drbd_peer_request *peer_req)
  {
 -      struct drbd_tconn *tconn = mdev->tconn;
 -      bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &tconn->flags);
 +      struct drbd_connection *connection = peer_req->peer_device->connection;
 +      bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
        sector_t sector = peer_req->i.sector;
        const unsigned int size = peer_req->i.size;
        struct drbd_interval *i;
         * Inserting the peer request into the write_requests tree will prevent
         * new conflicting local requests from being added.
         */
 -      drbd_insert_interval(&mdev->write_requests, &peer_req->i);
 +      drbd_insert_interval(&device->write_requests, &peer_req->i);
  
      repeat:
 -      drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
 +      drbd_for_each_overlap(i, &device->write_requests, sector, size) {
                if (i == &peer_req->i)
                        continue;
  
                         * should not happen in a two-node setup.  Wait for the
                         * earlier peer request to complete.
                         */
 -                      err = drbd_wait_misc(mdev, i);
 +                      err = drbd_wait_misc(device, i);
                        if (err)
                                goto out;
                        goto repeat;
                                       (i->size >> 9) >= sector + (size >> 9);
  
                        if (!equal)
 -                              dev_alert(DEV, "Concurrent writes detected: "
 +                              drbd_alert(device, "Concurrent writes detected: "
                                               "local=%llus +%u, remote=%llus +%u, "
                                               "assuming %s came first\n",
                                          (unsigned long long)i->sector, i->size,
                                          (unsigned long long)sector, size,
                                          superseded ? "local" : "remote");
  
 -                      inc_unacked(mdev);
 +                      inc_unacked(device);
                        peer_req->w.cb = superseded ? e_send_superseded :
                                                   e_send_retry_write;
 -                      list_add_tail(&peer_req->w.list, &mdev->done_ee);
 -                      wake_asender(mdev->tconn);
 +                      list_add_tail(&peer_req->w.list, &device->done_ee);
 +                      wake_asender(connection);
  
                        err = -ENOENT;
                        goto out;
                                container_of(i, struct drbd_request, i);
  
                        if (!equal)
 -                              dev_alert(DEV, "Concurrent writes detected: "
 +                              drbd_alert(device, "Concurrent writes detected: "
                                               "local=%llus +%u, remote=%llus +%u\n",
                                          (unsigned long long)i->sector, i->size,
                                          (unsigned long long)sector, size);
                                 * request to finish locally before submitting
                                 * the conflicting peer request.
                                 */
 -                              err = drbd_wait_misc(mdev, &req->i);
 +                              err = drbd_wait_misc(device, &req->i);
                                if (err) {
 -                                      _conn_request_state(mdev->tconn,
 -                                                          NS(conn, C_TIMEOUT),
 -                                                          CS_HARD);
 -                                      fail_postponed_requests(mdev, sector, size);
 +                                      _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
 +                                      fail_postponed_requests(device, sector, size);
                                        goto out;
                                }
                                goto repeat;
  
      out:
        if (err)
 -              drbd_remove_epoch_entry_interval(mdev, peer_req);
 +              drbd_remove_epoch_entry_interval(device, peer_req);
        return err;
  }
  
  /* mirrored write */
 -static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        sector_t sector;
        struct drbd_peer_request *peer_req;
        struct p_data *p = pi->data;
        u32 dp_flags;
        int err, tp;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
 -      if (!get_ldev(mdev)) {
 +      if (!get_ldev(device)) {
                int err2;
  
 -              err = wait_for_and_update_peer_seq(mdev, peer_seq);
 -              drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
 -              atomic_inc(&tconn->current_epoch->epoch_size);
 -              err2 = drbd_drain_block(mdev, pi->size);
 +              err = wait_for_and_update_peer_seq(peer_device, peer_seq);
 +              drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
 +              atomic_inc(&connection->current_epoch->epoch_size);
 +              err2 = drbd_drain_block(peer_device, pi->size);
                if (!err)
                        err = err2;
                return err;
         */
  
        sector = be64_to_cpu(p->sector);
 -      peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
 +      peer_req = read_in_block(peer_device, p->block_id, sector, pi->size);
        if (!peer_req) {
 -              put_ldev(mdev);
 +              put_ldev(device);
                return -EIO;
        }
  
        peer_req->w.cb = e_end_block;
  
        dp_flags = be32_to_cpu(p->dp_flags);
 -      rw |= wire_flags_to_bio(mdev, dp_flags);
 +      rw |= wire_flags_to_bio(dp_flags);
        if (peer_req->pages == NULL) {
 -              D_ASSERT(peer_req->i.size == 0);
 -              D_ASSERT(dp_flags & DP_FLUSH);
 +              D_ASSERT(device, peer_req->i.size == 0);
 +              D_ASSERT(device, dp_flags & DP_FLUSH);
        }
  
        if (dp_flags & DP_MAY_SET_IN_SYNC)
                peer_req->flags |= EE_MAY_SET_IN_SYNC;
  
 -      spin_lock(&tconn->epoch_lock);
 -      peer_req->epoch = tconn->current_epoch;
 +      spin_lock(&connection->epoch_lock);
 +      peer_req->epoch = connection->current_epoch;
        atomic_inc(&peer_req->epoch->epoch_size);
        atomic_inc(&peer_req->epoch->active);
 -      spin_unlock(&tconn->epoch_lock);
 +      spin_unlock(&connection->epoch_lock);
  
        rcu_read_lock();
 -      tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
 +      tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
        rcu_read_unlock();
        if (tp) {
                peer_req->flags |= EE_IN_INTERVAL_TREE;
 -              err = wait_for_and_update_peer_seq(mdev, peer_seq);
 +              err = wait_for_and_update_peer_seq(peer_device, peer_seq);
                if (err)
                        goto out_interrupted;
 -              spin_lock_irq(&mdev->tconn->req_lock);
 -              err = handle_write_conflicts(mdev, peer_req);
 +              spin_lock_irq(&device->resource->req_lock);
 +              err = handle_write_conflicts(device, peer_req);
                if (err) {
 -                      spin_unlock_irq(&mdev->tconn->req_lock);
 +                      spin_unlock_irq(&device->resource->req_lock);
                        if (err == -ENOENT) {
 -                              put_ldev(mdev);
 +                              put_ldev(device);
                                return 0;
                        }
                        goto out_interrupted;
                }
        } else {
 -              update_peer_seq(mdev, peer_seq);
 -              spin_lock_irq(&mdev->tconn->req_lock);
 +              update_peer_seq(peer_device, peer_seq);
 +              spin_lock_irq(&device->resource->req_lock);
        }
 -      list_add(&peer_req->w.list, &mdev->active_ee);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      list_add(&peer_req->w.list, &device->active_ee);
 +      spin_unlock_irq(&device->resource->req_lock);
  
 -      if (mdev->state.conn == C_SYNC_TARGET)
 -              wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, peer_req));
 +      if (device->state.conn == C_SYNC_TARGET)
 +              wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
  
 -      if (mdev->tconn->agreed_pro_version < 100) {
 +      if (peer_device->connection->agreed_pro_version < 100) {
                rcu_read_lock();
 -              switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) {
 +              switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
                case DRBD_PROT_C:
                        dp_flags |= DP_SEND_WRITE_ACK;
                        break;
  
        if (dp_flags & DP_SEND_WRITE_ACK) {
                peer_req->flags |= EE_SEND_WRITE_ACK;
 -              inc_unacked(mdev);
 +              inc_unacked(device);
                /* corresponding dec_unacked() in e_end_block()
                 * respective _drbd_clear_done_ee */
        }
        if (dp_flags & DP_SEND_RECEIVE_ACK) {
                /* I really don't like it that the receiver thread
                 * sends on the msock, but anyways */
 -              drbd_send_ack(mdev, P_RECV_ACK, peer_req);
 +              drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
        }
  
 -      if (mdev->state.pdsk < D_INCONSISTENT) {
 +      if (device->state.pdsk < D_INCONSISTENT) {
                /* In case we have the only disk of the cluster, */
 -              drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
 +              drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
                peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
                peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
 -              drbd_al_begin_io(mdev, &peer_req->i, true);
 +              drbd_al_begin_io(device, &peer_req->i, true);
        }
  
 -      err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
 +      err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
        if (!err)
                return 0;
  
        /* don't care for the reason here */
 -      dev_err(DEV, "submit failed, triggering re-connect\n");
 -      spin_lock_irq(&mdev->tconn->req_lock);
 +      drbd_err(device, "submit failed, triggering re-connect\n");
 +      spin_lock_irq(&device->resource->req_lock);
        list_del(&peer_req->w.list);
 -      drbd_remove_epoch_entry_interval(mdev, peer_req);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      drbd_remove_epoch_entry_interval(device, peer_req);
 +      spin_unlock_irq(&device->resource->req_lock);
        if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
 -              drbd_al_complete_io(mdev, &peer_req->i);
 +              drbd_al_complete_io(device, &peer_req->i);
  
  out_interrupted:
 -      drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP);
 -      put_ldev(mdev);
 -      drbd_free_peer_req(mdev, peer_req);
 +      drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
 +      put_ldev(device);
 +      drbd_free_peer_req(device, peer_req);
        return err;
  }
  
   * The current sync rate used here uses only the most recent two step marks,
   * to have a short time average so we can react faster.
   */
 -int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
 +int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
  {
 -      struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
 +      struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
        unsigned long db, dt, dbdt;
        struct lc_element *tmp;
        int curr_events;
        unsigned int c_min_rate;
  
        rcu_read_lock();
 -      c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate;
 +      c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
        rcu_read_unlock();
  
        /* feature disabled? */
        if (c_min_rate == 0)
                return 0;
  
 -      spin_lock_irq(&mdev->al_lock);
 -      tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
 +      spin_lock_irq(&device->al_lock);
 +      tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
        if (tmp) {
                struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
                if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
 -                      spin_unlock_irq(&mdev->al_lock);
 +                      spin_unlock_irq(&device->al_lock);
                        return 0;
                }
                /* Do not slow down if app IO is already waiting for this extent */
        }
 -      spin_unlock_irq(&mdev->al_lock);
 +      spin_unlock_irq(&device->al_lock);
  
        curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
                      (int)part_stat_read(&disk->part0, sectors[1]) -
 -                      atomic_read(&mdev->rs_sect_ev);
 +                      atomic_read(&device->rs_sect_ev);
  
 -      if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
 +      if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
                unsigned long rs_left;
                int i;
  
 -              mdev->rs_last_events = curr_events;
 +              device->rs_last_events = curr_events;
  
                /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
                 * approx. */
 -              i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
 +              i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
  
 -              if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
 -                      rs_left = mdev->ov_left;
 +              if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
 +                      rs_left = device->ov_left;
                else
 -                      rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
 +                      rs_left = drbd_bm_total_weight(device) - device->rs_failed;
  
 -              dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
 +              dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
                if (!dt)
                        dt++;
 -              db = mdev->rs_mark_left[i] - rs_left;
 +              db = device->rs_mark_left[i] - rs_left;
                dbdt = Bit2KB(db/dt);
  
                if (dbdt > c_min_rate)
  }
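
The throttle decision is plain rate arithmetic: db bitmap bits resynced over dt seconds, converted to KiB/s and compared against c_min_rate. A stand-alone sketch of that computation, assuming DRBD's default 4KiB-per-bitmap-bit granularity for Bit2KB() (that definition is not part of this diff):

    /* 4KiB per bitmap bit => bits -> KiB is a shift by 2 (assumed default) */
    #define DEMO_BIT2KB(bits)       ((bits) << 2)

    static int demo_should_slow_down(unsigned long db_bits, unsigned long dt_sec,
                                     unsigned int c_min_rate_kb)
    {
            unsigned long dbdt;

            if (c_min_rate_kb == 0)
                    return 0;               /* feature disabled, as above */
            if (dt_sec == 0)
                    dt_sec = 1;             /* mirror the !dt guard above */
            dbdt = DEMO_BIT2KB(db_bits / dt_sec);
            return dbdt > c_min_rate_kb;
    }
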
  
  
 -static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        sector_t sector;
        sector_t capacity;
        struct drbd_peer_request *peer_req;
        unsigned int fault_type;
        struct p_block_req *p = pi->data;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 -      capacity = drbd_get_capacity(mdev->this_bdev);
 +      device = peer_device->device;
 +      capacity = drbd_get_capacity(device->this_bdev);
  
        sector = be64_to_cpu(p->sector);
        size   = be32_to_cpu(p->blksize);
  
        if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
 -              dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
 +              drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
                                (unsigned long long)sector, size);
                return -EINVAL;
        }
        if (sector + (size>>9) > capacity) {
 -              dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
 +              drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
                                (unsigned long long)sector, size);
                return -EINVAL;
        }
  
 -      if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
 +      if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
                verb = 1;
                switch (pi->cmd) {
                case P_DATA_REQUEST:
 -                      drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
 +                      drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
                        break;
                case P_RS_DATA_REQUEST:
                case P_CSUM_RS_REQUEST:
                case P_OV_REQUEST:
 -                      drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
 +                      drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
                        break;
                case P_OV_REPLY:
                        verb = 0;
 -                      dec_rs_pending(mdev);
 -                      drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
 +                      dec_rs_pending(device);
 +                      drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
                        break;
                default:
                        BUG();
                }
                if (verb && __ratelimit(&drbd_ratelimit_state))
 -                      dev_err(DEV, "Can not satisfy peer's read request, "
 +                      drbd_err(device, "Can not satisfy peer's read request, "
                            "no local data.\n");
  
                /* drain possibly payload */
 -              return drbd_drain_block(mdev, pi->size);
 +              return drbd_drain_block(peer_device, pi->size);
        }
  
        /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
         * "criss-cross" setup, that might cause write-out on some other DRBD,
         * which in turn might block on the other node at this very place.  */
 -      peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO);
 +      peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, GFP_NOIO);
        if (!peer_req) {
 -              put_ldev(mdev);
 +              put_ldev(device);
                return -ENOMEM;
        }
  
                peer_req->w.cb = w_e_end_rsdata_req;
                fault_type = DRBD_FAULT_RS_RD;
                /* used in the sector offset progress display */
 -              mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
 +              device->bm_resync_fo = BM_SECT_TO_BIT(sector);
                break;
  
        case P_OV_REPLY:
                peer_req->digest = di;
                peer_req->flags |= EE_HAS_DIGEST;
  
 -              if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
 +              if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
                        goto out_free_e;
  
                if (pi->cmd == P_CSUM_RS_REQUEST) {
 -                      D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
 +                      D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
                        peer_req->w.cb = w_e_end_csum_rs_req;
                        /* used in the sector offset progress display */
 -                      mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
 +                      device->bm_resync_fo = BM_SECT_TO_BIT(sector);
                } else if (pi->cmd == P_OV_REPLY) {
                        /* track progress, we may need to throttle */
 -                      atomic_add(size >> 9, &mdev->rs_sect_in);
 +                      atomic_add(size >> 9, &device->rs_sect_in);
                        peer_req->w.cb = w_e_end_ov_reply;
 -                      dec_rs_pending(mdev);
 +                      dec_rs_pending(device);
                        /* drbd_rs_begin_io done when we sent this request,
                         * but accounting still needs to be done. */
                        goto submit_for_resync;
                break;
  
        case P_OV_REQUEST:
 -              if (mdev->ov_start_sector == ~(sector_t)0 &&
 -                  mdev->tconn->agreed_pro_version >= 90) {
 +              if (device->ov_start_sector == ~(sector_t)0 &&
 +                  peer_device->connection->agreed_pro_version >= 90) {
                        unsigned long now = jiffies;
                        int i;
 -                      mdev->ov_start_sector = sector;
 -                      mdev->ov_position = sector;
 -                      mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
 -                      mdev->rs_total = mdev->ov_left;
 +                      device->ov_start_sector = sector;
 +                      device->ov_position = sector;
 +                      device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
 +                      device->rs_total = device->ov_left;
                        for (i = 0; i < DRBD_SYNC_MARKS; i++) {
 -                              mdev->rs_mark_left[i] = mdev->ov_left;
 -                              mdev->rs_mark_time[i] = now;
 +                              device->rs_mark_left[i] = device->ov_left;
 +                              device->rs_mark_time[i] = now;
                        }
 -                      dev_info(DEV, "Online Verify start sector: %llu\n",
 +                      drbd_info(device, "Online Verify start sector: %llu\n",
                                        (unsigned long long)sector);
                }
                peer_req->w.cb = w_e_end_ov_req;
         * we would also throttle its application reads.
         * In that case, throttling is done on the SyncTarget only.
         */
 -      if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
 +      if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
                schedule_timeout_uninterruptible(HZ/10);
 -      if (drbd_rs_begin_io(mdev, sector))
 +      if (drbd_rs_begin_io(device, sector))
                goto out_free_e;
  
  submit_for_resync:
 -      atomic_add(size >> 9, &mdev->rs_sect_ev);
 +      atomic_add(size >> 9, &device->rs_sect_ev);
  
  submit:
 -      inc_unacked(mdev);
 -      spin_lock_irq(&mdev->tconn->req_lock);
 -      list_add_tail(&peer_req->w.list, &mdev->read_ee);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      inc_unacked(device);
 +      spin_lock_irq(&device->resource->req_lock);
 +      list_add_tail(&peer_req->w.list, &device->read_ee);
 +      spin_unlock_irq(&device->resource->req_lock);
  
 -      if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
 +      if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
                return 0;
  
        /* don't care for the reason here */
 -      dev_err(DEV, "submit failed, triggering re-connect\n");
 -      spin_lock_irq(&mdev->tconn->req_lock);
 +      drbd_err(device, "submit failed, triggering re-connect\n");
 +      spin_lock_irq(&device->resource->req_lock);
        list_del(&peer_req->w.list);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      spin_unlock_irq(&device->resource->req_lock);
        /* no drbd_rs_complete_io(), we are dropping the connection anyways */
  
  out_free_e:
 -      put_ldev(mdev);
 -      drbd_free_peer_req(mdev, peer_req);
 +      put_ldev(device);
 +      drbd_free_peer_req(device, peer_req);
        return -EIO;
  }
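  
  /*
   * Aside (illustrative sketch, not part of this change): the sanity
   * checks at the top of the receive function above reduce to a single
   * predicate -- a peer read request must be a positive multiple of
   * 512 bytes, no larger than DRBD_MAX_BIO_SIZE, and must not run past
   * the end of the device.  As a hypothetical helper:
   *
   *        static bool read_request_sane(sector_t sector, int size,
   *                                      sector_t capacity)
   *        {
   *                if (size <= 0 || !IS_ALIGNED(size, 512) ||
   *                    size > DRBD_MAX_BIO_SIZE)
   *                        return false;
   *                return sector + (size >> 9) <= capacity;
   *        }
   */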
  
 -static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
 +/**
 + * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
 + */
 +static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
  {
 +      struct drbd_device *device = peer_device->device;
        int self, peer, rv = -100;
        unsigned long ch_self, ch_peer;
        enum drbd_after_sb_p after_sb_0p;
  
 -      self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
 -      peer = mdev->p_uuid[UI_BITMAP] & 1;
 +      self = device->ldev->md.uuid[UI_BITMAP] & 1;
 +      peer = device->p_uuid[UI_BITMAP] & 1;
  
 -      ch_peer = mdev->p_uuid[UI_SIZE];
 -      ch_self = mdev->comm_bm_set;
 +      ch_peer = device->p_uuid[UI_SIZE];
 +      ch_self = device->comm_bm_set;
  
        rcu_read_lock();
 -      after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p;
 +      after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
        rcu_read_unlock();
        switch (after_sb_0p) {
        case ASB_CONSENSUS:
        case ASB_DISCARD_SECONDARY:
        case ASB_CALL_HELPER:
        case ASB_VIOLENTLY:
 -              dev_err(DEV, "Configuration error.\n");
 +              drbd_err(device, "Configuration error.\n");
                break;
        case ASB_DISCONNECT:
                break;
                        break;
                }
                /* Else fall through to one of the other strategies... */
 -              dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
 +              drbd_warn(device, "Discard younger/older primary did not find a decision\n"
                     "Using discard-least-changes instead\n");
        case ASB_DISCARD_ZERO_CHG:
                if (ch_peer == 0 && ch_self == 0) {
 -                      rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
 +                      rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
                                ? -1 : 1;
                        break;
                } else {
                        rv =  1;
                else /* ( ch_self == ch_peer ) */
                     /* Well, then use something else. */
 -                      rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
 +                      rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
                                ? -1 : 1;
                break;
        case ASB_DISCARD_LOCAL:
        return rv;
  }
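  
  /*
   * Sketch of the convention shared by the three recover functions (as
   * consumed by drbd_sync_handshake() further down): a positive result
   * makes this node the sync source, a negative result makes it the
   * sync target, and -100 means the configured policy could not break
   * the tie:
   *
   *        hg = drbd_asb_recover_0p(peer_device);
   *        if (hg == -100)
   *                ;        // policy could not decide, escalate or drop
   *        else if (hg < 0)
   *                ;        // become sync target, the peer's data wins
   *        else
   *                ;        // become sync source
   */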
  
 -static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
 +/**
 + * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
 + */
 +static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
  {
 +      struct drbd_device *device = peer_device->device;
        int hg, rv = -100;
        enum drbd_after_sb_p after_sb_1p;
  
        rcu_read_lock();
 -      after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p;
 +      after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
        rcu_read_unlock();
        switch (after_sb_1p) {
        case ASB_DISCARD_YOUNGER_PRI:
        case ASB_DISCARD_LOCAL:
        case ASB_DISCARD_REMOTE:
        case ASB_DISCARD_ZERO_CHG:
 -              dev_err(DEV, "Configuration error.\n");
 +              drbd_err(device, "Configuration error.\n");
                break;
        case ASB_DISCONNECT:
                break;
        case ASB_CONSENSUS:
 -              hg = drbd_asb_recover_0p(mdev);
 -              if (hg == -1 && mdev->state.role == R_SECONDARY)
 +              hg = drbd_asb_recover_0p(peer_device);
 +              if (hg == -1 && device->state.role == R_SECONDARY)
                        rv = hg;
 -              if (hg == 1  && mdev->state.role == R_PRIMARY)
 +              if (hg == 1  && device->state.role == R_PRIMARY)
                        rv = hg;
                break;
        case ASB_VIOLENTLY:
 -              rv = drbd_asb_recover_0p(mdev);
 +              rv = drbd_asb_recover_0p(peer_device);
                break;
        case ASB_DISCARD_SECONDARY:
 -              return mdev->state.role == R_PRIMARY ? 1 : -1;
 +              return device->state.role == R_PRIMARY ? 1 : -1;
        case ASB_CALL_HELPER:
 -              hg = drbd_asb_recover_0p(mdev);
 -              if (hg == -1 && mdev->state.role == R_PRIMARY) {
 +              hg = drbd_asb_recover_0p(peer_device);
 +              if (hg == -1 && device->state.role == R_PRIMARY) {
                        enum drbd_state_rv rv2;
  
                         /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
                          * we might be here in C_WF_REPORT_PARAMS which is transient.
                          * we do not need to wait for the after state change work either. */
 -                      rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
 +                      rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
                        if (rv2 != SS_SUCCESS) {
 -                              drbd_khelper(mdev, "pri-lost-after-sb");
 +                              drbd_khelper(device, "pri-lost-after-sb");
                        } else {
 -                              dev_warn(DEV, "Successfully gave up primary role.\n");
 +                              drbd_warn(device, "Successfully gave up primary role.\n");
                                rv = hg;
                        }
                } else
        return rv;
  }
  
 -static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
 +/**
 + * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
 + */
 +static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
  {
 +      struct drbd_device *device = peer_device->device;
        int hg, rv = -100;
        enum drbd_after_sb_p after_sb_2p;
  
        rcu_read_lock();
 -      after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p;
 +      after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
        rcu_read_unlock();
        switch (after_sb_2p) {
        case ASB_DISCARD_YOUNGER_PRI:
        case ASB_CONSENSUS:
        case ASB_DISCARD_SECONDARY:
        case ASB_DISCARD_ZERO_CHG:
 -              dev_err(DEV, "Configuration error.\n");
 +              drbd_err(device, "Configuration error.\n");
                break;
        case ASB_VIOLENTLY:
 -              rv = drbd_asb_recover_0p(mdev);
 +              rv = drbd_asb_recover_0p(peer_device);
                break;
        case ASB_DISCONNECT:
                break;
        case ASB_CALL_HELPER:
 -              hg = drbd_asb_recover_0p(mdev);
 +              hg = drbd_asb_recover_0p(peer_device);
                if (hg == -1) {
                        enum drbd_state_rv rv2;
  
                         /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
                          * we might be here in C_WF_REPORT_PARAMS which is transient.
                          * we do not need to wait for the after state change work either. */
 -                      rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
 +                      rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
                        if (rv2 != SS_SUCCESS) {
 -                              drbd_khelper(mdev, "pri-lost-after-sb");
 +                              drbd_khelper(device, "pri-lost-after-sb");
                        } else {
 -                              dev_warn(DEV, "Successfully gave up primary role.\n");
 +                              drbd_warn(device, "Successfully gave up primary role.\n");
                                rv = hg;
                        }
                } else
        return rv;
  }
  
 -static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
 +static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
                           u64 bits, u64 flags)
  {
        if (!uuid) {
 -              dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
 +              drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
                return;
        }
 -      dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
 +      drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
             text,
             (unsigned long long)uuid[UI_CURRENT],
             (unsigned long long)uuid[UI_BITMAP],
  -1091   requires proto 91
  -1096   requires proto 96
   */
 -static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
 +static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
  {
        u64 self, peer;
        int i, j;
  
 -      self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
 -      peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
 +      self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
 +      peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
  
        *rule_nr = 10;
        if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
        if (self == peer) {
                int rct, dc; /* roles at crash time */
  
 -              if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
 +              if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
  
 -                      if (mdev->tconn->agreed_pro_version < 91)
 +                      if (first_peer_device(device)->connection->agreed_pro_version < 91)
                                return -1091;
  
 -                      if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
 -                          (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
 -                              dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
 -                              drbd_uuid_move_history(mdev);
 -                              mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
 -                              mdev->ldev->md.uuid[UI_BITMAP] = 0;
 +                      if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
 +                          (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
 +                              drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
 +                              drbd_uuid_move_history(device);
 +                              device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
 +                              device->ldev->md.uuid[UI_BITMAP] = 0;
  
 -                              drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
 -                                             mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
 +                              drbd_uuid_dump(device, "self", device->ldev->md.uuid,
 +                                             device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
                                *rule_nr = 34;
                        } else {
 -                              dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
 +                              drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
                                *rule_nr = 36;
                        }
  
                        return 1;
                }
  
 -              if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
 +              if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
  
 -                      if (mdev->tconn->agreed_pro_version < 91)
 +                      if (first_peer_device(device)->connection->agreed_pro_version < 91)
                                return -1091;
  
 -                      if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
 -                          (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
 -                              dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
 +                      if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
 +                          (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
 +                              drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
  
 -                              mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
 -                              mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
 -                              mdev->p_uuid[UI_BITMAP] = 0UL;
 +                              device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
 +                              device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
 +                              device->p_uuid[UI_BITMAP] = 0UL;
  
 -                              drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
 +                              drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
                                *rule_nr = 35;
                        } else {
 -                              dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
 +                              drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
                                *rule_nr = 37;
                        }
  
                }
  
                /* Common power [off|failure] */
 -              rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
 -                      (mdev->p_uuid[UI_FLAGS] & 2);
 +              rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
 +                      (device->p_uuid[UI_FLAGS] & 2);
                /* lowest bit is set when we were primary,
                 * next bit (weight 2) is set when peer was primary */
                *rule_nr = 40;
                case 1: /*  self_pri && !peer_pri */ return 1;
                case 2: /* !self_pri &&  peer_pri */ return -1;
                case 3: /*  self_pri &&  peer_pri */
 -                      dc = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
 +                      dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
                        return dc ? -1 : 1;
                }
        }
  
        *rule_nr = 50;
 -      peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
 +      peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
        if (self == peer)
                return -1;
  
        *rule_nr = 51;
 -      peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
 +      peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
        if (self == peer) {
 -              if (mdev->tconn->agreed_pro_version < 96 ?
 -                  (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
 -                  (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
 -                  peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
 +              if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
 +                  (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
 +                  (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
 +                  peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
                        /* The last P_SYNC_UUID did not get through. Undo the
                           modifications of the peer's UUIDs made by the last
                           start of resync as sync source. */
  
 -                      if (mdev->tconn->agreed_pro_version < 91)
 +                      if (first_peer_device(device)->connection->agreed_pro_version < 91)
                                return -1091;
  
 -                      mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
 -                      mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
 +                      device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
 +                      device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
  
 -                      dev_info(DEV, "Lost last syncUUID packet, corrected:\n");
 -                      drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
 +                      drbd_info(device, "Lost last syncUUID packet, corrected:\n");
 +                      drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
  
                        return -1;
                }
        }
  
        *rule_nr = 60;
 -      self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
 +      self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
        for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
 -              peer = mdev->p_uuid[i] & ~((u64)1);
 +              peer = device->p_uuid[i] & ~((u64)1);
                if (self == peer)
                        return -2;
        }
  
        *rule_nr = 70;
 -      self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
 -      peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
 +      self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
 +      peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
        if (self == peer)
                return 1;
  
        *rule_nr = 71;
 -      self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
 +      self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
        if (self == peer) {
 -              if (mdev->tconn->agreed_pro_version < 96 ?
 -                  (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
 -                  (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
 -                  self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
 +              if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
 +                  (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
 +                  (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
 +                  self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
                        /* The last P_SYNC_UUID did not get through. Undo the
                           modifications of our UUIDs made by the last start
                           of resync as sync source. */
  
 -                      if (mdev->tconn->agreed_pro_version < 91)
 +                      if (first_peer_device(device)->connection->agreed_pro_version < 91)
                                return -1091;
  
 -                      __drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
 -                      __drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
 +                      __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
 +                      __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
  
 -                      dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
 -                      drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
 -                                     mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
 +                      drbd_info(device, "Last syncUUID did not get through, corrected:\n");
 +                      drbd_uuid_dump(device, "self", device->ldev->md.uuid,
 +                                     device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
  
                        return 1;
                }
  
  
        *rule_nr = 80;
 -      peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
 +      peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
        for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
 -              self = mdev->ldev->md.uuid[i] & ~((u64)1);
 +              self = device->ldev->md.uuid[i] & ~((u64)1);
                if (self == peer)
                        return 2;
        }
  
        *rule_nr = 90;
 -      self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
 -      peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
 +      self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
 +      peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
        if (self == peer && self != ((u64)0))
                return 100;
  
        *rule_nr = 100;
        for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
 -              self = mdev->ldev->md.uuid[i] & ~((u64)1);
 +              self = device->ldev->md.uuid[i] & ~((u64)1);
                for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
 -                      peer = mdev->p_uuid[j] & ~((u64)1);
 +                      peer = device->p_uuid[j] & ~((u64)1);
                        if (self == peer)
                                return -100;
                }
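  
  /*
   * For reference (sketch, pieced together from the rule comments and
   * the handshake code below): drbd_uuid_compare() encodes its verdict
   * as 0 = in sync, 1/-1 = become sync source/target using the bitmap,
   * 2/-2 = become sync source/target with a full sync, 100/-100 =
   * split brain, -1000 = unrelated data, and -1091/-1096 = "requires
   * at least protocol 91/96", printed by the handshake as -hg - 1000.
   */
  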
  /* drbd_sync_handshake() returns the new conn state on success, or
     C_MASK on failure.
   */
 -static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
 +static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
 +                                         enum drbd_role peer_role,
                                           enum drbd_disk_state peer_disk) __must_hold(local)
  {
 +      struct drbd_device *device = peer_device->device;
        enum drbd_conns rv = C_MASK;
        enum drbd_disk_state mydisk;
        struct net_conf *nc;
        int hg, rule_nr, rr_conflict, tentative;
  
 -      mydisk = mdev->state.disk;
 +      mydisk = device->state.disk;
        if (mydisk == D_NEGOTIATING)
 -              mydisk = mdev->new_state_tmp.disk;
 +              mydisk = device->new_state_tmp.disk;
  
 -      dev_info(DEV, "drbd_sync_handshake:\n");
 +      drbd_info(device, "drbd_sync_handshake:\n");
  
 -      spin_lock_irq(&mdev->ldev->md.uuid_lock);
 -      drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
 -      drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
 -                     mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
 +      spin_lock_irq(&device->ldev->md.uuid_lock);
 +      drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
 +      drbd_uuid_dump(device, "peer", device->p_uuid,
 +                     device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
  
 -      hg = drbd_uuid_compare(mdev, &rule_nr);
 -      spin_unlock_irq(&mdev->ldev->md.uuid_lock);
 +      hg = drbd_uuid_compare(device, &rule_nr);
 +      spin_unlock_irq(&device->ldev->md.uuid_lock);
  
 -      dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
 +      drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
  
        if (hg == -1000) {
 -              dev_alert(DEV, "Unrelated data, aborting!\n");
 +              drbd_alert(device, "Unrelated data, aborting!\n");
                return C_MASK;
        }
        if (hg < -1000) {
 -              dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
 +              drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
                return C_MASK;
        }
  
                hg = mydisk > D_INCONSISTENT ? 1 : -1;
                if (f)
                        hg = hg*2;
 -              dev_info(DEV, "Becoming sync %s due to disk states.\n",
 +              drbd_info(device, "Becoming sync %s due to disk states.\n",
                     hg > 0 ? "source" : "target");
        }
  
        if (abs(hg) == 100)
 -              drbd_khelper(mdev, "initial-split-brain");
 +              drbd_khelper(device, "initial-split-brain");
  
        rcu_read_lock();
 -      nc = rcu_dereference(mdev->tconn->net_conf);
 +      nc = rcu_dereference(peer_device->connection->net_conf);
  
        if (hg == 100 || (hg == -100 && nc->always_asbp)) {
 -              int pcount = (mdev->state.role == R_PRIMARY)
 +              int pcount = (device->state.role == R_PRIMARY)
                           + (peer_role == R_PRIMARY);
                int forced = (hg == -100);
  
                switch (pcount) {
                case 0:
 -                      hg = drbd_asb_recover_0p(mdev);
 +                      hg = drbd_asb_recover_0p(peer_device);
                        break;
                case 1:
 -                      hg = drbd_asb_recover_1p(mdev);
 +                      hg = drbd_asb_recover_1p(peer_device);
                        break;
                case 2:
 -                      hg = drbd_asb_recover_2p(mdev);
 +                      hg = drbd_asb_recover_2p(peer_device);
                        break;
                }
                if (abs(hg) < 100) {
 -                      dev_warn(DEV, "Split-Brain detected, %d primaries, "
 +                      drbd_warn(device, "Split-Brain detected, %d primaries, "
                             "automatically solved. Sync from %s node\n",
                             pcount, (hg < 0) ? "peer" : "this");
                        if (forced) {
 -                              dev_warn(DEV, "Doing a full sync, since"
 +                              drbd_warn(device, "Doing a full sync, since"
                                     " UUIDs where ambiguous.\n");
                                hg = hg*2;
                        }
        }
  
        if (hg == -100) {
 -              if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1))
 +              if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
                        hg = -1;
 -              if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1))
 +              if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
                        hg = 1;
  
                if (abs(hg) < 100)
 -                      dev_warn(DEV, "Split-Brain detected, manually solved. "
 +                      drbd_warn(device, "Split-Brain detected, manually solved. "
                             "Sync from %s node\n",
                             (hg < 0) ? "peer" : "this");
        }
                 * after an attempted attach on a diskless node.
                 * We just refuse to attach -- well, we drop the "connection"
                 * to that disk, in a way... */
 -              dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
 -              drbd_khelper(mdev, "split-brain");
 +              drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
 +              drbd_khelper(device, "split-brain");
                return C_MASK;
        }
  
        if (hg > 0 && mydisk <= D_INCONSISTENT) {
 -              dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
 +              drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
                return C_MASK;
        }
  
        if (hg < 0 && /* by intention we do not use mydisk here. */
 -          mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
 +          device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
                switch (rr_conflict) {
                case ASB_CALL_HELPER:
 -                      drbd_khelper(mdev, "pri-lost");
 +                      drbd_khelper(device, "pri-lost");
                        /* fall through */
                case ASB_DISCONNECT:
 -                      dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
 +                      drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
                        return C_MASK;
                case ASB_VIOLENTLY:
 -                      dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
 +                      drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
                             "assumption\n");
                }
        }
  
 -      if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
 +      if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
                if (hg == 0)
 -                      dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
 +                      drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
                else
 -                      dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
 +                      drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
                                 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
                                 abs(hg) >= 2 ? "full" : "bit-map based");
                return C_MASK;
        }
  
        if (abs(hg) >= 2) {
 -              dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
 -              if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
 +              drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
 +              if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
                                        BM_LOCKED_SET_ALLOWED))
                        return C_MASK;
        }
                rv = C_WF_BITMAP_T;
        } else {
                rv = C_CONNECTED;
 -              if (drbd_bm_total_weight(mdev)) {
 -                      dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
 -                           drbd_bm_total_weight(mdev));
 +              if (drbd_bm_total_weight(device)) {
 +                      drbd_info(device, "No resync, but %lu bits in bitmap!\n",
 +                           drbd_bm_total_weight(device));
                }
        }
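  
  /*
   * The hunk above shows only the tail of the hg -> connection state
   * mapping; inferred sketch (not quoted from the source) of the full
   * branch:
   *
   *        if (hg > 0)
   *                rv = C_WF_BITMAP_S;        // become sync source
   *        else if (hg < 0)
   *                rv = C_WF_BITMAP_T;        // become sync target
   *        else
   *                rv = C_CONNECTED;          // no resync needed
   */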
  
@@@ -3146,7 -3092,7 +3136,7 @@@ static enum drbd_after_sb_p convert_aft
        return peer;
  }
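  
  /*
   * convert_after_sb(), whose tail appears above, translates the
   * peer's after-split-brain policy into our point of view; a minimal
   * sketch, assuming the usual mirroring of "local" and "remote":
   *
   *        switch (peer) {
   *        case ASB_DISCARD_REMOTE:
   *                return ASB_DISCARD_LOCAL;
   *        case ASB_DISCARD_LOCAL:
   *                return ASB_DISCARD_REMOTE;
   *        default:
   *                return peer;
   *        }
   */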
  
 -static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
  {
        struct p_protocol *p = pi->data;
        enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
        cf              = be32_to_cpu(p->conn_flags);
        p_discard_my_data = cf & CF_DISCARD_MY_DATA;
  
 -      if (tconn->agreed_pro_version >= 87) {
 +      if (connection->agreed_pro_version >= 87) {
                int err;
  
                if (pi->size > sizeof(integrity_alg))
                        return -EIO;
 -              err = drbd_recv_all(tconn, integrity_alg, pi->size);
 +              err = drbd_recv_all(connection, integrity_alg, pi->size);
                if (err)
                        return err;
                integrity_alg[SHARED_SECRET_MAX - 1] = 0;
        }
  
        if (pi->cmd != P_PROTOCOL_UPDATE) {
 -              clear_bit(CONN_DRY_RUN, &tconn->flags);
 +              clear_bit(CONN_DRY_RUN, &connection->flags);
  
                if (cf & CF_DRY_RUN)
 -                      set_bit(CONN_DRY_RUN, &tconn->flags);
 +                      set_bit(CONN_DRY_RUN, &connection->flags);
  
                rcu_read_lock();
 -              nc = rcu_dereference(tconn->net_conf);
 +              nc = rcu_dereference(connection->net_conf);
  
                if (p_proto != nc->wire_protocol) {
 -                      conn_err(tconn, "incompatible %s settings\n", "protocol");
 +                      drbd_err(connection, "incompatible %s settings\n", "protocol");
                        goto disconnect_rcu_unlock;
                }
  
                if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
 -                      conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri");
 +                      drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
                        goto disconnect_rcu_unlock;
                }
  
                if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
 -                      conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri");
 +                      drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
                        goto disconnect_rcu_unlock;
                }
  
                if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
 -                      conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri");
 +                      drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
                        goto disconnect_rcu_unlock;
                }
  
                if (p_discard_my_data && nc->discard_my_data) {
 -                      conn_err(tconn, "incompatible %s settings\n", "discard-my-data");
 +                      drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
                        goto disconnect_rcu_unlock;
                }
  
                if (p_two_primaries != nc->two_primaries) {
 -                      conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries");
 +                      drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
                        goto disconnect_rcu_unlock;
                }
  
                if (strcmp(integrity_alg, nc->integrity_alg)) {
 -                      conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg");
 +                      drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
                        goto disconnect_rcu_unlock;
                }
  
  
                peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
                if (!peer_integrity_tfm) {
 -                      conn_err(tconn, "peer data-integrity-alg %s not supported\n",
 +                      drbd_err(connection, "peer data-integrity-alg %s not supported\n",
                                 integrity_alg);
                        goto disconnect;
                }
                int_dig_in = kmalloc(hash_size, GFP_KERNEL);
                int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
                if (!(int_dig_in && int_dig_vv)) {
 -                      conn_err(tconn, "Allocation of buffers for data integrity checking failed\n");
 +                      drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
                        goto disconnect;
                }
        }
  
        new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
        if (!new_net_conf) {
 -              conn_err(tconn, "Allocation of new net_conf failed\n");
 +              drbd_err(connection, "Allocation of new net_conf failed\n");
                goto disconnect;
        }
  
 -      mutex_lock(&tconn->data.mutex);
 -      mutex_lock(&tconn->conf_update);
 -      old_net_conf = tconn->net_conf;
 +      mutex_lock(&connection->data.mutex);
 +      mutex_lock(&connection->resource->conf_update);
 +      old_net_conf = connection->net_conf;
        *new_net_conf = *old_net_conf;
  
        new_net_conf->wire_protocol = p_proto;
        new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
        new_net_conf->two_primaries = p_two_primaries;
  
 -      rcu_assign_pointer(tconn->net_conf, new_net_conf);
 -      mutex_unlock(&tconn->conf_update);
 -      mutex_unlock(&tconn->data.mutex);
 +      rcu_assign_pointer(connection->net_conf, new_net_conf);
 +      mutex_unlock(&connection->resource->conf_update);
 +      mutex_unlock(&connection->data.mutex);
  
 -      crypto_free_hash(tconn->peer_integrity_tfm);
 -      kfree(tconn->int_dig_in);
 -      kfree(tconn->int_dig_vv);
 -      tconn->peer_integrity_tfm = peer_integrity_tfm;
 -      tconn->int_dig_in = int_dig_in;
 -      tconn->int_dig_vv = int_dig_vv;
 +      crypto_free_hash(connection->peer_integrity_tfm);
 +      kfree(connection->int_dig_in);
 +      kfree(connection->int_dig_vv);
 +      connection->peer_integrity_tfm = peer_integrity_tfm;
 +      connection->int_dig_in = int_dig_in;
 +      connection->int_dig_vv = int_dig_vv;
  
        if (strcmp(old_net_conf->integrity_alg, integrity_alg))
 -              conn_info(tconn, "peer data-integrity-alg: %s\n",
 +              drbd_info(connection, "peer data-integrity-alg: %s\n",
                          integrity_alg[0] ? integrity_alg : "(none)");
  
        synchronize_rcu();
@@@ -3292,7 -3238,7 +3282,7 @@@ disconnect
        crypto_free_hash(peer_integrity_tfm);
        kfree(int_dig_in);
        kfree(int_dig_vv);
 -      conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 +      conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
        return -EIO;
  }
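  
  /*
   * The net_conf replacement above follows the standard RCU publish
   * sequence; a minimal sketch of the pattern (one field shown):
   *
   *        new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
   *        *new_net_conf = *old_net_conf;          // start from current config
   *        new_net_conf->wire_protocol = p_proto;  // fold in the peer's values
   *        rcu_assign_pointer(connection->net_conf, new_net_conf);
   *        synchronize_rcu();   // wait out readers still using old_net_conf
   *        kfree(old_net_conf);
   */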
  
   * return: NULL (alg name was "")
   *         ERR_PTR(error) if something goes wrong
   *         or the crypto hash ptr, if it worked out ok. */
 -struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
 +static
 +struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
                const char *alg, const char *name)
  {
        struct crypto_hash *tfm;
  
        tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(tfm)) {
 -              dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
 +              drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
                        alg, name, PTR_ERR(tfm));
                return tfm;
        }
        return tfm;
  }
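  
  /*
   * Typical call site (compare receive_SyncParam() below); the caller
   * only needs to tell an ERR_PTR apart from a usable transform:
   *
   *        verify_tfm = drbd_crypto_alloc_digest_safe(device,
   *                        p->verify_alg, "verify-alg");
   *        if (IS_ERR(verify_tfm)) {
   *                verify_tfm = NULL;
   *                goto disconnect;
   *        }
   */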
  
 -static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      void *buffer = tconn->data.rbuf;
 +      void *buffer = connection->data.rbuf;
        int size = pi->size;
  
        while (size) {
                int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
 -              s = drbd_recv(tconn, buffer, s);
 +              s = drbd_recv(connection, buffer, s);
                if (s <= 0) {
                        if (s < 0)
                                return s;
   *
   * (We can also end up here if drbd is misconfigured.)
   */
 -static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n",
 +      drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
                  cmdname(pi->cmd), pi->vnr);
 -      return ignore_remaining_packet(tconn, pi);
 +      return ignore_remaining_packet(connection, pi);
  }
  
 -static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_rs_param_95 *p;
        unsigned int header_size, data_size, exp_max_sz;
        struct crypto_hash *verify_tfm = NULL;
        struct crypto_hash *csums_tfm = NULL;
        struct net_conf *old_net_conf, *new_net_conf = NULL;
        struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
 -      const int apv = tconn->agreed_pro_version;
 +      const int apv = connection->agreed_pro_version;
        struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
        int fifo_size = 0;
        int err;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 -              return config_unknown_volume(tconn, pi);
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
 +              return config_unknown_volume(connection, pi);
 +      device = peer_device->device;
  
        exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
                    : apv == 88 ? sizeof(struct p_rs_param)
                    : /* apv >= 95 */ sizeof(struct p_rs_param_95);
  
        if (pi->size > exp_max_sz) {
 -              dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
 +              drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
                    pi->size, exp_max_sz);
                return -EIO;
        }
        } else if (apv <= 94) {
                header_size = sizeof(struct p_rs_param_89);
                data_size = pi->size - header_size;
 -              D_ASSERT(data_size == 0);
 +              D_ASSERT(device, data_size == 0);
        } else {
                header_size = sizeof(struct p_rs_param_95);
                data_size = pi->size - header_size;
 -              D_ASSERT(data_size == 0);
 +              D_ASSERT(device, data_size == 0);
        }
  
        /* initialize verify_alg and csums_alg */
        p = pi->data;
        memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
  
 -      err = drbd_recv_all(mdev->tconn, p, header_size);
 +      err = drbd_recv_all(peer_device->connection, p, header_size);
        if (err)
                return err;
  
 -      mutex_lock(&mdev->tconn->conf_update);
 -      old_net_conf = mdev->tconn->net_conf;
 -      if (get_ldev(mdev)) {
 +      mutex_lock(&connection->resource->conf_update);
 +      old_net_conf = peer_device->connection->net_conf;
 +      if (get_ldev(device)) {
                new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
                if (!new_disk_conf) {
 -                      put_ldev(mdev);
 -                      mutex_unlock(&mdev->tconn->conf_update);
 -                      dev_err(DEV, "Allocation of new disk_conf failed\n");
 +                      put_ldev(device);
 +                      mutex_unlock(&connection->resource->conf_update);
 +                      drbd_err(device, "Allocation of new disk_conf failed\n");
                        return -ENOMEM;
                }
  
 -              old_disk_conf = mdev->ldev->disk_conf;
 +              old_disk_conf = device->ldev->disk_conf;
                *new_disk_conf = *old_disk_conf;
  
                new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
        if (apv >= 88) {
                if (apv == 88) {
                        if (data_size > SHARED_SECRET_MAX || data_size == 0) {
 -                              dev_err(DEV, "verify-alg of wrong size, "
 +                              drbd_err(device, "verify-alg of wrong size, "
                                        "peer wants %u, accepting only up to %u byte\n",
                                        data_size, SHARED_SECRET_MAX);
                                err = -EIO;
                                goto reconnect;
                        }
  
 -                      err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
 +                      err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
                        if (err)
                                goto reconnect;
                        /* we expect NUL terminated string */
                        /* but just in case someone tries to be evil */
 -                      D_ASSERT(p->verify_alg[data_size-1] == 0);
 +                      D_ASSERT(device, p->verify_alg[data_size-1] == 0);
                        p->verify_alg[data_size-1] = 0;
  
                } else /* apv >= 89 */ {
                        /* we still expect NUL terminated strings */
                        /* but just in case someone tries to be evil */
 -                      D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
 -                      D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
 +                      D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
 +                      D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
                        p->verify_alg[SHARED_SECRET_MAX-1] = 0;
                        p->csums_alg[SHARED_SECRET_MAX-1] = 0;
                }
  
                if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
 -                      if (mdev->state.conn == C_WF_REPORT_PARAMS) {
 -                              dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
 +                      if (device->state.conn == C_WF_REPORT_PARAMS) {
 +                              drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
                                    old_net_conf->verify_alg, p->verify_alg);
                                goto disconnect;
                        }
 -                      verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
 +                      verify_tfm = drbd_crypto_alloc_digest_safe(device,
                                        p->verify_alg, "verify-alg");
                        if (IS_ERR(verify_tfm)) {
                                verify_tfm = NULL;
                }
  
                if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
 -                      if (mdev->state.conn == C_WF_REPORT_PARAMS) {
 -                              dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
 +                      if (device->state.conn == C_WF_REPORT_PARAMS) {
 +                              drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
                                    old_net_conf->csums_alg, p->csums_alg);
                                goto disconnect;
                        }
 -                      csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
 +                      csums_tfm = drbd_crypto_alloc_digest_safe(device,
                                        p->csums_alg, "csums-alg");
                        if (IS_ERR(csums_tfm)) {
                                csums_tfm = NULL;
                        new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
  
                        fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
 -                      if (fifo_size != mdev->rs_plan_s->size) {
 +                      if (fifo_size != device->rs_plan_s->size) {
                                new_plan = fifo_alloc(fifo_size);
                                if (!new_plan) {
 -                                      dev_err(DEV, "kmalloc of fifo_buffer failed");
 -                                      put_ldev(mdev);
 +                                      drbd_err(device, "kmalloc of fifo_buffer failed");
 +                                      put_ldev(device);
                                        goto disconnect;
                                }
                        }
                if (verify_tfm || csums_tfm) {
                        new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
                        if (!new_net_conf) {
 -                              dev_err(DEV, "Allocation of new net_conf failed\n");
 +                              drbd_err(device, "Allocation of new net_conf failed\n");
                                goto disconnect;
                        }
  
                        if (verify_tfm) {
                                strcpy(new_net_conf->verify_alg, p->verify_alg);
                                new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
 -                              crypto_free_hash(mdev->tconn->verify_tfm);
 -                              mdev->tconn->verify_tfm = verify_tfm;
 -                              dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
 +                              crypto_free_hash(peer_device->connection->verify_tfm);
 +                              peer_device->connection->verify_tfm = verify_tfm;
 +                              drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
                        }
                        if (csums_tfm) {
                                strcpy(new_net_conf->csums_alg, p->csums_alg);
                                new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
 -                              crypto_free_hash(mdev->tconn->csums_tfm);
 -                              mdev->tconn->csums_tfm = csums_tfm;
 -                              dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
 +                              crypto_free_hash(peer_device->connection->csums_tfm);
 +                              peer_device->connection->csums_tfm = csums_tfm;
 +                              drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
                        }
 -                      rcu_assign_pointer(tconn->net_conf, new_net_conf);
 +                      rcu_assign_pointer(connection->net_conf, new_net_conf);
                }
        }
  
        if (new_disk_conf) {
 -              rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
 -              put_ldev(mdev);
 +              rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
 +              put_ldev(device);
        }
  
        if (new_plan) {
 -              old_plan = mdev->rs_plan_s;
 -              rcu_assign_pointer(mdev->rs_plan_s, new_plan);
 +              old_plan = device->rs_plan_s;
 +              rcu_assign_pointer(device->rs_plan_s, new_plan);
        }
  
 -      mutex_unlock(&mdev->tconn->conf_update);
 +      mutex_unlock(&connection->resource->conf_update);
        synchronize_rcu();
        if (new_net_conf)
                kfree(old_net_conf);
  
  reconnect:
        if (new_disk_conf) {
 -              put_ldev(mdev);
 +              put_ldev(device);
                kfree(new_disk_conf);
        }
 -      mutex_unlock(&mdev->tconn->conf_update);
 +      mutex_unlock(&connection->resource->conf_update);
        return -EIO;
  
  disconnect:
        kfree(new_plan);
        if (new_disk_conf) {
 -              put_ldev(mdev);
 +              put_ldev(device);
                kfree(new_disk_conf);
        }
 -      mutex_unlock(&mdev->tconn->conf_update);
 +      mutex_unlock(&connection->resource->conf_update);
        /* just for completeness: actually not needed,
         * as this is not reached if csums_tfm was ok. */
        crypto_free_hash(csums_tfm);
        /* but free the verify_tfm again, if csums_tfm did not work out */
        crypto_free_hash(verify_tfm);
 -      conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 +      conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
        return -EIO;
  }
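  
  /*
   * Worked example for the resync-planning fifo sizing above, assuming
   * SLEEP_TIME is HZ/10 (one planning step every 100ms):
   *
   *        fifo_size = (c_plan_ahead * 10 * SLEEP_TIME) / HZ
   *                  = c_plan_ahead        // e.g. 20 -> 20 slots,
   *                                        // i.e. 2s of plan-ahead
   */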
  
  /* warn if the arguments differ by more than 12.5% */
 -static void warn_if_differ_considerably(struct drbd_conf *mdev,
 +static void warn_if_differ_considerably(struct drbd_device *device,
        const char *s, sector_t a, sector_t b)
  {
        sector_t d;
                return;
        d = (a > b) ? (a - b) : (b - a);
        if (d > (a>>3) || d > (b>>3))
 -              dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
 +              drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
                     (unsigned long long)a, (unsigned long long)b);
  }
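  
  /*
   * "More than 12.5%" is implemented with shifts: warn when
   * d > a/8 || d > b/8.  E.g. a = 1000, b = 880 give d = 120;
   * 120 > (1000 >> 3) = 125 is false, but 120 > (880 >> 3) = 110 is
   * true, so the warning fires.
   */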
  
 -static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_sizes *p = pi->data;
        enum determine_dev_size dd = DS_UNCHANGED;
        sector_t p_size, p_usize, my_usize;
        int ldsc = 0; /* local disk size changed */
        enum dds_flags ddsf;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 -              return config_unknown_volume(tconn, pi);
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
 +              return config_unknown_volume(connection, pi);
 +      device = peer_device->device;
  
        p_size = be64_to_cpu(p->d_size);
        p_usize = be64_to_cpu(p->u_size);
  
        /* just store the peer's disk size for now.
         * we still need to figure out whether we accept that. */
 -      mdev->p_size = p_size;
 +      device->p_size = p_size;
  
 -      if (get_ldev(mdev)) {
 +      if (get_ldev(device)) {
                rcu_read_lock();
 -              my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
 +              my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
                rcu_read_unlock();
  
 -              warn_if_differ_considerably(mdev, "lower level device sizes",
 -                         p_size, drbd_get_max_capacity(mdev->ldev));
 -              warn_if_differ_considerably(mdev, "user requested size",
 +              warn_if_differ_considerably(device, "lower level device sizes",
 +                         p_size, drbd_get_max_capacity(device->ldev));
 +              warn_if_differ_considerably(device, "user requested size",
                                            p_usize, my_usize);
  
                /* if this is the first connect, or an otherwise expected
                 * param exchange, choose the minimum */
 -              if (mdev->state.conn == C_WF_REPORT_PARAMS)
 +              if (device->state.conn == C_WF_REPORT_PARAMS)
                        p_usize = min_not_zero(my_usize, p_usize);
  
                /* Never shrink a device with usable data during connect.
                   But allow online shrinking if we are connected. */
 -              if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) <
 -                  drbd_get_capacity(mdev->this_bdev) &&
 -                  mdev->state.disk >= D_OUTDATED &&
 -                  mdev->state.conn < C_CONNECTED) {
 -                      dev_err(DEV, "The peer's disk size is too small!\n");
 -                      conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 -                      put_ldev(mdev);
 +              if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
 +                  drbd_get_capacity(device->this_bdev) &&
 +                  device->state.disk >= D_OUTDATED &&
 +                  device->state.conn < C_CONNECTED) {
 +                      drbd_err(device, "The peer's disk size is too small!\n");
 +                      conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
 +                      put_ldev(device);
                        return -EIO;
                }
  
  
                        new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
                        if (!new_disk_conf) {
 -                              dev_err(DEV, "Allocation of new disk_conf failed\n");
 -                              put_ldev(mdev);
 +                              drbd_err(device, "Allocation of new disk_conf failed\n");
 +                              put_ldev(device);
                                return -ENOMEM;
                        }
  
 -                      mutex_lock(&mdev->tconn->conf_update);
 -                      old_disk_conf = mdev->ldev->disk_conf;
 +                      mutex_lock(&connection->resource->conf_update);
 +                      old_disk_conf = device->ldev->disk_conf;
                        *new_disk_conf = *old_disk_conf;
                        new_disk_conf->disk_size = p_usize;
  
 -                      rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
 -                      mutex_unlock(&mdev->tconn->conf_update);
 +                      rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
 +                      mutex_unlock(&connection->resource->conf_update);
                        synchronize_rcu();
                        kfree(old_disk_conf);
  
 -                      dev_info(DEV, "Peer sets u_size to %lu sectors\n",
 +                      drbd_info(device, "Peer sets u_size to %lu sectors\n",
                                 (unsigned long)my_usize);
                }
  
 -              put_ldev(mdev);
 +              put_ldev(device);
        }
  
        ddsf = be16_to_cpu(p->dds_flags);
 -      if (get_ldev(mdev)) {
 -              dd = drbd_determine_dev_size(mdev, ddsf, NULL);
 -              put_ldev(mdev);
 +      if (get_ldev(device)) {
 +              dd = drbd_determine_dev_size(device, ddsf, NULL);
 +              put_ldev(device);
                if (dd == DS_ERROR)
                        return -EIO;
 -              drbd_md_sync(mdev);
 +              drbd_md_sync(device);
        } else {
                /* I am diskless, need to accept the peer's size. */
 -              drbd_set_my_capacity(mdev, p_size);
 +              drbd_set_my_capacity(device, p_size);
        }
  
 -      mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
 -      drbd_reconsider_max_bio_size(mdev);
 +      device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
 +      drbd_reconsider_max_bio_size(device);
  
 -      if (get_ldev(mdev)) {
 -              if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
 -                      mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
 +      if (get_ldev(device)) {
 +              if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
 +                      device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
                        ldsc = 1;
                }
  
 -              put_ldev(mdev);
 +              put_ldev(device);
        }
  
 -      if (mdev->state.conn > C_WF_REPORT_PARAMS) {
 +      if (device->state.conn > C_WF_REPORT_PARAMS) {
                if (be64_to_cpu(p->c_size) !=
 -                  drbd_get_capacity(mdev->this_bdev) || ldsc) {
 +                  drbd_get_capacity(device->this_bdev) || ldsc) {
                        /* we have different sizes, probably peer
                         * needs to know my new size... */
 -                      drbd_send_sizes(mdev, 0, ddsf);
 +                      drbd_send_sizes(peer_device, 0, ddsf);
                }
 -              if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
 -                  (dd == DS_GREW && mdev->state.conn == C_CONNECTED)) {
 -                      if (mdev->state.pdsk >= D_INCONSISTENT &&
 -                          mdev->state.disk >= D_INCONSISTENT) {
 +              if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
 +                  (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
 +                      if (device->state.pdsk >= D_INCONSISTENT &&
 +                          device->state.disk >= D_INCONSISTENT) {
                                if (ddsf & DDSF_NO_RESYNC)
 -                                      dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
 +                                      drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
                                else
 -                                      resync_after_online_grow(mdev);
 +                                      resync_after_online_grow(device);
                        } else
 -                              set_bit(RESYNC_AFTER_NEG, &mdev->flags);
 +                              set_bit(RESYNC_AFTER_NEG, &device->flags);
                }
        }
  
        return 0;
  }
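
Editor's note: on first connect, receive_sizes() above picks p_usize = min_not_zero(my_usize, p_usize); zero means "no size explicitly requested" and must not win the minimum. A sketch of those semantics (the kernel's min_not_zero() is a type-generic macro; this fixed-type version is only illustrative):

#include <stdio.h>

/* Smaller of a and b, except that 0 means "unset" and is skipped. */
static unsigned long long min_not_zero_ull(unsigned long long a,
                                           unsigned long long b)
{
        if (a == 0)
                return b;
        if (b == 0)
                return a;
        return a < b ? a : b;
}

int main(void)
{
        printf("%llu\n", min_not_zero_ull(0, 500));    /* 500: local side unset */
        printf("%llu\n", min_not_zero_ull(300, 500));  /* 300: both set, take min */
        return 0;
}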
  
 -static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_uuids *p = pi->data;
        u64 *p_uuid;
        int i, updated_uuids = 0;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 -              return config_unknown_volume(tconn, pi);
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
 +              return config_unknown_volume(connection, pi);
 +      device = peer_device->device;
  
        p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
        if (!p_uuid) {
 -              dev_err(DEV, "kmalloc of p_uuid failed\n");
 +              drbd_err(device, "kmalloc of p_uuid failed\n");
                return false;
        }
  
        for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
                p_uuid[i] = be64_to_cpu(p->uuid[i]);
  
 -      kfree(mdev->p_uuid);
 -      mdev->p_uuid = p_uuid;
 +      kfree(device->p_uuid);
 +      device->p_uuid = p_uuid;
  
 -      if (mdev->state.conn < C_CONNECTED &&
 -          mdev->state.disk < D_INCONSISTENT &&
 -          mdev->state.role == R_PRIMARY &&
 -          (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
 -              dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
 -                  (unsigned long long)mdev->ed_uuid);
 -              conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 +      if (device->state.conn < C_CONNECTED &&
 +          device->state.disk < D_INCONSISTENT &&
 +          device->state.role == R_PRIMARY &&
 +          (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
 +              drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
 +                  (unsigned long long)device->ed_uuid);
 +              conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
                return -EIO;
        }
  
 -      if (get_ldev(mdev)) {
 +      if (get_ldev(device)) {
                int skip_initial_sync =
 -                      mdev->state.conn == C_CONNECTED &&
 -                      mdev->tconn->agreed_pro_version >= 90 &&
 -                      mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
 +                      device->state.conn == C_CONNECTED &&
 +                      peer_device->connection->agreed_pro_version >= 90 &&
 +                      device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
                        (p_uuid[UI_FLAGS] & 8);
                if (skip_initial_sync) {
 -                      dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
 -                      drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
 +                      drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
 +                      drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
                                        "clear_n_write from receive_uuids",
                                        BM_LOCKED_TEST_ALLOWED);
 -                      _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
 -                      _drbd_uuid_set(mdev, UI_BITMAP, 0);
 -                      _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
 +                      _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
 +                      _drbd_uuid_set(device, UI_BITMAP, 0);
 +                      _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
                                        CS_VERBOSE, NULL);
 -                      drbd_md_sync(mdev);
 +                      drbd_md_sync(device);
                        updated_uuids = 1;
                }
 -              put_ldev(mdev);
 -      } else if (mdev->state.disk < D_INCONSISTENT &&
 -                 mdev->state.role == R_PRIMARY) {
 +              put_ldev(device);
 +      } else if (device->state.disk < D_INCONSISTENT &&
 +                 device->state.role == R_PRIMARY) {
                /* I am a diskless primary, the peer just created a new current UUID
                   for me. */
 -              updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
 +              updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
        }
  
        /* Before we test for the disk state, we should wait until any
           ongoing cluster-wide state change is finished. That is important if
           we are primary and are detaching from our disk. We need to see the
           new disk state... */
 -      mutex_lock(mdev->state_mutex);
 -      mutex_unlock(mdev->state_mutex);
 -      if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
 -              updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
 +      mutex_lock(device->state_mutex);
 +      mutex_unlock(device->state_mutex);
 +      if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
 +              updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
  
        if (updated_uuids)
 -              drbd_print_uuids(mdev, "receiver updated UUIDs to");
 +              drbd_print_uuids(device, "receiver updated UUIDs to");
  
        return 0;
  }
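
Editor's note: the connect check in receive_uuids() compares the exposed-data UUID against the peer's current UUID with bit 0 masked off (& ~((u64)1)): the lowest bit is used as a flag rather than as part of the data-generation identifier, so two UUIDs differing only there still name the same data. A small sketch of that comparison (the values are made up):

#include <stdint.h>
#include <stdio.h>

/* Compare two UUIDs while ignoring the flag in bit 0. */
static int uuids_match(uint64_t a, uint64_t b)
{
        return (a & ~UINT64_C(1)) == (b & ~UINT64_C(1));
}

int main(void)
{
        printf("%d\n", uuids_match(0xABCD0001, 0xABCD0000)); /* 1: only bit 0 differs */
        printf("%d\n", uuids_match(0xABCD0001, 0xABCE0001)); /* 0: real mismatch */
        return 0;
}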
@@@ -3812,40 -3751,38 +3802,40 @@@ static union drbd_state convert_state(u
        return ms;
  }
  
 -static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_req_state *p = pi->data;
        union drbd_state mask, val;
        enum drbd_state_rv rv;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
        mask.i = be32_to_cpu(p->mask);
        val.i = be32_to_cpu(p->val);
  
 -      if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) &&
 -          mutex_is_locked(mdev->state_mutex)) {
 -              drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
 +      if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
 +          mutex_is_locked(device->state_mutex)) {
 +              drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
                return 0;
        }
  
        mask = convert_state(mask);
        val = convert_state(val);
  
 -      rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
 -      drbd_send_sr_reply(mdev, rv);
 +      rv = drbd_change_state(device, CS_VERBOSE, mask, val);
 +      drbd_send_sr_reply(peer_device, rv);
  
 -      drbd_md_sync(mdev);
 +      drbd_md_sync(device);
  
        return 0;
  }
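
Editor's note: receive_req_state() ships a state-change request as a (mask, val) pair of 32-bit words: mask selects which bit-fields of the state may change, val carries the requested new values, and both are byte-swapped and remapped by convert_state() before use. The bit arithmetic such a pair implies, as an illustrative sketch (DRBD applies it per field through its state machine rather than as one raw expression):

#include <inttypes.h>
#include <stdio.h>

/* Take only the bits selected by mask from val; keep the rest of old. */
static uint32_t apply_state_change(uint32_t old, uint32_t mask, uint32_t val)
{
        return (old & ~mask) | (val & mask);
}

int main(void)
{
        uint32_t s = 0x00000012;

        s = apply_state_change(s, 0x000000F0, 0x00000050);
        printf("0x%08" PRIx32 "\n", s);   /* 0x00000052 */
        return 0;
}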
  
 -static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
  {
        struct p_req_state *p = pi->data;
        union drbd_state mask, val;
        mask.i = be32_to_cpu(p->mask);
        val.i = be32_to_cpu(p->val);
  
 -      if (test_bit(RESOLVE_CONFLICTS, &tconn->flags) &&
 -          mutex_is_locked(&tconn->cstate_mutex)) {
 -              conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
 +      if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
 +          mutex_is_locked(&connection->cstate_mutex)) {
 +              conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
                return 0;
        }
  
        mask = convert_state(mask);
        val = convert_state(val);
  
 -      rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
 -      conn_send_sr_reply(tconn, rv);
 +      rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
 +      conn_send_sr_reply(connection, rv);
  
        return 0;
  }
  
 -static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_state *p = pi->data;
        union drbd_state os, ns, peer_state;
        enum drbd_disk_state real_peer_disk;
        enum chg_state_flags cs_flags;
        int rv;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 -              return config_unknown_volume(tconn, pi);
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
 +              return config_unknown_volume(connection, pi);
 +      device = peer_device->device;
  
        peer_state.i = be32_to_cpu(p->state);
  
        real_peer_disk = peer_state.disk;
        if (peer_state.disk == D_NEGOTIATING) {
 -              real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
 -              dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
 +              real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
 +              drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
        }
  
 -      spin_lock_irq(&mdev->tconn->req_lock);
 +      spin_lock_irq(&device->resource->req_lock);
   retry:
 -      os = ns = drbd_read_state(mdev);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      os = ns = drbd_read_state(device);
 +      spin_unlock_irq(&device->resource->req_lock);
  
        /* If some other part of the code (asender thread, timeout)
         * already decided to close the connection again,
                 * Maybe we should finish it up, too? */
                else if (os.conn >= C_SYNC_SOURCE &&
                         peer_state.conn == C_CONNECTED) {
 -                      if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
 -                              drbd_resync_finished(mdev);
 +                      if (drbd_bm_total_weight(device) <= device->rs_failed)
 +                              drbd_resync_finished(device);
                        return 0;
                }
        }
        /* explicit verify finished notification, stop sector reached. */
        if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
            peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
 -              ov_out_of_sync_print(mdev);
 -              drbd_resync_finished(mdev);
 +              ov_out_of_sync_print(device);
 +              drbd_resync_finished(device);
                return 0;
        }
  
        if (peer_state.conn == C_AHEAD)
                ns.conn = C_BEHIND;
  
 -      if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
 -          get_ldev_if_state(mdev, D_NEGOTIATING)) {
 +      if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
 +          get_ldev_if_state(device, D_NEGOTIATING)) {
                int cr; /* consider resync */
  
                /* if we established a new connection */
                        os.disk == D_NEGOTIATING));
                /* if we have both been inconsistent, and the peer has been
                 * forced to be UpToDate with --overwrite-data */
 -              cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
 +              cr |= test_bit(CONSIDER_RESYNC, &device->flags);
                /* if we had been plain connected, and the admin requested to
                 * start a sync by "invalidate" or "invalidate-remote" */
                cr |= (os.conn == C_CONNECTED &&
                                 peer_state.conn <= C_WF_BITMAP_T));
  
                if (cr)
 -                      ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
 +                      ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
  
 -              put_ldev(mdev);
 +              put_ldev(device);
                if (ns.conn == C_MASK) {
                        ns.conn = C_CONNECTED;
 -                      if (mdev->state.disk == D_NEGOTIATING) {
 -                              drbd_force_state(mdev, NS(disk, D_FAILED));
 +                      if (device->state.disk == D_NEGOTIATING) {
 +                              drbd_force_state(device, NS(disk, D_FAILED));
                        } else if (peer_state.disk == D_NEGOTIATING) {
 -                              dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
 +                              drbd_err(device, "Disk attach process on the peer node was aborted.\n");
                                peer_state.disk = D_DISKLESS;
                                real_peer_disk = D_DISKLESS;
                        } else {
 -                              if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
 +                              if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
                                        return -EIO;
 -                              D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
 -                              conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 +                              D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
 +                              conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
                                return -EIO;
                        }
                }
        }
  
 -      spin_lock_irq(&mdev->tconn->req_lock);
 -      if (os.i != drbd_read_state(mdev).i)
 +      spin_lock_irq(&device->resource->req_lock);
 +      if (os.i != drbd_read_state(device).i)
                goto retry;
 -      clear_bit(CONSIDER_RESYNC, &mdev->flags);
 +      clear_bit(CONSIDER_RESYNC, &device->flags);
        ns.peer = peer_state.role;
        ns.pdsk = real_peer_disk;
        ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
        if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
 -              ns.disk = mdev->new_state_tmp.disk;
 +              ns.disk = device->new_state_tmp.disk;
        cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
 -      if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
 -          test_bit(NEW_CUR_UUID, &mdev->flags)) {
 +      if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
 +          test_bit(NEW_CUR_UUID, &device->flags)) {
                /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
                   for temporary network outages! */
 -              spin_unlock_irq(&mdev->tconn->req_lock);
 -              dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
 -              tl_clear(mdev->tconn);
 -              drbd_uuid_new_current(mdev);
 -              clear_bit(NEW_CUR_UUID, &mdev->flags);
 -              conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
 +              spin_unlock_irq(&device->resource->req_lock);
 +              drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
 +              tl_clear(peer_device->connection);
 +              drbd_uuid_new_current(device);
 +              clear_bit(NEW_CUR_UUID, &device->flags);
 +              conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
                return -EIO;
        }
 -      rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
 -      ns = drbd_read_state(mdev);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      rv = _drbd_set_state(device, ns, cs_flags, NULL);
 +      ns = drbd_read_state(device);
 +      spin_unlock_irq(&device->resource->req_lock);
  
        if (rv < SS_SUCCESS) {
 -              conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 +              conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
                return -EIO;
        }
  
                        /* we want resync, peer has not yet decided to sync... */
                        /* Nowadays only used when forcing a node into primary role and
                           setting its disk to UpToDate with that */
 -                      drbd_send_uuids(mdev);
 -                      drbd_send_current_state(mdev);
 +                      drbd_send_uuids(peer_device);
 +                      drbd_send_current_state(peer_device);
                }
        }
  
 -      clear_bit(DISCARD_MY_DATA, &mdev->flags);
 +      clear_bit(DISCARD_MY_DATA, &device->flags);
  
 -      drbd_md_sync(mdev); /* update connected indicator, la_size_sect, ... */
 +      drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
  
        return 0;
  }
  
 -static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_rs_uuid *p = pi->data;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
 -      wait_event(mdev->misc_wait,
 -                 mdev->state.conn == C_WF_SYNC_UUID ||
 -                 mdev->state.conn == C_BEHIND ||
 -                 mdev->state.conn < C_CONNECTED ||
 -                 mdev->state.disk < D_NEGOTIATING);
 +      wait_event(device->misc_wait,
 +                 device->state.conn == C_WF_SYNC_UUID ||
 +                 device->state.conn == C_BEHIND ||
 +                 device->state.conn < C_CONNECTED ||
 +                 device->state.disk < D_NEGOTIATING);
  
 -      /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
 +      /* D_ASSERT(device,  device->state.conn == C_WF_SYNC_UUID ); */
  
        /* Here the _drbd_uuid_ functions are right, current should
           _not_ be rotated into the history */
 -      if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
 -              _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
 -              _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
 +      if (get_ldev_if_state(device, D_NEGOTIATING)) {
 +              _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
 +              _drbd_uuid_set(device, UI_BITMAP, 0UL);
  
 -              drbd_print_uuids(mdev, "updated sync uuid");
 -              drbd_start_resync(mdev, C_SYNC_TARGET);
 +              drbd_print_uuids(device, "updated sync uuid");
 +              drbd_start_resync(device, C_SYNC_TARGET);
  
 -              put_ldev(mdev);
 +              put_ldev(device);
        } else
 -              dev_err(DEV, "Ignoring SyncUUID packet!\n");
 +              drbd_err(device, "Ignoring SyncUUID packet!\n");
  
        return 0;
  }
   * code upon failure.
   */
  static int
 -receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size,
 +receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
                     unsigned long *p, struct bm_xfer_ctx *c)
  {
        unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
 -                               drbd_header_size(mdev->tconn);
 +                               drbd_header_size(peer_device->connection);
        unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
                                       c->bm_words - c->word_offset);
        unsigned int want = num_words * sizeof(*p);
        int err;
  
        if (want != size) {
 -              dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size);
 +              drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
                return -EIO;
        }
        if (want == 0)
                return 0;
 -      err = drbd_recv_all(mdev->tconn, p, want);
 +      err = drbd_recv_all(peer_device->connection, p, want);
        if (err)
                return err;
  
 -      drbd_bm_merge_lel(mdev, c->word_offset, num_words, p);
 +      drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
  
        c->word_offset += num_words;
        c->bit_offset = c->word_offset * BITS_PER_LONG;
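
Editor's note: receive_bitmap_plain() above sizes each chunk as whole longs: at most (buffer - header) / sizeof(long) words per packet, capped by the words still missing, and it rejects any packet whose size disagrees with that expectation. A standalone sketch of the sizing (buffer size and header length here are illustrative, not DRBD's constants):

#include <stdio.h>

#define SOCKET_BUFFER_SIZE 4096U   /* illustrative, not DRBD's value */

/* Whole longs of bitmap that fit into the next plain bitmap packet. */
static unsigned long words_this_packet(unsigned int header_size,
                                       unsigned long bm_words,
                                       unsigned long word_offset)
{
        unsigned long per_packet =
                (SOCKET_BUFFER_SIZE - header_size) / sizeof(unsigned long);
        unsigned long remaining = bm_words - word_offset;

        return per_packet < remaining ? per_packet : remaining;
}

int main(void)
{
        /* with 64-bit longs: (4096 - 16) / 8 = 510 words per full packet */
        printf("%lu\n", words_this_packet(16, 10000, 0));    /* 510 */
        printf("%lu\n", words_this_packet(16, 10000, 9990)); /* 10, the tail */
        return 0;
}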
@@@ -4141,7 -4074,7 +4131,7 @@@ static int dcbp_get_pad_bits(struct p_c
   * code upon failure.
   */
  static int
 -recv_bm_rle_bits(struct drbd_conf *mdev,
 +recv_bm_rle_bits(struct drbd_peer_device *peer_device,
                struct p_compressed_bm *p,
                 struct bm_xfer_ctx *c,
                 unsigned int len)
                if (toggle) {
                        e = s + rl -1;
                        if (e >= c->bm_bits) {
 -                              dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
 +                              drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
                                return -EIO;
                        }
 -                      _drbd_bm_set_bits(mdev, s, e);
 +                      _drbd_bm_set_bits(peer_device->device, s, e);
                }
  
                if (have < bits) {
 -                      dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
 +                      drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
                                have, bits, look_ahead,
                                (unsigned int)(bs.cur.b - p->code),
                                (unsigned int)bs.buf_len);
   * code upon failure.
   */
  static int
 -decode_bitmap_c(struct drbd_conf *mdev,
 +decode_bitmap_c(struct drbd_peer_device *peer_device,
                struct p_compressed_bm *p,
                struct bm_xfer_ctx *c,
                unsigned int len)
  {
        if (dcbp_get_code(p) == RLE_VLI_Bits)
 -              return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p));
 +              return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
  
        /* other variants had been implemented for evaluation,
         * but have been dropped as this one turned out to be "best"
         * during all our tests. */
  
 -      dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
 -      conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
 +      drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
 +      conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
        return -EIO;
  }
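
Editor's note: recv_bm_rle_bits() consumes alternating run lengths of clear and set bits and only writes the set runs into the bitmap, bounds-checking each run's end against bm_bits. A schematic of that toggle decoding, with the VLI decoding of the run lengths omitted and the initial toggle assumed clear (in DRBD the packet header supplies it):

#include <stdio.h>

/* Alternating runs of clear/set bits; only set runs touch the bitmap. */
static int decode_runs(const unsigned long *runs, int nruns,
                       unsigned char *bitmap, unsigned long bm_bits)
{
        unsigned long s = 0;
        int toggle = 0;   /* assumption: first run is clear bits */

        for (int i = 0; i < nruns; i++, toggle = !toggle) {
                unsigned long rl = runs[i];

                if (toggle) {
                        unsigned long e = s + rl - 1;

                        if (e >= bm_bits)
                                return -1;   /* bitmap overflow */
                        for (unsigned long b = s; b <= e; b++)
                                bitmap[b / 8] |= 1u << (b % 8);
                }
                s += rl;
        }
        return 0;
}

int main(void)
{
        unsigned char bm[2] = { 0 };
        unsigned long runs[] = { 3, 5, 4, 2 };   /* 3 clear, 5 set, 4 clear, 2 set */

        decode_runs(runs, 4, bm, 16);
        printf("%02x %02x\n", bm[0], bm[1]);     /* f8 30: bits 3..7 and 12..13 */
        return 0;
}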
  
 -void INFO_bm_xfer_stats(struct drbd_conf *mdev,
 +void INFO_bm_xfer_stats(struct drbd_device *device,
                const char *direction, struct bm_xfer_ctx *c)
  {
        /* what would it take to transfer it "plaintext" */
 -      unsigned int header_size = drbd_header_size(mdev->tconn);
 +      unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
        unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
        unsigned int plain =
                header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
                r = 1000;
  
        r = 1000 - r;
 -      dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
 +      drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
             "total %u; compression: %u.%u%%\n",
                        direction,
                        c->bytes[1], c->packets[1],
     in order to be agnostic to the 32 vs 64 bits issue.
  
     returns 0 on failure, 1 if we successfully received it. */
 -static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct bm_xfer_ctx c;
        int err;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
 -      drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
 +      drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
        /* you are supposed to send additional out-of-sync information
         * if you actually set bits during this phase */
  
        c = (struct bm_xfer_ctx) {
 -              .bm_bits = drbd_bm_bits(mdev),
 -              .bm_words = drbd_bm_words(mdev),
 +              .bm_bits = drbd_bm_bits(device),
 +              .bm_words = drbd_bm_words(device),
        };
  
        for(;;) {
                if (pi->cmd == P_BITMAP)
 -                      err = receive_bitmap_plain(mdev, pi->size, pi->data, &c);
 +                      err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
                else if (pi->cmd == P_COMPRESSED_BITMAP) {
                        /* MAYBE: sanity check that we speak proto >= 90,
                         * and the feature is enabled! */
                        struct p_compressed_bm *p = pi->data;
  
 -                      if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) {
 -                              dev_err(DEV, "ReportCBitmap packet too large\n");
 +                      if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
 +                              drbd_err(device, "ReportCBitmap packet too large\n");
                                err = -EIO;
                                goto out;
                        }
                        if (pi->size <= sizeof(*p)) {
 -                              dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
 +                              drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
                                err = -EIO;
                                goto out;
                        }
 -                      err = drbd_recv_all(mdev->tconn, p, pi->size);
 +                      err = drbd_recv_all(peer_device->connection, p, pi->size);
                        if (err)
                                goto out;
 -                      err = decode_bitmap_c(mdev, p, &c, pi->size);
 +                      err = decode_bitmap_c(peer_device, p, &c, pi->size);
                } else {
 -                      dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
 +                      drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
                        err = -EIO;
                        goto out;
                }
  
                c.packets[pi->cmd == P_BITMAP]++;
 -              c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size;
 +              c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
  
                if (err <= 0) {
                        if (err < 0)
                                goto out;
                        break;
                }
 -              err = drbd_recv_header(mdev->tconn, pi);
 +              err = drbd_recv_header(peer_device->connection, pi);
                if (err)
                        goto out;
        }
  
 -      INFO_bm_xfer_stats(mdev, "receive", &c);
 +      INFO_bm_xfer_stats(device, "receive", &c);
  
 -      if (mdev->state.conn == C_WF_BITMAP_T) {
 +      if (device->state.conn == C_WF_BITMAP_T) {
                enum drbd_state_rv rv;
  
 -              err = drbd_send_bitmap(mdev);
 +              err = drbd_send_bitmap(device);
                if (err)
                        goto out;
                /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
 -              rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
 -              D_ASSERT(rv == SS_SUCCESS);
 -      } else if (mdev->state.conn != C_WF_BITMAP_S) {
 +              rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
 +              D_ASSERT(device, rv == SS_SUCCESS);
 +      } else if (device->state.conn != C_WF_BITMAP_S) {
                /* admin may have requested C_DISCONNECTING,
                 * other threads may have noticed network errors */
 -              dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
 -                  drbd_conn_str(mdev->state.conn));
 +              drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
 +                  drbd_conn_str(device->state.conn));
        }
        err = 0;
  
   out:
 -      drbd_bm_unlock(mdev);
 -      if (!err && mdev->state.conn == C_WF_BITMAP_S)
 -              drbd_start_resync(mdev, C_SYNC_SOURCE);
 +      drbd_bm_unlock(device);
 +      if (!err && device->state.conn == C_WF_BITMAP_S)
 +              drbd_start_resync(device, C_SYNC_SOURCE);
        return err;
  }
  
 -static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
 +      drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
                 pi->cmd, pi->size);
  
 -      return ignore_remaining_packet(tconn, pi);
 +      return ignore_remaining_packet(connection, pi);
  }
  
 -static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
  {
        /* Make sure we've acked all the TCP data associated
         * with the data requests being unplugged */
 -      drbd_tcp_quickack(tconn->data.socket);
 +      drbd_tcp_quickack(connection->data.socket);
  
        return 0;
  }
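
Editor's note: receive_UnplugRemote() only pokes the data socket with drbd_tcp_quickack() so the stack acks the drained TCP data immediately instead of sitting on a delayed ACK. The userspace equivalent, sketched below, is the Linux-specific TCP_QUICKACK socket option, which is one-shot and therefore re-armed at moments like this:

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

/* Ask the stack to ACK pending data right away (one-shot on Linux). */
static void tcp_quickack(int fd)
{
        int one = 1;

        if (setsockopt(fd, IPPROTO_TCP, TCP_QUICKACK, &one, sizeof(one)) < 0)
                perror("TCP_QUICKACK");
}

int main(void)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);

        tcp_quickack(fd);   /* on a connected socket this forces an ACK */
        return 0;
}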
  
 -static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_block_desc *p = pi->data;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
 -      switch (mdev->state.conn) {
 +      switch (device->state.conn) {
        case C_WF_SYNC_UUID:
        case C_WF_BITMAP_T:
        case C_BEHIND:
                        break;
        default:
 -              dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
 -                              drbd_conn_str(mdev->state.conn));
 +              drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
 +                              drbd_conn_str(device->state.conn));
        }
  
 -      drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
 +      drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
  
        return 0;
  }
  struct data_cmd {
        int expect_payload;
        size_t pkt_size;
 -      int (*fn)(struct drbd_tconn *, struct packet_info *);
 +      int (*fn)(struct drbd_connection *, struct packet_info *);
  };
  
  static struct data_cmd drbd_cmd_handler[] = {
        [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
  };
  
 -static void drbdd(struct drbd_tconn *tconn)
 +static void drbdd(struct drbd_connection *connection)
  {
        struct packet_info pi;
        size_t shs; /* sub header size */
        int err;
  
 -      while (get_t_state(&tconn->receiver) == RUNNING) {
 +      while (get_t_state(&connection->receiver) == RUNNING) {
                struct data_cmd *cmd;
  
 -              drbd_thread_current_set_cpu(&tconn->receiver);
 -              if (drbd_recv_header(tconn, &pi))
 +              drbd_thread_current_set_cpu(&connection->receiver);
 +              if (drbd_recv_header(connection, &pi))
                        goto err_out;
  
                cmd = &drbd_cmd_handler[pi.cmd];
                if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
 -                      conn_err(tconn, "Unexpected data packet %s (0x%04x)",
 +                      drbd_err(connection, "Unexpected data packet %s (0x%04x)",
                                 cmdname(pi.cmd), pi.cmd);
                        goto err_out;
                }
  
                shs = cmd->pkt_size;
                if (pi.size > shs && !cmd->expect_payload) {
 -                      conn_err(tconn, "No payload expected %s l:%d\n",
 +                      drbd_err(connection, "No payload expected %s l:%d\n",
                                 cmdname(pi.cmd), pi.size);
                        goto err_out;
                }
  
                if (shs) {
 -                      err = drbd_recv_all_warn(tconn, pi.data, shs);
 +                      err = drbd_recv_all_warn(connection, pi.data, shs);
                        if (err)
                                goto err_out;
                        pi.size -= shs;
                }
  
 -              err = cmd->fn(tconn, &pi);
 +              err = cmd->fn(connection, &pi);
                if (err) {
 -                      conn_err(tconn, "error receiving %s, e: %d l: %d!\n",
 +                      drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
                                 cmdname(pi.cmd), err, pi.size);
                        goto err_out;
                }
        return;
  
      err_out:
 -      conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
 -}
 -
 -void conn_flush_workqueue(struct drbd_tconn *tconn)
 -{
 -      struct drbd_wq_barrier barr;
 -
 -      barr.w.cb = w_prev_work_done;
 -      barr.w.tconn = tconn;
 -      init_completion(&barr.done);
 -      drbd_queue_work(&tconn->sender_work, &barr.w);
 -      wait_for_completion(&barr.done);
 +      conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
  }
  
 -static void conn_disconnect(struct drbd_tconn *tconn)
 +static void conn_disconnect(struct drbd_connection *connection)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
        enum drbd_conns oc;
        int vnr;
  
 -      if (tconn->cstate == C_STANDALONE)
 +      if (connection->cstate == C_STANDALONE)
                return;
  
        /* We are about to start the cleanup after connection loss.
         * Usually we should be in some network failure state already,
         * but just in case we are not, we fix it up here.
         */
 -      conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
 +      conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
  
        /* asender does not clean up anything. it must not interfere, either */
 -      drbd_thread_stop(&tconn->asender);
 -      drbd_free_sock(tconn);
 +      drbd_thread_stop(&connection->asender);
 +      drbd_free_sock(connection);
  
        rcu_read_lock();
 -      idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 -              kref_get(&mdev->kref);
 +      idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 +              struct drbd_device *device = peer_device->device;
 +              kref_get(&device->kref);
                rcu_read_unlock();
 -              drbd_disconnected(mdev);
 -              kref_put(&mdev->kref, &drbd_minor_destroy);
 +              drbd_disconnected(peer_device);
 +              kref_put(&device->kref, drbd_destroy_device);
                rcu_read_lock();
        }
        rcu_read_unlock();
  
 -      if (!list_empty(&tconn->current_epoch->list))
 -              conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n");
 +      if (!list_empty(&connection->current_epoch->list))
 +              drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
        /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
 -      atomic_set(&tconn->current_epoch->epoch_size, 0);
 -      tconn->send.seen_any_write_yet = false;
 +      atomic_set(&connection->current_epoch->epoch_size, 0);
 +      connection->send.seen_any_write_yet = false;
  
 -      conn_info(tconn, "Connection closed\n");
 +      drbd_info(connection, "Connection closed\n");
  
 -      if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
 -              conn_try_outdate_peer_async(tconn);
 +      if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
 +              conn_try_outdate_peer_async(connection);
  
 -      spin_lock_irq(&tconn->req_lock);
 -      oc = tconn->cstate;
 +      spin_lock_irq(&connection->resource->req_lock);
 +      oc = connection->cstate;
        if (oc >= C_UNCONNECTED)
 -              _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
 +              _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
  
 -      spin_unlock_irq(&tconn->req_lock);
 +      spin_unlock_irq(&connection->resource->req_lock);
  
        if (oc == C_DISCONNECTING)
 -              conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
 +              conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
  }
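
Editor's note: the idr walk in conn_disconnect() shows a common kernel idiom. drbd_disconnected() may sleep, which is forbidden under rcu_read_lock(), so the loop pins each device with kref_get(), drops the read lock, does the blocking work, drops the reference, and re-enters the read side. A userspace sketch of that shape, assuming liburcu and using a C11 atomic as the refcount (obj and blocking_work() are illustrative):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>

struct obj { atomic_int refs; };

static void obj_put(struct obj *o)
{
        if (atomic_fetch_sub(&o->refs, 1) == 1)
                free(o);                 /* last reference dropped */
}

static void blocking_work(struct obj *o)
{
        printf("working, refs=%d\n", atomic_load(&o->refs));   /* may sleep */
}

static void walk(struct obj **table, int n)
{
        rcu_read_lock();
        for (int i = 0; i < n; i++) {
                struct obj *o = rcu_dereference(table[i]);

                if (!o)
                        continue;
                atomic_fetch_add(&o->refs, 1);   /* pin before unlocking */
                rcu_read_unlock();               /* now sleeping is allowed */
                blocking_work(o);
                obj_put(o);
                rcu_read_lock();                 /* resume the walk */
        }
        rcu_read_unlock();
}

int main(void)
{
        struct obj *one = malloc(sizeof(*one));
        struct obj *table[1] = { one };

        atomic_init(&one->refs, 1);
        rcu_register_thread();
        walk(table, 1);
        rcu_unregister_thread();
        obj_put(one);
        return 0;
}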
  
 -static int drbd_disconnected(struct drbd_conf *mdev)
 +static int drbd_disconnected(struct drbd_peer_device *peer_device)
  {
 +      struct drbd_device *device = peer_device->device;
        unsigned int i;
  
        /* wait for current activity to cease. */
 -      spin_lock_irq(&mdev->tconn->req_lock);
 -      _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
 -      _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
 -      _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      spin_lock_irq(&device->resource->req_lock);
 +      _drbd_wait_ee_list_empty(device, &device->active_ee);
 +      _drbd_wait_ee_list_empty(device, &device->sync_ee);
 +      _drbd_wait_ee_list_empty(device, &device->read_ee);
 +      spin_unlock_irq(&device->resource->req_lock);
  
        /* We do not have data structures that would allow us to
         * get the rs_pending_cnt down to 0 again.
         *  resync_LRU. The resync_LRU tracks the whole operation including
         *  the disk-IO, while the rs_pending_cnt only tracks the blocks
         *  on the fly. */
 -      drbd_rs_cancel_all(mdev);
 -      mdev->rs_total = 0;
 -      mdev->rs_failed = 0;
 -      atomic_set(&mdev->rs_pending_cnt, 0);
 -      wake_up(&mdev->misc_wait);
 +      drbd_rs_cancel_all(device);
 +      device->rs_total = 0;
 +      device->rs_failed = 0;
 +      atomic_set(&device->rs_pending_cnt, 0);
 +      wake_up(&device->misc_wait);
  
 -      del_timer_sync(&mdev->resync_timer);
 -      resync_timer_fn((unsigned long)mdev);
 +      del_timer_sync(&device->resync_timer);
 +      resync_timer_fn((unsigned long)device);
  
        /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
         * w_make_resync_request etc. which may still be on the worker queue
         * to be "canceled" */
 -      drbd_flush_workqueue(mdev);
 +      drbd_flush_workqueue(&peer_device->connection->sender_work);
  
 -      drbd_finish_peer_reqs(mdev);
 +      drbd_finish_peer_reqs(device);
  
        /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
           might have queued work again. The one before drbd_finish_peer_reqs() is
           necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
 -      drbd_flush_workqueue(mdev);
 +      drbd_flush_workqueue(&peer_device->connection->sender_work);
  
        /* need to do it again, drbd_finish_peer_reqs() may have populated it
         * again via drbd_try_clear_on_disk_bm(). */
 -      drbd_rs_cancel_all(mdev);
 +      drbd_rs_cancel_all(device);
  
 -      kfree(mdev->p_uuid);
 -      mdev->p_uuid = NULL;
 +      kfree(device->p_uuid);
 +      device->p_uuid = NULL;
  
 -      if (!drbd_suspended(mdev))
 -              tl_clear(mdev->tconn);
 +      if (!drbd_suspended(device))
 +              tl_clear(peer_device->connection);
  
 -      drbd_md_sync(mdev);
 +      drbd_md_sync(device);
  
        /* serialize with bitmap writeout triggered by the state change,
         * if any. */
 -      wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
 +      wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
  
        /* tcp_close and release of sendpage pages can be deferred.  I don't
         * want to use SO_LINGER, because apparently it can be deferred for
         * Actually we don't care for exactly when the network stack does its
         * put_page(), but release our reference on these pages right here.
         */
 -      i = drbd_free_peer_reqs(mdev, &mdev->net_ee);
 +      i = drbd_free_peer_reqs(device, &device->net_ee);
        if (i)
 -              dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
 -      i = atomic_read(&mdev->pp_in_use_by_net);
 +              drbd_info(device, "net_ee not empty, killed %u entries\n", i);
 +      i = atomic_read(&device->pp_in_use_by_net);
        if (i)
 -              dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
 -      i = atomic_read(&mdev->pp_in_use);
 +              drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
 +      i = atomic_read(&device->pp_in_use);
        if (i)
 -              dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
 +              drbd_info(device, "pp_in_use = %d, expected 0\n", i);
  
 -      D_ASSERT(list_empty(&mdev->read_ee));
 -      D_ASSERT(list_empty(&mdev->active_ee));
 -      D_ASSERT(list_empty(&mdev->sync_ee));
 -      D_ASSERT(list_empty(&mdev->done_ee));
 +      D_ASSERT(device, list_empty(&device->read_ee));
 +      D_ASSERT(device, list_empty(&device->active_ee));
 +      D_ASSERT(device, list_empty(&device->sync_ee));
 +      D_ASSERT(device, list_empty(&device->done_ee));
  
        return 0;
  }
   *
   * for now, they are expected to be zero, but ignored.
   */
 -static int drbd_send_features(struct drbd_tconn *tconn)
 +static int drbd_send_features(struct drbd_connection *connection)
  {
        struct drbd_socket *sock;
        struct p_connection_features *p;
  
 -      sock = &tconn->data;
 -      p = conn_prepare_command(tconn, sock);
 +      sock = &connection->data;
 +      p = conn_prepare_command(connection, sock);
        if (!p)
                return -EIO;
        memset(p, 0, sizeof(*p));
        p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
        p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
 -      return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
 +      return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
  }
  
  /*
   *  -1 peer talks different language,
   *     no point in trying again, please go standalone.
   */
 -static int drbd_do_features(struct drbd_tconn *tconn)
 +static int drbd_do_features(struct drbd_connection *connection)
  {
 -      /* ASSERT current == tconn->receiver ... */
 +      /* ASSERT current == connection->receiver ... */
        struct p_connection_features *p;
        const int expect = sizeof(struct p_connection_features);
        struct packet_info pi;
        int err;
  
 -      err = drbd_send_features(tconn);
 +      err = drbd_send_features(connection);
        if (err)
                return 0;
  
 -      err = drbd_recv_header(tconn, &pi);
 +      err = drbd_recv_header(connection, &pi);
        if (err)
                return 0;
  
        if (pi.cmd != P_CONNECTION_FEATURES) {
 -              conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
 +              drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
                         cmdname(pi.cmd), pi.cmd);
                return -1;
        }
  
        if (pi.size != expect) {
 -              conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
 +              drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
                     expect, pi.size);
                return -1;
        }
  
        p = pi.data;
 -      err = drbd_recv_all_warn(tconn, p, expect);
 +      err = drbd_recv_all_warn(connection, p, expect);
        if (err)
                return 0;
  
            PRO_VERSION_MIN > p->protocol_max)
                goto incompat;
  
 -      tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
 +      connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
  
 -      conn_info(tconn, "Handshake successful: "
 -           "Agreed network protocol version %d\n", tconn->agreed_pro_version);
 +      drbd_info(connection, "Handshake successful: "
 +           "Agreed network protocol version %d\n", connection->agreed_pro_version);
  
        return 1;
  
   incompat:
 -      conn_err(tconn, "incompatible DRBD dialects: "
 +      drbd_err(connection, "incompatible DRBD dialects: "
            "I support %d-%d, peer supports %d-%d\n",
            PRO_VERSION_MIN, PRO_VERSION_MAX,
            p->protocol_min, p->protocol_max);
  }
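
Editor's note: drbd_do_features() agrees on min(PRO_VERSION_MAX, peer's protocol_max) once the advertised ranges overlap, and goes standalone otherwise. The arithmetic, as a standalone sketch (the version bounds are illustrative constants, not necessarily the driver's):

#include <stdio.h>

#define PRO_VERSION_MIN 86    /* illustrative */
#define PRO_VERSION_MAX 101   /* illustrative */

/* Highest version both sides support, or -1 if the ranges don't overlap. */
static int negotiate(int peer_min, int peer_max)
{
        if (peer_max < PRO_VERSION_MIN || PRO_VERSION_MAX < peer_min)
                return -1;   /* incompatible dialects */
        return PRO_VERSION_MAX < peer_max ? PRO_VERSION_MAX : peer_max;
}

int main(void)
{
        printf("%d\n", negotiate(90, 97));   /* -> 97 */
        printf("%d\n", negotiate(60, 70));   /* -> -1 */
        return 0;
}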
  
  #if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
 -static int drbd_do_auth(struct drbd_tconn *tconn)
 +static int drbd_do_auth(struct drbd_connection *connection)
  {
 -      conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
 -      conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
 +      drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
 +      drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
        return -1;
  }
  #else
        -1 - auth failed, don't try again.
  */
  
 -static int drbd_do_auth(struct drbd_tconn *tconn)
 +static int drbd_do_auth(struct drbd_connection *connection)
  {
        struct drbd_socket *sock;
        char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
        /* FIXME: Put the challenge/response into the preallocated socket buffer.  */
  
        rcu_read_lock();
 -      nc = rcu_dereference(tconn->net_conf);
 +      nc = rcu_dereference(connection->net_conf);
        key_len = strlen(nc->shared_secret);
        memcpy(secret, nc->shared_secret, key_len);
        rcu_read_unlock();
  
 -      desc.tfm = tconn->cram_hmac_tfm;
 +      desc.tfm = connection->cram_hmac_tfm;
        desc.flags = 0;
  
 -      rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len);
 +      rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
        if (rv) {
 -              conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
 +              drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
                rv = -1;
                goto fail;
        }
  
        get_random_bytes(my_challenge, CHALLENGE_LEN);
  
 -      sock = &tconn->data;
 -      if (!conn_prepare_command(tconn, sock)) {
 +      sock = &connection->data;
 +      if (!conn_prepare_command(connection, sock)) {
                rv = 0;
                goto fail;
        }
 -      rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0,
 +      rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
                                my_challenge, CHALLENGE_LEN);
        if (!rv)
                goto fail;
  
 -      err = drbd_recv_header(tconn, &pi);
 +      err = drbd_recv_header(connection, &pi);
        if (err) {
                rv = 0;
                goto fail;
        }
  
        if (pi.cmd != P_AUTH_CHALLENGE) {
 -              conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
 +              drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
                         cmdname(pi.cmd), pi.cmd);
                rv = 0;
                goto fail;
        }
  
        if (pi.size > CHALLENGE_LEN * 2) {
 -              conn_err(tconn, "expected AuthChallenge payload too big.\n");
 +              drbd_err(connection, "expected AuthChallenge payload too big.\n");
                rv = -1;
                goto fail;
        }
  
        peers_ch = kmalloc(pi.size, GFP_NOIO);
        if (peers_ch == NULL) {
 -              conn_err(tconn, "kmalloc of peers_ch failed\n");
 +              drbd_err(connection, "kmalloc of peers_ch failed\n");
                rv = -1;
                goto fail;
        }
  
 -      err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
 +      err = drbd_recv_all_warn(connection, peers_ch, pi.size);
        if (err) {
                rv = 0;
                goto fail;
        }
  
 -      resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
 +      resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
        response = kmalloc(resp_size, GFP_NOIO);
        if (response == NULL) {
 -              conn_err(tconn, "kmalloc of response failed\n");
 +              drbd_err(connection, "kmalloc of response failed\n");
                rv = -1;
                goto fail;
        }
  
        rv = crypto_hash_digest(&desc, &sg, sg.length, response);
        if (rv) {
 -              conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
 +              drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
                rv = -1;
                goto fail;
        }
  
 -      if (!conn_prepare_command(tconn, sock)) {
 +      if (!conn_prepare_command(connection, sock)) {
                rv = 0;
                goto fail;
        }
 -      rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0,
 +      rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
                                response, resp_size);
        if (!rv)
                goto fail;
  
 -      err = drbd_recv_header(tconn, &pi);
 +      err = drbd_recv_header(connection, &pi);
        if (err) {
                rv = 0;
                goto fail;
        }
  
        if (pi.cmd != P_AUTH_RESPONSE) {
 -              conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
 +              drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
                         cmdname(pi.cmd), pi.cmd);
                rv = 0;
                goto fail;
        }
  
        if (pi.size != resp_size) {
 -              conn_err(tconn, "expected AuthResponse payload of wrong size\n");
 +              drbd_err(connection, "expected AuthResponse payload of wrong size\n");
                rv = 0;
                goto fail;
        }
  
 -      err = drbd_recv_all_warn(tconn, response , resp_size);
 +      err = drbd_recv_all_warn(connection, response, resp_size);
        if (err) {
                rv = 0;
                goto fail;
  
        right_response = kmalloc(resp_size, GFP_NOIO);
        if (right_response == NULL) {
 -              conn_err(tconn, "kmalloc of right_response failed\n");
 +              drbd_err(connection, "kmalloc of right_response failed\n");
                rv = -1;
                goto fail;
        }
  
        rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
        if (rv) {
 -              conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
 +              drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
                rv = -1;
                goto fail;
        }
        rv = !memcmp(response, right_response, resp_size);
  
        if (rv)
 -              conn_info(tconn, "Peer authenticated using %d bytes HMAC\n",
 +              drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
                     resp_size);
        else
                rv = -1;
  }
  #endif
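
drbd_do_auth() above implements a CRAM-style handshake: each side HMACs the peer's random challenge with the shared secret and compares digests. In isolation, the legacy crypto_hash calls it relies on look roughly like this; a hedged sketch, where hmac_buf and its shape are illustrative and only the crypto API names come from the code above:

#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/scatterlist.h>

/* Hedged sketch of the crypto_hash_setkey()/crypto_hash_digest() usage
 * seen in drbd_do_auth(); every name except the crypto API is made up. */
static int hmac_buf(struct crypto_hash *tfm, const u8 *key, unsigned int klen,
		    struct scatterlist *sg, unsigned int nbytes, u8 *out)
{
	struct hash_desc desc = { .tfm = tfm, .flags = 0 };
	int rv;

	rv = crypto_hash_setkey(tfm, key, klen);
	if (rv)
		return rv;
	/* digest sg into out; out must hold crypto_hash_digestsize(tfm) bytes */
	return crypto_hash_digest(&desc, sg, nbytes, out);
}
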
  
 -int drbdd_init(struct drbd_thread *thi)
 +int drbd_receiver(struct drbd_thread *thi)
  {
 -      struct drbd_tconn *tconn = thi->tconn;
 +      struct drbd_connection *connection = thi->connection;
        int h;
  
 -      conn_info(tconn, "receiver (re)started\n");
 +      drbd_info(connection, "receiver (re)started\n");
  
        do {
 -              h = conn_connect(tconn);
 +              h = conn_connect(connection);
                if (h == 0) {
 -                      conn_disconnect(tconn);
 +                      conn_disconnect(connection);
                        schedule_timeout_interruptible(HZ);
                }
                if (h == -1) {
 -                      conn_warn(tconn, "Discarding network configuration.\n");
 -                      conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 +                      drbd_warn(connection, "Discarding network configuration.\n");
 +                      conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
                }
        } while (h == 0);
  
        if (h > 0)
 -              drbdd(tconn);
 +              drbdd(connection);
  
 -      conn_disconnect(tconn);
 +      conn_disconnect(connection);
  
 -      conn_info(tconn, "receiver terminated\n");
 +      drbd_info(connection, "receiver terminated\n");
        return 0;
  }
  
  /* ********* acknowledge sender ******** */
  
 -static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
  {
        struct p_req_state_reply *p = pi->data;
        int retcode = be32_to_cpu(p->retcode);
  
        if (retcode >= SS_SUCCESS) {
 -              set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
 +              set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
        } else {
 -              set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
 -              conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
 +              set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
 +              drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
                         drbd_set_st_err_str(retcode), retcode);
        }
 -      wake_up(&tconn->ping_wait);
 +      wake_up(&connection->ping_wait);
  
        return 0;
  }
  
 -static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_req_state_reply *p = pi->data;
        int retcode = be32_to_cpu(p->retcode);
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
 -      if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) {
 -              D_ASSERT(tconn->agreed_pro_version < 100);
 -              return got_conn_RqSReply(tconn, pi);
 +      if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
 +              D_ASSERT(device, connection->agreed_pro_version < 100);
 +              return got_conn_RqSReply(connection, pi);
        }
  
        if (retcode >= SS_SUCCESS) {
 -              set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
 +              set_bit(CL_ST_CHG_SUCCESS, &device->flags);
        } else {
 -              set_bit(CL_ST_CHG_FAIL, &mdev->flags);
 -              dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
 +              set_bit(CL_ST_CHG_FAIL, &device->flags);
 +              drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
                        drbd_set_st_err_str(retcode), retcode);
        }
 -      wake_up(&mdev->state_wait);
 +      wake_up(&device->state_wait);
  
        return 0;
  }
  
 -static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      return drbd_send_ping_ack(tconn);
 +      return drbd_send_ping_ack(connection);
  
  }
  
 -static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
  {
        /* restore idle timeout */
 -      tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
 -      if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
 -              wake_up(&tconn->ping_wait);
 +      connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
 +      if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
 +              wake_up(&connection->ping_wait);
  
        return 0;
  }
  
 -static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_block_ack *p = pi->data;
        sector_t sector = be64_to_cpu(p->sector);
        int blksize = be32_to_cpu(p->blksize);
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
 -      D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
 +      D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
  
 -      update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 +      update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
  
 -      if (get_ldev(mdev)) {
 -              drbd_rs_complete_io(mdev, sector);
 -              drbd_set_in_sync(mdev, sector, blksize);
 +      if (get_ldev(device)) {
 +              drbd_rs_complete_io(device, sector);
 +              drbd_set_in_sync(device, sector, blksize);
                /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
 -              mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
 -              put_ldev(mdev);
 +              device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
 +              put_ldev(device);
        }
 -      dec_rs_pending(mdev);
 -      atomic_add(blksize >> 9, &mdev->rs_sect_in);
 +      dec_rs_pending(device);
 +      atomic_add(blksize >> 9, &device->rs_sect_in);
  
        return 0;
  }
  
  static int
 -validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
 +validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
                              struct rb_root *root, const char *func,
                              enum drbd_req_event what, bool missing_ok)
  {
        struct drbd_request *req;
        struct bio_and_error m;
  
 -      spin_lock_irq(&mdev->tconn->req_lock);
 -      req = find_request(mdev, root, id, sector, missing_ok, func);
 +      spin_lock_irq(&device->resource->req_lock);
 +      req = find_request(device, root, id, sector, missing_ok, func);
        if (unlikely(!req)) {
 -              spin_unlock_irq(&mdev->tconn->req_lock);
 +              spin_unlock_irq(&device->resource->req_lock);
                return -EIO;
        }
        __req_mod(req, what, &m);
 -      spin_unlock_irq(&mdev->tconn->req_lock);
 +      spin_unlock_irq(&device->resource->req_lock);
  
        if (m.bio)
 -              complete_master_bio(mdev, &m);
 +              complete_master_bio(device, &m);
        return 0;
  }
  
 -static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_block_ack *p = pi->data;
        sector_t sector = be64_to_cpu(p->sector);
        int blksize = be32_to_cpu(p->blksize);
        enum drbd_req_event what;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
 -      update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 +      update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
  
        if (p->block_id == ID_SYNCER) {
 -              drbd_set_in_sync(mdev, sector, blksize);
 -              dec_rs_pending(mdev);
 +              drbd_set_in_sync(device, sector, blksize);
 +              dec_rs_pending(device);
                return 0;
        }
        switch (pi->cmd) {
                BUG();
        }
  
 -      return validate_req_change_req_state(mdev, p->block_id, sector,
 -                                           &mdev->write_requests, __func__,
 +      return validate_req_change_req_state(device, p->block_id, sector,
 +                                           &device->write_requests, __func__,
                                             what, false);
  }
  
 -static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_block_ack *p = pi->data;
        sector_t sector = be64_to_cpu(p->sector);
        int size = be32_to_cpu(p->blksize);
        int err;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
 -      update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 +      update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
  
        if (p->block_id == ID_SYNCER) {
 -              dec_rs_pending(mdev);
 -              drbd_rs_failed_io(mdev, sector, size);
 +              dec_rs_pending(device);
 +              drbd_rs_failed_io(device, sector, size);
                return 0;
        }
  
 -      err = validate_req_change_req_state(mdev, p->block_id, sector,
 -                                          &mdev->write_requests, __func__,
 +      err = validate_req_change_req_state(device, p->block_id, sector,
 +                                          &device->write_requests, __func__,
                                            NEG_ACKED, true);
        if (err) {
                /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
                   request is no longer in the collision hash. */
                /* In Protocol B we might already have got a P_RECV_ACK
                   but then get a P_NEG_ACK afterwards. */
 -              drbd_set_out_of_sync(mdev, sector, size);
 +              drbd_set_out_of_sync(device, sector, size);
        }
        return 0;
  }
  
 -static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_block_ack *p = pi->data;
        sector_t sector = be64_to_cpu(p->sector);
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
 -      update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 +      update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
  
 -      dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
 +      drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
            (unsigned long long)sector, be32_to_cpu(p->blksize));
  
 -      return validate_req_change_req_state(mdev, p->block_id, sector,
 -                                           &mdev->read_requests, __func__,
 +      return validate_req_change_req_state(device, p->block_id, sector,
 +                                           &device->read_requests, __func__,
                                             NEG_ACKED, false);
  }
  
 -static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        sector_t sector;
        int size;
        struct p_block_ack *p = pi->data;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
        sector = be64_to_cpu(p->sector);
        size = be32_to_cpu(p->blksize);
  
 -      update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 +      update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
  
 -      dec_rs_pending(mdev);
 +      dec_rs_pending(device);
  
 -      if (get_ldev_if_state(mdev, D_FAILED)) {
 -              drbd_rs_complete_io(mdev, sector);
 +      if (get_ldev_if_state(device, D_FAILED)) {
 +              drbd_rs_complete_io(device, sector);
                switch (pi->cmd) {
                case P_NEG_RS_DREPLY:
 -                      drbd_rs_failed_io(mdev, sector, size);
 +                      drbd_rs_failed_io(device, sector, size);
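 +                      /* fall through */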
                case P_RS_CANCEL:
                        break;
                default:
                        BUG();
                }
 -              put_ldev(mdev);
 +              put_ldev(device);
        }
  
        return 0;
  }
  
 -static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
  {
        struct p_barrier_ack *p = pi->data;
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
        int vnr;
  
 -      tl_release(tconn, p->barrier, be32_to_cpu(p->set_size));
 +      tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
  
        rcu_read_lock();
 -      idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 -              if (mdev->state.conn == C_AHEAD &&
 -                  atomic_read(&mdev->ap_in_flight) == 0 &&
 -                  !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
 -                      mdev->start_resync_timer.expires = jiffies + HZ;
 -                      add_timer(&mdev->start_resync_timer);
 +      idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 +              struct drbd_device *device = peer_device->device;
 +
 +              if (device->state.conn == C_AHEAD &&
 +                  atomic_read(&device->ap_in_flight) == 0 &&
 +                  !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
 +                      device->start_resync_timer.expires = jiffies + HZ;
 +                      add_timer(&device->start_resync_timer);
                }
        }
        rcu_read_unlock();
        return 0;
  }
  
 -static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
 +      struct drbd_device *device;
        struct p_block_ack *p = pi->data;
 -      struct drbd_work *w;
 +      struct drbd_device_work *dw;
        sector_t sector;
        int size;
  
 -      mdev = vnr_to_mdev(tconn, pi->vnr);
 -      if (!mdev)
 +      peer_device = conn_peer_device(connection, pi->vnr);
 +      if (!peer_device)
                return -EIO;
 +      device = peer_device->device;
  
        sector = be64_to_cpu(p->sector);
        size = be32_to_cpu(p->blksize);
  
 -      update_peer_seq(mdev, be32_to_cpu(p->seq_num));
 +      update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
  
        if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
 -              drbd_ov_out_of_sync_found(mdev, sector, size);
 +              drbd_ov_out_of_sync_found(device, sector, size);
        else
 -              ov_out_of_sync_print(mdev);
 +              ov_out_of_sync_print(device);
  
 -      if (!get_ldev(mdev))
 +      if (!get_ldev(device))
                return 0;
  
 -      drbd_rs_complete_io(mdev, sector);
 -      dec_rs_pending(mdev);
 +      drbd_rs_complete_io(device, sector);
 +      dec_rs_pending(device);
  
 -      --mdev->ov_left;
 +      --device->ov_left;
  
        /* let's advance progress step marks only for every other megabyte */
 -      if ((mdev->ov_left & 0x200) == 0x200)
 -              drbd_advance_rs_marks(mdev, mdev->ov_left);
 -
 -      if (mdev->ov_left == 0) {
 -              w = kmalloc(sizeof(*w), GFP_NOIO);
 -              if (w) {
 -                      w->cb = w_ov_finished;
 -                      w->mdev = mdev;
 -                      drbd_queue_work(&mdev->tconn->sender_work, w);
 +      if ((device->ov_left & 0x200) == 0x200)
 +              drbd_advance_rs_marks(device, device->ov_left);
 +
 +      if (device->ov_left == 0) {
 +              dw = kmalloc(sizeof(*dw), GFP_NOIO);
 +              if (dw) {
 +                      dw->w.cb = w_ov_finished;
 +                      dw->device = device;
 +                      drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
                } else {
 -                      dev_err(DEV, "kmalloc(w) failed.");
 -                      ov_out_of_sync_print(mdev);
 -                      drbd_resync_finished(mdev);
 +                      drbd_err(device, "kmalloc(dw) failed.");
 +                      ov_out_of_sync_print(device);
 +                      drbd_resync_finished(device);
                }
        }
 -      put_ldev(mdev);
 +      put_ldev(device);
        return 0;
  }
  
 -static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
 +static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
  {
        return 0;
  }
  
 -static int tconn_finish_peer_reqs(struct drbd_tconn *tconn)
 +static int connection_finish_peer_reqs(struct drbd_connection *connection)
  {
 -      struct drbd_conf *mdev;
 +      struct drbd_peer_device *peer_device;
        int vnr, not_empty = 0;
  
        do {
 -              clear_bit(SIGNAL_ASENDER, &tconn->flags);
 +              clear_bit(SIGNAL_ASENDER, &connection->flags);
                flush_signals(current);
  
                rcu_read_lock();
 -              idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 -                      kref_get(&mdev->kref);
 +              idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 +                      struct drbd_device *device = peer_device->device;
 +                      kref_get(&device->kref);
                        rcu_read_unlock();
 -                      if (drbd_finish_peer_reqs(mdev)) {
 -                              kref_put(&mdev->kref, &drbd_minor_destroy);
 +                      if (drbd_finish_peer_reqs(device)) {
 +                              kref_put(&device->kref, drbd_destroy_device);
                                return 1;
                        }
 -                      kref_put(&mdev->kref, &drbd_minor_destroy);
 +                      kref_put(&device->kref, drbd_destroy_device);
                        rcu_read_lock();
                }
 -              set_bit(SIGNAL_ASENDER, &tconn->flags);
 +              set_bit(SIGNAL_ASENDER, &connection->flags);
  
 -              spin_lock_irq(&tconn->req_lock);
 -              idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 -                      not_empty = !list_empty(&mdev->done_ee);
 +              spin_lock_irq(&connection->resource->req_lock);
 +              idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
 +                      struct drbd_device *device = peer_device->device;
 +                      not_empty = !list_empty(&device->done_ee);
                        if (not_empty)
                                break;
                }
 -              spin_unlock_irq(&tconn->req_lock);
 +              spin_unlock_irq(&connection->resource->req_lock);
                rcu_read_unlock();
        } while (not_empty);
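
The loop above uses a common kernel idiom: pin each object with a kref before dropping the RCU read lock, so the body can run (and potentially sleep) outside the critical section, then re-enter RCU before advancing the iterator. A minimal sketch of the pattern, with hypothetical obj/obj_release/visit_all names:

#include <linux/idr.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct obj {
	struct kref kref;
	/* ... payload ... */
};

static void obj_release(struct kref *kref)
{
	kfree(container_of(kref, struct obj, kref));
}

static void visit_all(struct idr *objects)
{
	struct obj *obj;
	int id;

	rcu_read_lock();
	idr_for_each_entry(objects, obj, id) {
		kref_get(&obj->kref);	/* pin before leaving the RCU section */
		rcu_read_unlock();
		/* safe to sleep here; the kref keeps *obj alive */
		kref_put(&obj->kref, obj_release);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
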
  
  
  struct asender_cmd {
        size_t pkt_size;
 -      int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
 +      int (*fn)(struct drbd_connection *connection, struct packet_info *);
  };
  
  static struct asender_cmd asender_tbl[] = {
  
  int drbd_asender(struct drbd_thread *thi)
  {
 -      struct drbd_tconn *tconn = thi->tconn;
 +      struct drbd_connection *connection = thi->connection;
        struct asender_cmd *cmd = NULL;
        struct packet_info pi;
        int rv;
 -      void *buf    = tconn->meta.rbuf;
 +      void *buf    = connection->meta.rbuf;
        int received = 0;
 -      unsigned int header_size = drbd_header_size(tconn);
 +      unsigned int header_size = drbd_header_size(connection);
        int expect   = header_size;
        bool ping_timeout_active = false;
        struct net_conf *nc;
  
        rv = sched_setscheduler(current, SCHED_RR, &param);
        if (rv < 0)
 -              conn_err(tconn, "drbd_asender: ERROR set priority, ret=%d\n", rv);
 +              drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
  
        while (get_t_state(thi) == RUNNING) {
                drbd_thread_current_set_cpu(thi);
  
                rcu_read_lock();
 -              nc = rcu_dereference(tconn->net_conf);
 +              nc = rcu_dereference(connection->net_conf);
                ping_timeo = nc->ping_timeo;
                tcp_cork = nc->tcp_cork;
                ping_int = nc->ping_int;
                rcu_read_unlock();
  
 -              if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
 -                      if (drbd_send_ping(tconn)) {
 -                              conn_err(tconn, "drbd_send_ping has failed\n");
 +              if (test_and_clear_bit(SEND_PING, &connection->flags)) {
 +                      if (drbd_send_ping(connection)) {
 +                              drbd_err(connection, "drbd_send_ping has failed\n");
                                goto reconnect;
                        }
 -                      tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
 +                      connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
                        ping_timeout_active = true;
                }
  
                /* TODO: conditionally cork; it may hurt latency if we cork without
                   much to send */
                if (tcp_cork)
 -                      drbd_tcp_cork(tconn->meta.socket);
 -              if (tconn_finish_peer_reqs(tconn)) {
 -                      conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
 +                      drbd_tcp_cork(connection->meta.socket);
 +              if (connection_finish_peer_reqs(connection)) {
 +                      drbd_err(connection, "connection_finish_peer_reqs() failed\n");
                        goto reconnect;
                }
                /* but unconditionally uncork unless disabled */
                if (tcp_cork)
 -                      drbd_tcp_uncork(tconn->meta.socket);
 +                      drbd_tcp_uncork(connection->meta.socket);
  
                /* short circuit, recv_msg would return EINTR anyways. */
                if (signal_pending(current))
                        continue;
  
 -              rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
 -              clear_bit(SIGNAL_ASENDER, &tconn->flags);
 +              rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
 +              clear_bit(SIGNAL_ASENDER, &connection->flags);
  
                flush_signals(current);
  
                        received += rv;
                        buf      += rv;
                } else if (rv == 0) {
 -                      if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
 +                      if (test_bit(DISCONNECT_SENT, &connection->flags)) {
                                long t;
                                rcu_read_lock();
 -                              t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
 +                              t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
                                rcu_read_unlock();
  
 -                              t = wait_event_timeout(tconn->ping_wait,
 -                                                     tconn->cstate < C_WF_REPORT_PARAMS,
 +                              t = wait_event_timeout(connection->ping_wait,
 +                                                     connection->cstate < C_WF_REPORT_PARAMS,
                                                       t);
                                if (t)
                                        break;
                        }
 -                      conn_err(tconn, "meta connection shut down by peer.\n");
 +                      drbd_err(connection, "meta connection shut down by peer.\n");
                        goto reconnect;
                } else if (rv == -EAGAIN) {
                        /* If the data socket received something meanwhile,
                         * that is good enough: peer is still alive. */
 -                      if (time_after(tconn->last_received,
 -                              jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
 +                      if (time_after(connection->last_received,
 +                              jiffies - connection->meta.socket->sk->sk_rcvtimeo))
                                continue;
                        if (ping_timeout_active) {
 -                              conn_err(tconn, "PingAck did not arrive in time.\n");
 +                              drbd_err(connection, "PingAck did not arrive in time.\n");
                                goto reconnect;
                        }
 -                      set_bit(SEND_PING, &tconn->flags);
 +                      set_bit(SEND_PING, &connection->flags);
                        continue;
                } else if (rv == -EINTR) {
                        continue;
                } else {
 -                      conn_err(tconn, "sock_recvmsg returned %d\n", rv);
 +                      drbd_err(connection, "sock_recvmsg returned %d\n", rv);
                        goto reconnect;
                }
  
                if (received == expect && cmd == NULL) {
 -                      if (decode_header(tconn, tconn->meta.rbuf, &pi))
 +                      if (decode_header(connection, connection->meta.rbuf, &pi))
                                goto reconnect;
                        cmd = &asender_tbl[pi.cmd];
                        if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
 -                              conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n",
 +                              drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
                                         cmdname(pi.cmd), pi.cmd);
                                goto disconnect;
                        }
                        expect = header_size + cmd->pkt_size;
                        if (pi.size != expect - header_size) {
 -                              conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
 +                              drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
                                        pi.cmd, pi.size);
                                goto reconnect;
                        }
                if (received == expect) {
                        bool err;
  
 -                      err = cmd->fn(tconn, &pi);
 +                      err = cmd->fn(connection, &pi);
                        if (err) {
 -                              conn_err(tconn, "%pf failed\n", cmd->fn);
 +                              drbd_err(connection, "%pf failed\n", cmd->fn);
                                goto reconnect;
                        }
  
 -                      tconn->last_received = jiffies;
 +                      connection->last_received = jiffies;
  
                        if (cmd == &asender_tbl[P_PING_ACK]) {
                                /* restore idle timeout */
 -                              tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
 +                              connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
                                ping_timeout_active = false;
                        }
  
 -                      buf      = tconn->meta.rbuf;
 +                      buf      = connection->meta.rbuf;
                        received = 0;
                        expect   = header_size;
                        cmd      = NULL;
  
        if (0) {
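                /* only reachable via goto: "if (0)" keeps the labels off the fall-through path */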
  reconnect:
 -              conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
 -              conn_md_sync(tconn);
 +              conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
 +              conn_md_sync(connection);
        }
        if (0) {
  disconnect:
 -              conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
 +              conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
        }
 -      clear_bit(SIGNAL_ASENDER, &tconn->flags);
 +      clear_bit(SIGNAL_ASENDER, &connection->flags);
  
 -      conn_info(tconn, "asender terminated\n");
 +      drbd_info(connection, "asender terminated\n");
  
        return 0;
  }
index a54b506ba7ca030230e99d91092638f0bcfc6cb8,733c79e1f12dbdb0d45028dee57a8e7512e21cfe..b87b246111c0c9727709838c1860df6d0421fc00
@@@ -99,16 -99,7 +99,7 @@@ ksocknal_lib_send_iov (ksock_conn_t *co
                struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
                unsigned int    niov = tx->tx_niov;
  #endif
-               struct msghdr msg = {
-                       .msg_name       = NULL,
-                       .msg_namelen    = 0,
-                       .msg_iov        = scratchiov,
-                       .msg_iovlen     = niov,
-                       .msg_control    = NULL,
-                       .msg_controllen = 0,
-                       .msg_flags      = MSG_DONTWAIT
-               };
-               mm_segment_t oldmm = get_fs();
+               struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
                int  i;
  
                for (nob = i = 0; i < niov; i++) {
                    nob < tx->tx_resid)
                        msg.msg_flags |= MSG_MORE;
  
-               set_fs (KERNEL_DS);
-               rc = sock_sendmsg(sock, &msg, nob);
-               set_fs (oldmm);
+               rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob);
        }
        return rc;
  }
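
The hunk above is the heart of this cleanup: the open-coded set_fs(KERNEL_DS)/sock_sendmsg()/set_fs(oldmm) dance becomes a single kernel_sendmsg() call, which takes a kvec describing kernel memory and handles the address-limit switch internally. A hedged sketch of the call in isolation (send_kernel_buf is illustrative; only kernel_sendmsg itself comes from the patch):

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/uio.h>

/* Hedged sketch: send one kernel buffer on a struct socket without
 * touching set_fs(); returns bytes sent or a negative errno. */
static int send_kernel_buf(struct socket *sock, void *buf, size_t len)
{
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
	struct kvec vec = { .iov_base = buf, .iov_len = len };

	return kernel_sendmsg(sock, &msg, &vec, 1, len);
}
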
@@@ -174,16 -163,7 +163,7 @@@ ksocknal_lib_send_kiov (ksock_conn_t *c
                struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
                unsigned int  niov = tx->tx_nkiov;
  #endif
-               struct msghdr msg = {
-                       .msg_name       = NULL,
-                       .msg_namelen    = 0,
-                       .msg_iov        = scratchiov,
-                       .msg_iovlen     = niov,
-                       .msg_control    = NULL,
-                       .msg_controllen = 0,
-                       .msg_flags      = MSG_DONTWAIT
-               };
-               mm_segment_t  oldmm = get_fs();
+               struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
                int        i;
  
                for (nob = i = 0; i < niov; i++) {
                    nob < tx->tx_resid)
                        msg.msg_flags |= MSG_MORE;
  
-               set_fs (KERNEL_DS);
-               rc = sock_sendmsg(sock, &msg, nob);
-               set_fs (oldmm);
+               rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob);
  
                for (i = 0; i < niov; i++)
                        kunmap(kiov[i].kiov_page);
@@@ -237,15 -215,8 +215,8 @@@ ksocknal_lib_recv_iov (ksock_conn_t *co
  #endif
        struct iovec *iov = conn->ksnc_rx_iov;
        struct msghdr msg = {
-               .msg_name       = NULL,
-               .msg_namelen    = 0,
-               .msg_iov        = scratchiov,
-               .msg_iovlen     = niov,
-               .msg_control    = NULL,
-               .msg_controllen = 0,
                .msg_flags      = 0
        };
-       mm_segment_t oldmm = get_fs();
        int       nob;
        int       i;
        int       rc;
        }
        LASSERT (nob <= conn->ksnc_rx_nob_wanted);
  
-       set_fs (KERNEL_DS);
-       rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
-       /* NB this is just a boolean..........................^ */
-       set_fs (oldmm);
+       rc = kernel_recvmsg(conn->ksnc_sock, &msg,
+               (struct kvec *)scratchiov, niov, nob, MSG_DONTWAIT);
  
        saved_csum = 0;
        if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
@@@ -355,14 -324,8 +324,8 @@@ ksocknal_lib_recv_kiov (ksock_conn_t *c
  #endif
        lnet_kiov_t   *kiov = conn->ksnc_rx_kiov;
        struct msghdr msg = {
-               .msg_name       = NULL,
-               .msg_namelen    = 0,
-               .msg_iov        = scratchiov,
-               .msg_control    = NULL,
-               .msg_controllen = 0,
                .msg_flags      = 0
        };
-       mm_segment_t oldmm = get_fs();
        int       nob;
        int       i;
        int       rc;
        void    *addr;
        int       sum;
        int       fragnob;
+       int n;
  
        /* NB we can't trust socket ops to either consume our iovs
         * or leave them alone. */
 -      if ((addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages)) != NULL) {
 +      addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages);
 +      if (addr != NULL) {
                nob = scratchiov[0].iov_len;
-               msg.msg_iovlen = 1;
+               n = 1;
  
        } else {
                for (nob = i = 0; i < niov; i++) {
                        scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
                                                 kiov[i].kiov_offset;
                }
-               msg.msg_iovlen = niov;
+               n = niov;
        }
  
        LASSERT (nob <= conn->ksnc_rx_nob_wanted);
  
-       set_fs (KERNEL_DS);
-       rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
-       /* NB this is just a boolean.......................^ */
-       set_fs (oldmm);
+       rc = kernel_recvmsg(conn->ksnc_sock, &msg,
+                       (struct kvec *)scratchiov, n, nob, MSG_DONTWAIT);
  
        if (conn->ksnc_msg.ksm_csum != 0) {
                for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
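
The receive side of these hunks gets the same treatment: kernel_recvmsg() replaces the set_fs()/sock_recvmsg() pair, with the iovec count passed explicitly (the new local n above) and the flags as the final argument rather than smuggled through the length parameter. A hedged sketch (recv_kernel_buf is illustrative):

#include <linux/net.h>
#include <linux/socket.h>
#include <linux/uio.h>

/* Hedged sketch: non-blocking receive into a kernel buffer; returns
 * bytes received, 0 on orderly shutdown, or a negative errno. */
static int recv_kernel_buf(struct socket *sock, void *buf, size_t len)
{
	struct msghdr msg = { .msg_flags = 0 };
	struct kvec vec = { .iov_base = buf, .iov_len = len };

	return kernel_recvmsg(sock, &msg, &vec, 1, len, MSG_DONTWAIT);
}
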
index 773d8ca07a004c6ec7972ae62f9be9d266c366a4,2e2ccefb9c2b4630ba90b0cc953809f939253b35..de692d7011a5cc4937298e5d3d697f519f0c9d04
@@@ -86,19 -86,16 +86,19 @@@ static ssize_t store_sockfd(struct devi
        struct stub_device *sdev = dev_get_drvdata(dev);
        int sockfd = 0;
        struct socket *socket;
-       ssize_t err = -EINVAL;
 +      int rv;
  
        if (!sdev) {
                dev_err(dev, "sdev is null\n");
                return -ENODEV;
        }
  
 -      sscanf(buf, "%d", &sockfd);
 +      rv = sscanf(buf, "%d", &sockfd);
 +      if (rv != 1)
 +              return -EINVAL;
  
        if (sockfd != -1) {
+               int err;
                dev_info(dev, "stub up\n");
  
                spin_lock_irq(&sdev->ud.lock);
                        goto err;
                }
  
-               socket = sockfd_to_socket(sockfd);
+               socket = sockfd_lookup(sockfd, &err);
                if (!socket)
                        goto err;
  
  
  err:
        spin_unlock_irq(&sdev->ud.lock);
-       return err;
+       return -EINVAL;
  }
  static DEVICE_ATTR(usbip_sockfd, S_IWUSR, NULL, store_sockfd);
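
store_sockfd() above shows the validation pattern this series applies to several sysfs store callbacks: check the sscanf() return value and reject anything that does not match exactly one field, instead of silently operating on an uninitialized or stale value. A minimal sketch of that shape; the "value" attribute is hypothetical, and for a single integer kstrtoint() would be the more usual helper:

#include <linux/device.h>
#include <linux/kernel.h>

static int value;	/* hypothetical attribute backing store */

static ssize_t value_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	int v;

	if (sscanf(buf, "%d", &v) != 1)
		return -EINVAL;		/* reject partial or garbage input */

	value = v;
	return count;			/* consume the whole write */
}
static DEVICE_ATTR(value, S_IWUSR, NULL, value_store);
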
  
@@@ -211,7 -208,7 +211,7 @@@ static void stub_shutdown_connection(st
         * not touch NULL socket.
         */
        if (ud->tcp_socket) {
-               fput(ud->tcp_socket->file);
+               sockfd_put(ud->tcp_socket);
                ud->tcp_socket = NULL;
        }
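
Throughout the usbip changes, sockfd_lookup()/sockfd_put() replace the driver-local sockfd_to_socket() helper and the bare fput(): sockfd_lookup() takes its own file reference and reports the failure reason through its second argument. A hedged sketch of the pairing (use_sockfd is illustrative):

#include <linux/net.h>

static int use_sockfd(int sockfd)
{
	int err;
	struct socket *sock = sockfd_lookup(sockfd, &err);

	if (!sock)
		return err;	/* e.g. -EBADF or -ENOTSOCK */

	/* ... use sock ... */

	sockfd_put(sock);	/* drops the reference sockfd_lookup() took */
	return 0;
}
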
  
@@@ -282,19 -279,21 +282,19 @@@ static void stub_device_unusable(struc
   *
   * Allocates and initializes a new stub_device struct.
   */
 -static struct stub_device *stub_device_alloc(struct usb_device *udev,
 -                                           struct usb_interface *interface)
 +static struct stub_device *stub_device_alloc(struct usb_device *udev)
  {
        struct stub_device *sdev;
 -      int busnum = interface_to_busnum(interface);
 -      int devnum = interface_to_devnum(interface);
 +      int busnum = udev->bus->busnum;
 +      int devnum = udev->devnum;
  
 -      dev_dbg(&interface->dev, "allocating stub device");
 +      dev_dbg(&udev->dev, "allocating stub device");
  
        /* yes, it's a new device */
        sdev = kzalloc(sizeof(struct stub_device), GFP_KERNEL);
        if (!sdev)
                return NULL;
  
 -      sdev->interface = usb_get_intf(interface);
        sdev->udev = usb_get_dev(udev);
  
        /*
  
        usbip_start_eh(&sdev->ud);
  
 -      dev_dbg(&interface->dev, "register new interface\n");
 +      dev_dbg(&udev->dev, "register new device\n");
  
        return sdev;
  }
@@@ -333,21 -332,32 +333,21 @@@ static void stub_device_free(struct stu
        kfree(sdev);
  }
  
 -/*
 - * If a usb device has multiple active interfaces, this driver is bound to all
 - * the active interfaces. However, usbip exports *a* usb device (i.e., not *an*
 - * active interface). Currently, a userland program must ensure that it
 - * looks at the usbip's sysfs entries of only the first active interface.
 - *
 - * TODO: use "struct usb_device_driver" to bind a usb device.
 - * However, it seems it is not fully supported in mainline kernel yet
 - * (2.6.19.2).
 - */
 -static int stub_probe(struct usb_interface *interface,
 -                    const struct usb_device_id *id)
 +static int stub_probe(struct usb_device *udev)
  {
 -      struct usb_device *udev = interface_to_usbdev(interface);
        struct stub_device *sdev = NULL;
 -      const char *udev_busid = dev_name(interface->dev.parent);
 +      const char *udev_busid = dev_name(&udev->dev);
        int err = 0;
        struct bus_id_priv *busid_priv;
 +      int rc;
  
 -      dev_dbg(&interface->dev, "Enter\n");
 +      dev_dbg(&udev->dev, "Enter\n");
  
        /* check we should claim or not by busid_table */
        busid_priv = get_busid_priv(udev_busid);
        if (!busid_priv || (busid_priv->status == STUB_BUSID_REMOV) ||
            (busid_priv->status == STUB_BUSID_OTHER)) {
 -              dev_info(&interface->dev,
 +              dev_info(&udev->dev,
                        "%s is not in match_busid table... skip!\n",
                        udev_busid);
  
                return -ENODEV;
        }
  
 -      if (busid_priv->status == STUB_BUSID_ALLOC) {
 -              sdev = busid_priv->sdev;
 -              if (!sdev)
 -                      return -ENODEV;
 -
 -              busid_priv->interf_count++;
 -              dev_info(&interface->dev,
 -                      "usbip-host: register new interface (bus %u dev %u ifn %u)\n",
 -                      udev->bus->busnum, udev->devnum,
 -                      interface->cur_altsetting->desc.bInterfaceNumber);
 -
 -              /* set private data to usb_interface */
 -              usb_set_intfdata(interface, sdev);
 -
 -              err = stub_add_files(&interface->dev);
 -              if (err) {
 -                      dev_err(&interface->dev, "stub_add_files for %s\n",
 -                              udev_busid);
 -                      usb_set_intfdata(interface, NULL);
 -                      busid_priv->interf_count--;
 -                      return err;
 -              }
 -
 -              usb_get_intf(interface);
 -              return 0;
 -      }
 -
        /* ok, this is my device */
 -      sdev = stub_device_alloc(udev, interface);
 +      sdev = stub_device_alloc(udev);
        if (!sdev)
                return -ENOMEM;
  
 -      dev_info(&interface->dev,
 -              "usbip-host: register new device (bus %u dev %u ifn %u)\n",
 -              udev->bus->busnum, udev->devnum,
 -              interface->cur_altsetting->desc.bInterfaceNumber);
 +      dev_info(&udev->dev,
 +              "usbip-host: register new device (bus %u dev %u)\n",
 +              udev->bus->busnum, udev->devnum);
  
 -      busid_priv->interf_count = 0;
        busid_priv->shutdown_busid = 0;
  
 -      /* set private data to usb_interface */
 -      usb_set_intfdata(interface, sdev);
 -      busid_priv->interf_count++;
 +      /* set private data to usb_device */
 +      dev_set_drvdata(&udev->dev, sdev);
        busid_priv->sdev = sdev;
 +      busid_priv->udev = udev;
 +
 +      /*
 +       * Claim this hub port.
 +       * It doesn't matter what value we pass as owner
 +       * (struct dev_state) as long as it is unique.
 +       */
 +      rc = usb_hub_claim_port(udev->parent, udev->portnum,
 +                      (struct usb_dev_state *) udev);
 +      if (rc) {
 +              dev_dbg(&udev->dev, "unable to claim port\n");
 +              return rc;
 +      }
  
 -      err = stub_add_files(&interface->dev);
 +      err = stub_add_files(&udev->dev);
        if (err) {
 -              dev_err(&interface->dev, "stub_add_files for %s\n", udev_busid);
 -              usb_set_intfdata(interface, NULL);
 -              usb_put_intf(interface);
 +              dev_err(&udev->dev, "stub_add_files for %s\n", udev_busid);
 +              dev_set_drvdata(&udev->dev, NULL);
                usb_put_dev(udev);
                kthread_stop_put(sdev->ud.eh);
  
 -              busid_priv->interf_count = 0;
                busid_priv->sdev = NULL;
                stub_device_free(sdev);
                return err;
@@@ -432,14 -461,13 +432,14 @@@ static void shutdown_busid(struct bus_i
   * called in usb_disconnect() or usb_deregister()
   * but only if actconfig(active configuration) exists
   */
 -static void stub_disconnect(struct usb_interface *interface)
 +static void stub_disconnect(struct usb_device *udev)
  {
        struct stub_device *sdev;
 -      const char *udev_busid = dev_name(interface->dev.parent);
 +      const char *udev_busid = dev_name(&udev->dev);
        struct bus_id_priv *busid_priv;
 +      int rc;
  
 -      dev_dbg(&interface->dev, "Enter\n");
 +      dev_dbg(&udev->dev, "Enter\n");
  
        busid_priv = get_busid_priv(udev_busid);
        if (!busid_priv) {
                return;
        }
  
 -      sdev = usb_get_intfdata(interface);
 +      sdev = dev_get_drvdata(&udev->dev);
  
        /* get stub_device */
        if (!sdev) {
 -              dev_err(&interface->dev, "could not get device");
 +              dev_err(&udev->dev, "could not get device");
                return;
        }
  
 -      usb_set_intfdata(interface, NULL);
 +      dev_set_drvdata(&udev->dev, NULL);
  
        /*
         * NOTE: rx/tx threads are invoked for each usb_device.
         */
 -      stub_remove_files(&interface->dev);
 +      stub_remove_files(&udev->dev);
  
 -      /* If usb reset is called from event handler */
 -      if (busid_priv->sdev->ud.eh == current) {
 -              busid_priv->interf_count--;
 +      /* release port */
 +      rc = usb_hub_release_port(udev->parent, udev->portnum,
 +                                (struct usb_dev_state *) udev);
 +      if (rc) {
 +              dev_dbg(&udev->dev, "unable to release port\n");
                return;
        }
  
 -      if (busid_priv->interf_count > 1) {
 -              busid_priv->interf_count--;
 -              shutdown_busid(busid_priv);
 -              usb_put_intf(interface);
 +      /* If usb reset is called from event handler */
 +      if (busid_priv->sdev->ud.eh == current)
                return;
 -      }
 -
 -      busid_priv->interf_count = 0;
  
        /* shutdown the current connection */
        shutdown_busid(busid_priv);
  
        usb_put_dev(sdev->udev);
 -      usb_put_intf(interface);
  
        /* free sdev */
        busid_priv->sdev = NULL;
        }
  }
  
 -/*
 - * Presence of pre_reset and post_reset prevents the driver from being unbound
 - * when the device is being reset
 - */
 +#ifdef CONFIG_PM
  
 -static int stub_pre_reset(struct usb_interface *interface)
 +/* These functions need usb_port_suspend and usb_port_resume,
 + * which reside in drivers/usb/core/usb.h. Skip for now. */
 +
 +static int stub_suspend(struct usb_device *udev, pm_message_t message)
  {
 -      dev_dbg(&interface->dev, "pre_reset\n");
 +      dev_dbg(&udev->dev, "stub_suspend\n");
 +
        return 0;
  }
  
 -static int stub_post_reset(struct usb_interface *interface)
 +static int stub_resume(struct usb_device *udev, pm_message_t message)
  {
 -      dev_dbg(&interface->dev, "post_reset\n");
 +      dev_dbg(&udev->dev, "stub_resume\n");
 +
        return 0;
  }
  
 -struct usb_driver stub_driver = {
 +#endif        /* CONFIG_PM */
 +
 +struct usb_device_driver stub_driver = {
        .name           = "usbip-host",
        .probe          = stub_probe,
        .disconnect     = stub_disconnect,
 -      .id_table       = stub_table,
 -      .pre_reset      = stub_pre_reset,
 -      .post_reset     = stub_post_reset,
 +#ifdef CONFIG_PM
 +      .suspend        = stub_suspend,
 +      .resume         = stub_resume,
 +#endif
 +      .supports_autosuspend   =       0,
  };
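
With this change usbip-host registers as a usb_device_driver, which binds once per USB device, rather than as a usb_driver bound to each interface; that is why the interf_count bookkeeping disappears above. A hedged skeleton of such a driver and its registration, where every name except the core USB API is illustrative:

#include <linux/module.h>
#include <linux/usb.h>

static int my_probe(struct usb_device *udev)
{
	dev_info(&udev->dev, "bound to the whole device\n");
	return 0;
}

static void my_disconnect(struct usb_device *udev)
{
	dev_info(&udev->dev, "unbound\n");
}

static struct usb_device_driver my_driver = {
	.name		= "my-usbdev",
	.probe		= my_probe,
	.disconnect	= my_disconnect,
	.supports_autosuspend = 0,
};

static int __init my_init(void)
{
	/* note the _device_ variant of the registration call */
	return usb_register_device_driver(&my_driver, THIS_MODULE);
}

static void __exit my_exit(void)
{
	usb_deregister_device_driver(&my_driver);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");
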
index 184fa70365db3e32a37f55efc819858b431578d6,e010939ebb1242991264ab29da33a422e526d7b6..facaaf003f19931b2f15603568bb565f3de40607
@@@ -55,8 -55,7 +55,8 @@@ static ssize_t usbip_debug_store(struc
                                 struct device_attribute *attr, const char *buf,
                                 size_t count)
  {
 -      sscanf(buf, "%lx", &usbip_debug_flag);
 +      if (sscanf(buf, "%lx", &usbip_debug_flag) != 1)
 +              return -EINVAL;
        return count;
  }
  DEVICE_ATTR_RW(usbip_debug);
@@@ -100,8 -99,26 +100,8 @@@ static void usbip_dump_usb_device(struc
        struct device *dev = &udev->dev;
        int i;
  
 -      dev_dbg(dev, "       devnum(%d) devpath(%s) ",
 -              udev->devnum, udev->devpath);
 -
 -      switch (udev->speed) {
 -      case USB_SPEED_HIGH:
 -              pr_debug("SPD_HIGH ");
 -              break;
 -      case USB_SPEED_FULL:
 -              pr_debug("SPD_FULL ");
 -              break;
 -      case USB_SPEED_LOW:
 -              pr_debug("SPD_LOW ");
 -              break;
 -      case USB_SPEED_UNKNOWN:
 -              pr_debug("SPD_UNKNOWN ");
 -              break;
 -      default:
 -              pr_debug("SPD_ERROR ");
 -              break;
 -      }
 +      dev_dbg(dev, "       devnum(%d) devpath(%s) usb speed(%s)",
 +              udev->devnum, udev->devpath, usb_speed_string(udev->speed));
  
        pr_debug("tt %p, ttport %d\n", udev->tt, udev->ttport);
  
@@@ -178,8 -195,8 +178,8 @@@ static void usbip_dump_usb_ctrlrequest(
        }
  
        pr_debug("       ");
 -      pr_debug("bRequestType(%02X) bRequest(%02X) wValue(%04X) wIndex(%04X) "
 -               "wLength(%04X) ", cmd->bRequestType, cmd->bRequest,
 +      pr_debug("bRequestType(%02X) bRequest(%02X) wValue(%04X) wIndex(%04X) wLength(%04X) ",
 +               cmd->bRequestType, cmd->bRequest,
                 cmd->wValue, cmd->wIndex, cmd->wLength);
        pr_debug("\n       ");
  
@@@ -290,7 -307,8 +290,7 @@@ void usbip_dump_header(struct usbip_hea
  
        switch (pdu->base.command) {
        case USBIP_CMD_SUBMIT:
 -              pr_debug("USBIP_CMD_SUBMIT: "
 -                       "x_flags %u x_len %u sf %u #p %d iv %d\n",
 +              pr_debug("USBIP_CMD_SUBMIT: x_flags %u x_len %u sf %u #p %d iv %d\n",
                         pdu->u.cmd_submit.transfer_flags,
                         pdu->u.cmd_submit.transfer_buffer_length,
                         pdu->u.cmd_submit.start_frame,
@@@ -382,31 -400,6 +382,6 @@@ err
  }
  EXPORT_SYMBOL_GPL(usbip_recv);
  
- struct socket *sockfd_to_socket(unsigned int sockfd)
- {
-       struct socket *socket;
-       struct file *file;
-       struct inode *inode;
-       file = fget(sockfd);
-       if (!file) {
-               pr_err("invalid sockfd\n");
-               return NULL;
-       }
-       inode = file_inode(file);
-       if (!inode || !S_ISSOCK(inode->i_mode)) {
-               fput(file);
-               return NULL;
-       }
-       socket = SOCKET_I(inode);
-       return socket;
- }
- EXPORT_SYMBOL_GPL(sockfd_to_socket);
  /* there may be more cases to tweak the flags. */
  static unsigned int tweak_transfer_flags(unsigned int flags)
  {
@@@ -687,7 -680,8 +662,7 @@@ int usbip_recv_iso(struct usbip_device 
  
        if (total_length != urb->actual_length) {
                dev_err(&urb->dev->dev,
 -                      "total length of iso packets %d not equal to actual "
 -                      "length of buffer %d\n",
 +                      "total length of iso packets %d not equal to actual length of buffer %d\n",
                        total_length, urb->actual_length);
  
                if (ud->side == USBIP_STUB)
index 732fb636a1e5b26c19b816448f7faa08391f5a35,9f86588a4534b4464867c727d93eab3714891ea1..f555d834f134a8a72e52751de62d3305bde27ecc
@@@ -29,7 -29,6 +29,7 @@@
  #include <linux/types.h>
  #include <linux/usb.h>
  #include <linux/wait.h>
 +#include "uapi/usbip.h"
  
  #define USBIP_VERSION "1.0.0"
  
@@@ -236,6 -235,22 +236,6 @@@ enum usbip_side 
        USBIP_STUB,
  };
  
 -enum usbip_status {
 -      /* sdev is available. */
 -      SDEV_ST_AVAILABLE = 0x01,
 -      /* sdev is now used. */
 -      SDEV_ST_USED,
 -      /* sdev is unusable because of a fatal error. */
 -      SDEV_ST_ERROR,
 -
 -      /* vdev does not connect a remote device. */
 -      VDEV_ST_NULL,
 -      /* vdev is used, but the USB address is not assigned yet */
 -      VDEV_ST_NOTASSIGNED,
 -      VDEV_ST_USED,
 -      VDEV_ST_ERROR
 -};
 -
  /* event handler */
  #define USBIP_EH_SHUTDOWN     (1 << 0)
  #define USBIP_EH_BYE          (1 << 1)
  /* a common structure for stub_device and vhci_device */
  struct usbip_device {
        enum usbip_side side;
 -      enum usbip_status status;
 +      enum usbip_device_status status;
  
        /* lock for status */
        spinlock_t lock;
@@@ -299,7 -314,6 +299,6 @@@ void usbip_dump_urb(struct urb *purb)
  void usbip_dump_header(struct usbip_header *pdu);
  
  int usbip_recv(struct socket *sock, void *buf, int size);
- struct socket *sockfd_to_socket(unsigned int sockfd);
  
  void usbip_pack_pdu(struct usbip_header *pdu, struct urb *urb, int cmd,
                    int pack);
index 1e84577230ef4120f4847b6ca67af1e028a102cb,99dd2b1656c95b741d9dd7e8d23d3b4981280b5c..70e17551943dc45bb49edfd4f3ba9e38a87df8fe
@@@ -205,6 -205,8 +205,6 @@@ static int vhci_hub_status(struct usb_h
                }
        }
  
 -      pr_info("changed %d\n", changed);
 -
        if ((hcd->state == HC_STATE_SUSPENDED) && (changed == 1))
                usb_hcd_resume_root_hub(hcd);
  
@@@ -271,14 -273,14 +271,14 @@@ static int vhci_hub_control(struct usb_
                        }
                        break;
                case USB_PORT_FEAT_POWER:
 -                      usbip_dbg_vhci_rh(" ClearPortFeature: "
 -                                        "USB_PORT_FEAT_POWER\n");
 +                      usbip_dbg_vhci_rh(
 +                              " ClearPortFeature: USB_PORT_FEAT_POWER\n");
                        dum->port_status[rhport] = 0;
                        dum->resuming = 0;
                        break;
                case USB_PORT_FEAT_C_RESET:
 -                      usbip_dbg_vhci_rh(" ClearPortFeature: "
 -                                        "USB_PORT_FEAT_C_RESET\n");
 +                      usbip_dbg_vhci_rh(
 +                              " ClearPortFeature: USB_PORT_FEAT_C_RESET\n");
                        switch (dum->vdev[rhport].speed) {
                        case USB_SPEED_HIGH:
                                dum->port_status[rhport] |=
  
                        if (dum->vdev[rhport].ud.status ==
                            VDEV_ST_NOTASSIGNED) {
 -                              usbip_dbg_vhci_rh(" enable rhport %d "
 -                                                "(status %u)\n",
 -                                                rhport,
 -                                                dum->vdev[rhport].ud.status);
 +                              usbip_dbg_vhci_rh(
 +                                      " enable rhport %d (status %u)\n",
 +                                      rhport,
 +                                      dum->vdev[rhport].ud.status);
                                dum->port_status[rhport] |=
                                        USB_PORT_STAT_ENABLE;
                        }
                }
                ((__le16 *) buf)[0] = cpu_to_le16(dum->port_status[rhport]);
 -              ((__le16 *) buf)[1] = cpu_to_le16(dum->port_status[rhport] >> 16);
 +              ((__le16 *) buf)[1] =
 +                      cpu_to_le16(dum->port_status[rhport] >> 16);
  
                usbip_dbg_vhci_rh(" GetPortStatus bye %x %x\n", ((u16 *)buf)[0],
                                  ((u16 *)buf)[1]);
        case SetPortFeature:
                switch (wValue) {
                case USB_PORT_FEAT_SUSPEND:
 -                      usbip_dbg_vhci_rh(" SetPortFeature: "
 -                                        "USB_PORT_FEAT_SUSPEND\n");
 +                      usbip_dbg_vhci_rh(
 +                              " SetPortFeature: USB_PORT_FEAT_SUSPEND\n");
                        break;
                case USB_PORT_FEAT_RESET:
 -                      usbip_dbg_vhci_rh(" SetPortFeature: "
 -                                        "USB_PORT_FEAT_RESET\n");
 +                      usbip_dbg_vhci_rh(
 +                              " SetPortFeature: USB_PORT_FEAT_RESET\n");
                        /* if it's already running, disconnect first */
                        if (dum->port_status[rhport] & USB_PORT_STAT_ENABLE) {
                                dum->port_status[rhport] &=
@@@ -536,8 -537,9 +536,8 @@@ static int vhci_urb_enqueue(struct usb_
  
                case USB_REQ_GET_DESCRIPTOR:
                        if (ctrlreq->wValue == cpu_to_le16(USB_DT_DEVICE << 8))
 -                              usbip_dbg_vhci_hc("Not yet?: "
 -                                                "Get_Descriptor to device 0 "
 -                                                "(get max pipe size)\n");
 +                              usbip_dbg_vhci_hc(
 +                                      "Not yet?: Get_Descriptor to device 0 (get max pipe size)\n");
  
                        if (vdev->udev)
                                usb_put_dev(vdev->udev);
  
                default:
                        /* NOT REACHED */
 -                      dev_err(dev, "invalid request to devnum 0 bRequest %u, "
 -                              "wValue %u\n", ctrlreq->bRequest,
 +                      dev_err(dev,
 +                              "invalid request to devnum 0 bRequest %u, wValue %u\n",
 +                              ctrlreq->bRequest,
                                ctrlreq->wValue);
                        ret =  -EINVAL;
                        goto no_need_xmit;
@@@ -788,7 -789,7 +788,7 @@@ static void vhci_shutdown_connection(st
  
        /* active connection is closed */
        if (vdev->ud.tcp_socket) {
-               fput(vdev->ud.tcp_socket->file);
+               sockfd_put(vdev->ud.tcp_socket);
                vdev->ud.tcp_socket = NULL;
        }
        pr_info("release socket\n");
@@@ -835,7 -836,7 +835,7 @@@ static void vhci_device_reset(struct us
        vdev->udev = NULL;
  
        if (ud->tcp_socket) {
-               fput(ud->tcp_socket->file);
+               sockfd_put(ud->tcp_socket);
                ud->tcp_socket = NULL;
        }
        ud->status = VDEV_ST_NULL;
@@@ -1069,9 -1070,8 +1069,9 @@@ static int vhci_hcd_suspend(struct plat
        spin_unlock(&the_controller->lock);
  
        if (connected > 0) {
 -              dev_info(&pdev->dev, "We have %d active connection%s. Do not "
 -                       "suspend.\n", connected, (connected == 1 ? "" : "s"));
 +              dev_info(&pdev->dev,
 +                       "We have %d active connection%s. Do not suspend.\n",
 +                       connected, (connected == 1 ? "" : "s"));
                ret =  -EBUSY;
        } else {
                dev_info(&pdev->dev, "suspend vhci_hcd");
index e0980324fb0366e06a39b84ca26c109b36381e35,baba127081b3c65588213aa8acf6333a5d03221d..47bddcdde0a621330a063d297b132f8da8396b72
@@@ -47,8 -47,8 +47,8 @@@ static ssize_t status_show(struct devic
         * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a
         * port number and its peer IP address.
         */
 -      out += sprintf(out, "prt sta spd bus dev socket           "
 -                     "local_busid\n");
 +      out += sprintf(out,
 +                     "prt sta spd bus dev socket           local_busid\n");
  
        for (i = 0; i < VHCI_NPORTS; i++) {
                struct vhci_device *vdev = port_to_vdev(i);
@@@ -114,8 -114,7 +114,8 @@@ static ssize_t store_detach(struct devi
        int err;
        __u32 rhport = 0;
  
 -      sscanf(buf, "%u", &rhport);
 +      if (sscanf(buf, "%u", &rhport) != 1)
 +              return -EINVAL;
  
        /* check rhport */
        if (rhport >= VHCI_NPORTS) {
@@@ -176,6 -175,7 +176,7 @@@ static ssize_t store_attach(struct devi
        struct socket *socket;
        int sockfd = 0;
        __u32 rhport = 0, devid = 0, speed = 0;
+       int err;
  
        /*
         * @rhport: port number of vhci_hcd
         * @devid: unique device identifier in a remote host
         * @speed: usb device speed in a remote host
         */
 -      sscanf(buf, "%u %u %u %u", &rhport, &sockfd, &devid, &speed);
 +      if (sscanf(buf, "%u %u %u %u", &rhport, &sockfd, &devid, &speed) != 4)
 +              return -EINVAL;
  
        usbip_dbg_vhci_sysfs("rhport(%u) sockfd(%u) devid(%u) speed(%u)\n",
                             rhport, sockfd, devid, speed);
                return -EINVAL;
  
        /* Extract socket from fd. */
-       /* The correct way to clean this up is to fput(socket->file). */
-       socket = sockfd_to_socket(sockfd);
+       socket = sockfd_lookup(sockfd, &err);
        if (!socket)
                return -EINVAL;
  
                spin_unlock(&vdev->ud.lock);
                spin_unlock(&the_controller->lock);
  
-               fput(socket->file);
+               sockfd_put(socket);
  
                dev_err(dev, "port %d already used\n", rhport);
                return -EINVAL;
        }
  
 -      dev_info(dev, "rhport(%u) sockfd(%d) devid(%u) speed(%u)\n",
 -               rhport, sockfd, devid, speed);
 +      dev_info(dev,
 +               "rhport(%u) sockfd(%d) devid(%u) speed(%u) speed_str(%s)\n",
 +               rhport, sockfd, devid, speed, usb_speed_string(speed));
  
        vdev->devid         = devid;
        vdev->speed         = speed;
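
Both handlers above now reject input unless sscanf() converted every expected field, the standard hardening for a sysfs store; the comparison value is the number of conversions, one in store_detach() and four in store_attach(). A stripped-down sketch of the pattern (demo_store and its format string are illustrative only):

#include <linux/device.h>
#include <linux/kernel.h>

static ssize_t demo_store(struct device *dev, struct device_attribute *attr,
			  const char *buf, size_t count)
{
	unsigned int rhport, devid, speed;
	int sockfd;

	/* all four fields must parse, otherwise the input is malformed */
	if (sscanf(buf, "%u %d %u %u", &rhport, &sockfd, &devid, &speed) != 4)
		return -EINVAL;

	/* ... validate ranges and act on the values ... */
	return count;	/* consume the whole write on success */
}
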
diff --combined fs/bio.c
index b1bc722b89aa6b99a6e2c8dcf64aa4dbbd0d82ff,7065837ae9f36425b095fd41af777dc35ef632b4..6f0362b77806c61909aa37433a9e77eb77476cff
+++ b/fs/bio.c
@@@ -116,6 -116,7 +116,6 @@@ static struct kmem_cache *bio_find_or_c
        if (!slab)
                goto out_unlock;
  
 -      printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
        bslab->slab = slab;
        bslab->slab_ref = 1;
        bslab->slab_size = sz;
@@@ -1002,7 -1003,7 +1002,7 @@@ struct bio_map_data 
  };
  
  static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
-                            struct sg_iovec *iov, int iov_count,
+                            const struct sg_iovec *iov, int iov_count,
                             int is_our_pages)
  {
        memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
@@@ -1022,7 -1023,7 +1022,7 @@@ static struct bio_map_data *bio_alloc_m
                       sizeof(struct sg_iovec) * iov_count, gfp_mask);
  }
  
- static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
+ static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
                          int to_user, int from_user, int do_free_page)
  {
        int ret = 0, i;
@@@ -1120,7 -1121,7 +1120,7 @@@ EXPORT_SYMBOL(bio_uncopy_user)
   */
  struct bio *bio_copy_user_iov(struct request_queue *q,
                              struct rq_map_data *map_data,
-                             struct sg_iovec *iov, int iov_count,
+                             const struct sg_iovec *iov, int iov_count,
                              int write_to_vm, gfp_t gfp_mask)
  {
        struct bio_map_data *bmd;
@@@ -1259,7 -1260,7 +1259,7 @@@ EXPORT_SYMBOL(bio_copy_user)
  
  static struct bio *__bio_map_user_iov(struct request_queue *q,
                                      struct block_device *bdev,
-                                     struct sg_iovec *iov, int iov_count,
+                                     const struct sg_iovec *iov, int iov_count,
                                      int write_to_vm, gfp_t gfp_mask)
  {
        int i, j;
@@@ -1407,7 -1408,7 +1407,7 @@@ EXPORT_SYMBOL(bio_map_user)
   *    device. Returns an error pointer in case of error.
   */
  struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
-                            struct sg_iovec *iov, int iov_count,
+                            const struct sg_iovec *iov, int iov_count,
                             int write_to_vm, gfp_t gfp_mask)
  {
        struct bio *bio;
@@@ -1969,7 -1970,7 +1969,7 @@@ int bio_associate_current(struct bio *b
  
        /* associate blkcg if exists */
        rcu_read_lock();
 -      css = task_css(current, blkio_subsys_id);
 +      css = task_css(current, blkio_cgrp_id);
        if (css && css_tryget(css))
                bio->bi_css = css;
        rcu_read_unlock();
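
Const-qualifying the sg_iovec parameters (here and in block/blk-map.c) documents that the mapping and copy helpers only read the iovec array, so callers may pass views they must not modify. A sketch of a caller relying on that, assuming the queue and any rq_map_data setup happen elsewhere:

#include <linux/bio.h>
#include <linux/blkdev.h>

static struct bio *demo_map(struct request_queue *q,
			    const struct sg_iovec *iov, int iov_count,
			    gfp_t gfp_mask)
{
	/* a read-only iovec view is now enough for the copy path */
	return bio_copy_user_iov(q, NULL, iov, iov_count, 1, gfp_mask);
}
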
diff --combined fs/block_dev.c
index ba0d2b05bb787a28e59629a05586441e8b9386af,764bd3b8d2fa06d6130e45a4c0ffef2af70d61d8..552a8d13bc321f4d1cf64fb9b3171893e28e73e9
@@@ -83,7 -83,7 +83,7 @@@ void kill_bdev(struct block_device *bde
  {
        struct address_space *mapping = bdev->bd_inode->i_mapping;
  
 -      if (mapping->nrpages == 0)
 +      if (mapping->nrpages == 0 && mapping->nrshadows == 0)
                return;
  
        invalidate_bh_lrus();
@@@ -419,7 -419,7 +419,7 @@@ static void bdev_evict_inode(struct ino
  {
        struct block_device *bdev = &BDEV_I(inode)->bdev;
        struct list_head *p;
 -      truncate_inode_pages(&inode->i_data, 0);
 +      truncate_inode_pages_final(&inode->i_data);
        invalidate_inode_buffers(inode); /* is it needed here? */
        clear_inode(inode);
        spin_lock(&bdev_lock);
@@@ -1518,12 -1518,12 +1518,12 @@@ ssize_t blkdev_aio_write(struct kiocb *
        BUG_ON(iocb->ki_pos != pos);
  
        blk_start_plug(&plug);
-       ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+       ret = __generic_file_aio_write(iocb, iov, nr_segs);
        if (ret > 0) {
                ssize_t err;
  
                err = generic_write_sync(file, pos, ret);
 -              if (err < 0 && ret > 0)
 +              if (err < 0)
                        ret = err;
        }
        blk_finish_plug(&plug);
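
The same shape repeats in every __generic_file_aio_write() caller in this series: the position pointer argument is gone, the file position lives in iocb->ki_pos, and the generic_write_sync() error check drops the redundant ret > 0 test because it already sits inside that branch. A sketch of the resulting ->aio_write (locking and plugging omitted, name hypothetical):

#include <linux/fs.h>
#include <linux/aio.h>

static ssize_t demo_aio_write(struct kiocb *iocb, const struct iovec *iov,
			      unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	ssize_t ret;

	BUG_ON(iocb->ki_pos != pos);

	ret = __generic_file_aio_write(iocb, iov, nr_segs);
	if (ret > 0) {
		ssize_t err = generic_write_sync(file, pos, ret);

		if (err < 0)	/* ret > 0 already holds in this branch */
			ret = err;
	}
	return ret;
}
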
diff --combined fs/btrfs/file.c
index c5998477fe60ef3f53dafd8e741e1f750be6daee,8ed4b165abbd2b3b4bb996ee4273c14fe0bce831..eb742c07e7a41aacdb595b0252a12b3584bbee83
@@@ -425,13 -425,8 +425,8 @@@ static noinline int btrfs_copy_from_use
                struct page *page = prepared_pages[pg];
                /*
                 * Copy data from userspace to the current page
-                *
-                * Disable pagefault to avoid recursive lock since
-                * the pages are already locked
                 */
-               pagefault_disable();
                copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
-               pagefault_enable();
  
                /* Flush processor's dcache for this page */
                flush_dcache_page(page);
@@@ -591,6 -586,7 +586,6 @@@ void btrfs_drop_extent_cache(struct ino
                clear_bit(EXTENT_FLAG_PINNED, &em->flags);
                clear_bit(EXTENT_FLAG_LOGGING, &flags);
                modified = !list_empty(&em->list);
 -              remove_extent_mapping(em_tree, em);
                if (no_splits)
                        goto next;
  
                        split->bdev = em->bdev;
                        split->flags = flags;
                        split->compress_type = em->compress_type;
 -                      ret = add_extent_mapping(em_tree, split, modified);
 -                      BUG_ON(ret); /* Logic error */
 +                      replace_extent_mapping(em_tree, em, split, modified);
                        free_extent_map(split);
                        split = split2;
                        split2 = NULL;
                                split->orig_block_len = 0;
                        }
  
 -                      ret = add_extent_mapping(em_tree, split, modified);
 -                      BUG_ON(ret); /* Logic error */
 +                      if (extent_map_in_tree(em)) {
 +                              replace_extent_mapping(em_tree, em, split,
 +                                                     modified);
 +                      } else {
 +                              ret = add_extent_mapping(em_tree, split,
 +                                                       modified);
 +                              ASSERT(ret == 0); /* Logic error */
 +                      }
                        free_extent_map(split);
                        split = NULL;
                }
  next:
 +              if (extent_map_in_tree(em))
 +                      remove_extent_mapping(em_tree, em);
                write_unlock(&em_tree->lock);
  
                /* once for us */
@@@ -726,7 -715,7 +721,7 @@@ int __btrfs_drop_extents(struct btrfs_t
        if (drop_cache)
                btrfs_drop_extent_cache(inode, start, end - 1, 0);
  
 -      if (start >= BTRFS_I(inode)->disk_i_size)
 +      if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
                modify_tree = 0;
  
        while (1) {
@@@ -804,10 -793,7 +799,10 @@@ next_slot
                 */
                if (start > key.offset && end < extent_end) {
                        BUG_ON(del_nr > 0);
 -                      BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
 +                      if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 +                              ret = -EINVAL;
 +                              break;
 +                      }
  
                        memcpy(&new_key, &key, sizeof(new_key));
                        new_key.offset = start;
                 *      | -------- extent -------- |
                 */
                if (start <= key.offset && end < extent_end) {
 -                      BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
 +                      if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 +                              ret = -EINVAL;
 +                              break;
 +                      }
  
                        memcpy(&new_key, &key, sizeof(new_key));
                        new_key.offset = end;
                 */
                if (start > key.offset && end >= extent_end) {
                        BUG_ON(del_nr > 0);
 -                      BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
 +                      if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 +                              ret = -EINVAL;
 +                              break;
 +                      }
  
                        btrfs_set_file_extent_num_bytes(leaf, fi,
                                                        start - key.offset);
                 * Set path->slots[0] to first slot, so that after the delete
                 * if items are move off from our leaf to its immediate left or
                 * right neighbor leafs, we end up with a correct and adjusted
 -               * path->slots[0] for our insertion.
 +               * path->slots[0] for our insertion (if replace_extent != 0).
                 */
                path->slots[0] = del_slot;
                ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
                if (ret)
                        btrfs_abort_transaction(trans, root, ret);
 +      }
  
 -              leaf = path->nodes[0];
 -              /*
 -               * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that
 -               * is, its contents got pushed to its neighbors), in which case
 -               * it means path->locks[0] == 0
 -               */
 -              if (!ret && replace_extent && leafs_visited == 1 &&
 -                  path->locks[0] &&
 -                  btrfs_leaf_free_space(root, leaf) >=
 -                  sizeof(struct btrfs_item) + extent_item_size) {
 -
 -                      key.objectid = ino;
 -                      key.type = BTRFS_EXTENT_DATA_KEY;
 -                      key.offset = start;
 -                      setup_items_for_insert(root, path, &key,
 -                                             &extent_item_size,
 -                                             extent_item_size,
 -                                             sizeof(struct btrfs_item) +
 -                                             extent_item_size, 1);
 -                      *key_inserted = 1;
 +      leaf = path->nodes[0];
 +      /*
 +       * If btrfs_del_items() was called, it might have deleted a leaf, in
 +       * which case it unlocked our path, so check path->locks[0] matches a
 +       * write lock.
 +       */
 +      if (!ret && replace_extent && leafs_visited == 1 &&
 +          (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING ||
 +           path->locks[0] == BTRFS_WRITE_LOCK) &&
 +          btrfs_leaf_free_space(root, leaf) >=
 +          sizeof(struct btrfs_item) + extent_item_size) {
 +
 +              key.objectid = ino;
 +              key.type = BTRFS_EXTENT_DATA_KEY;
 +              key.offset = start;
 +              if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) {
 +                      struct btrfs_key slot_key;
 +
 +                      btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]);
 +                      if (btrfs_comp_cpu_keys(&key, &slot_key) > 0)
 +                              path->slots[0]++;
                }
 +              setup_items_for_insert(root, path, &key,
 +                                     &extent_item_size,
 +                                     extent_item_size,
 +                                     sizeof(struct btrfs_item) +
 +                                     extent_item_size, 1);
 +              *key_inserted = 1;
        }
  
        if (!replace_extent || !(*key_inserted))
@@@ -1369,11 -1341,11 +1364,11 @@@ lock_and_cleanup_extent_if_need(struct 
                struct btrfs_ordered_extent *ordered;
                lock_extent_bits(&BTRFS_I(inode)->io_tree,
                                 start_pos, last_pos, 0, cached_state);
 -              ordered = btrfs_lookup_first_ordered_extent(inode, last_pos);
 +              ordered = btrfs_lookup_ordered_range(inode, start_pos,
 +                                                   last_pos - start_pos + 1);
                if (ordered &&
                    ordered->file_offset + ordered->len > start_pos &&
                    ordered->file_offset <= last_pos) {
 -                      btrfs_put_ordered_extent(ordered);
                        unlock_extent_cached(&BTRFS_I(inode)->io_tree,
                                             start_pos, last_pos,
                                             cached_state, GFP_NOFS);
                                unlock_page(pages[i]);
                                page_cache_release(pages[i]);
                        }
 -                      ret = btrfs_wait_ordered_range(inode, start_pos,
 -                                              last_pos - start_pos + 1);
 -                      if (ret)
 -                              return ret;
 -                      else
 -                              return -EAGAIN;
 +                      btrfs_start_ordered_extent(inode, ordered, 1);
 +                      btrfs_put_ordered_extent(ordered);
 +                      return -EAGAIN;
                }
                if (ordered)
                        btrfs_put_ordered_extent(ordered);
@@@ -1416,12 -1391,8 +1411,12 @@@ static noinline int check_can_nocow(str
        u64 num_bytes;
        int ret;
  
 +      ret = btrfs_start_nocow_write(root);
 +      if (!ret)
 +              return -ENOSPC;
 +
        lockstart = round_down(pos, root->sectorsize);
 -      lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1;
 +      lockend = round_up(pos + *write_bytes, root->sectorsize) - 1;
  
        while (1) {
                lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
        ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
        if (ret <= 0) {
                ret = 0;
 +              btrfs_end_nocow_write(root);
        } else {
 -              clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
 -                               EXTENT_DIRTY | EXTENT_DELALLOC |
 -                               EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
 -                               NULL, GFP_NOFS);
 -              *write_bytes = min_t(size_t, *write_bytes, num_bytes);
 +              *write_bytes = min_t(size_t, *write_bytes,
 +                                   num_bytes - pos + lockstart);
        }
  
        unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
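
The new lockend line in check_can_nocow() fixes more than style: rounding up the write length is not the same as rounding up the write's end offset once pos is unaligned, and the old formula could leave the final sector of the write unlocked. A standalone arithmetic check, plain userspace C with simplified power-of-two rounding macros standing in for the kernel's:

#include <stdio.h>

#define round_down(x, y) ((x) & ~((unsigned long long)(y) - 1))
#define round_up(x, y)   (round_down((x) + (y) - 1, (y)))

int main(void)
{
	unsigned long long pos = 5000, write_bytes = 4000, sector = 4096;
	unsigned long long lockstart = round_down(pos, sector);
	unsigned long long old_end = lockstart + round_up(write_bytes, sector) - 1;
	unsigned long long new_end = round_up(pos + write_bytes, sector) - 1;

	/* the write covers bytes 5000..8999; the last byte lives in the
	 * sector ending at 12287, which the old formula failed to lock */
	printf("old lockend=%llu new lockend=%llu\n", old_end, new_end);
	return 0;	/* prints old lockend=8191 new lockend=12287 */
}
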
@@@ -1532,8 -1505,6 +1527,8 @@@ static noinline ssize_t __btrfs_buffere
                        if (!only_release_metadata)
                                btrfs_free_reserved_data_space(inode,
                                                               reserve_bytes);
 +                      else
 +                              btrfs_end_nocow_write(root);
                        break;
                }
  
@@@ -1622,9 -1593,6 +1617,9 @@@ again
                }
  
                release_bytes = 0;
 +              if (only_release_metadata)
 +                      btrfs_end_nocow_write(root);
 +
                if (only_release_metadata && copied > 0) {
                        u64 lockstart = round_down(pos, root->sectorsize);
                        u64 lockend = lockstart +
        kfree(pages);
  
        if (release_bytes) {
 -              if (only_release_metadata)
 +              if (only_release_metadata) {
 +                      btrfs_end_nocow_write(root);
                        btrfs_delalloc_release_metadata(inode, release_bytes);
 -              else
 +              } else {
                        btrfs_delalloc_release_space(inode, release_bytes);
 +              }
        }
  
        return num_written ? num_written : ret;
  static ssize_t __btrfs_direct_write(struct kiocb *iocb,
                                    const struct iovec *iov,
                                    unsigned long nr_segs, loff_t pos,
-                                   loff_t *ppos, size_t count, size_t ocount)
+                                   size_t count, size_t ocount)
  {
        struct file *file = iocb->ki_filp;
        struct iov_iter i;
        loff_t endbyte;
        int err;
  
-       written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
+       written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
                                            count, ocount);
  
        if (written < 0 || written == count)
        if (err)
                goto out;
        written += written_buffered;
-       *ppos = pos + written_buffered;
+       iocb->ki_pos = pos + written_buffered;
        invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
                                 endbyte >> PAGE_CACHE_SHIFT);
  out:
@@@ -1725,9 -1691,7 +1720,8 @@@ static ssize_t btrfs_file_aio_write(str
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       loff_t *ppos = &iocb->ki_pos;
        u64 start_pos;
 +      u64 end_pos;
        ssize_t num_written = 0;
        ssize_t err = 0;
        size_t count, ocount;
  
        start_pos = round_down(pos, root->sectorsize);
        if (start_pos > i_size_read(inode)) {
 -              err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
 +              /* Expand hole size to cover write data, preventing empty gap */
 +              end_pos = round_up(pos + iov->iov_len, root->sectorsize);
 +              err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
                if (err) {
                        mutex_unlock(&inode->i_mutex);
                        goto out;
  
        if (unlikely(file->f_flags & O_DIRECT)) {
                num_written = __btrfs_direct_write(iocb, iov, nr_segs,
-                                                  pos, ppos, count, ocount);
+                                                  pos, count, ocount);
        } else {
                struct iov_iter i;
  
  
                num_written = __btrfs_buffered_write(file, &i, pos);
                if (num_written > 0)
-                       *ppos = pos + num_written;
+                       iocb->ki_pos = pos + num_written;
        }
  
        mutex_unlock(&inode->i_mutex);
        BTRFS_I(inode)->last_sub_trans = root->log_transid;
        if (num_written > 0) {
                err = generic_write_sync(file, pos, num_written);
 -              if (err < 0 && num_written > 0)
 +              if (err < 0)
                        num_written = err;
        }
  
@@@ -1888,9 -1850,8 +1882,9 @@@ int btrfs_sync_file(struct file *file, 
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
 -      int ret = 0;
        struct btrfs_trans_handle *trans;
 +      struct btrfs_log_ctx ctx;
 +      int ret = 0;
        bool full_sync = 0;
  
        trace_btrfs_sync_file(file, datasync);
        }
        trans->sync = true;
  
 -      ret = btrfs_log_dentry_safe(trans, root, dentry);
 +      btrfs_init_log_ctx(&ctx);
 +
 +      ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx);
        if (ret < 0) {
                /* Fallthrough and commit/free transaction. */
                ret = 1;
  
        if (ret != BTRFS_NO_LOG_SYNC) {
                if (!ret) {
 -                      ret = btrfs_sync_log(trans, root);
 +                      ret = btrfs_sync_log(trans, root, &ctx);
                        if (!ret) {
                                ret = btrfs_end_transaction(trans, root);
                                goto out;
@@@ -2028,7 -1987,6 +2022,7 @@@ out
  
  static const struct vm_operations_struct btrfs_file_vm_ops = {
        .fault          = filemap_fault,
 +      .map_pages      = filemap_map_pages,
        .page_mkwrite   = btrfs_page_mkwrite,
        .remap_pages    = generic_file_remap_pages,
  };
@@@ -2193,7 -2151,6 +2187,7 @@@ static int btrfs_punch_hole(struct inod
        bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
                          ((offset + len - 1) >> PAGE_CACHE_SHIFT));
        bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
 +      u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
  
        ret = btrfs_wait_ordered_range(inode, offset, len);
        if (ret)
         * entire page.
         */
        if (same_page && len < PAGE_CACHE_SIZE) {
 -              if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE))
 +              if (offset < ino_size)
                        ret = btrfs_truncate_page(inode, offset, len, 0);
                mutex_unlock(&inode->i_mutex);
                return ret;
        }
  
        /* zero back part of the first page */
 -      if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
 +      if (offset < ino_size) {
                ret = btrfs_truncate_page(inode, offset, 0, 0);
                if (ret) {
                        mutex_unlock(&inode->i_mutex);
        }
  
        /* zero the front end of the last page */
 -      if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) {
 +      if (offset + len < ino_size) {
                ret = btrfs_truncate_page(inode, offset + len, 0, 1);
                if (ret) {
                        mutex_unlock(&inode->i_mutex);
  
                trans->block_rsv = &root->fs_info->trans_block_rsv;
  
 -              ret = fill_holes(trans, inode, path, cur_offset, drop_end);
 -              if (ret) {
 -                      err = ret;
 -                      break;
 +              if (cur_offset < ino_size) {
 +                      ret = fill_holes(trans, inode, path, cur_offset,
 +                                       drop_end);
 +                      if (ret) {
 +                              err = ret;
 +                              break;
 +                      }
                }
  
                cur_offset = drop_end;
        }
  
        trans->block_rsv = &root->fs_info->trans_block_rsv;
 -      ret = fill_holes(trans, inode, path, cur_offset, drop_end);
 -      if (ret) {
 -              err = ret;
 -              goto out_trans;
 +      if (cur_offset < ino_size) {
 +              ret = fill_holes(trans, inode, path, cur_offset, drop_end);
 +              if (ret) {
 +                      err = ret;
 +                      goto out_trans;
 +              }
        }
  
  out_trans:
diff --combined fs/buffer.c
index 8c53a2b15ecbaffcc19ab5f45b53c19174998c91,027ae3bdfbbd9a894a2b6abfb985e47510d938e5..9ddb9fc7d923fa31299a8aba228f61973d3b429f
@@@ -2114,8 -2114,8 +2114,8 @@@ EXPORT_SYMBOL(generic_write_end)
   * Returns true if all buffers which correspond to a file portion
   * we want to read are uptodate.
   */
- int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
-                                       unsigned long from)
+ int block_is_partially_uptodate(struct page *page, unsigned long from,
+                                       unsigned long count)
  {
        unsigned block_start, block_end, blocksize;
        unsigned to;
  
        head = page_buffers(page);
        blocksize = head->b_size;
-       to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
+       to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
        to = from + to;
        if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
                return 0;
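
->is_partially_uptodate now receives the byte count directly instead of digging it out of a read_descriptor_t, so its slot in struct address_space_operations takes (page, offset, count). The hookup itself is unchanged; a sketch:

#include <linux/buffer_head.h>
#include <linux/fs.h>

/* a block-based filesystem opts in by pointing the aop at the generic
 * helper; the VFS read path now supplies offset and count directly */
static const struct address_space_operations demo_aops = {
	/* other methods omitted */
	.is_partially_uptodate	= block_is_partially_uptodate,
};
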
@@@ -3088,7 -3088,7 +3088,7 @@@ EXPORT_SYMBOL(submit_bh)
   * until the buffer gets unlocked).
   *
   * ll_rw_block sets b_end_io to simple completion handler that marks
 - * the buffer up-to-date (if approriate), unlocks the buffer and wakes
 + * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
   * any waiters. 
   *
   * All of the buffers must be for the same device, and must also be a
diff --combined fs/cachefiles/namei.c
index 6494d9f673aa51490a59694d600bd8a7101559bf,1b1283bff8debf95bcaaac1a27331121ee3019f9..c0a681705104fc7a8aae428169e3f342bb4cb8a4
@@@ -391,12 -391,12 +391,12 @@@ try_again
        path.dentry = dir;
        path_to_graveyard.mnt = cache->mnt;
        path_to_graveyard.dentry = cache->graveyard;
 -      ret = security_path_rename(&path, rep, &path_to_graveyard, grave);
 +      ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0);
        if (ret < 0) {
                cachefiles_io_error(cache, "Rename security error %d", ret);
        } else {
                ret = vfs_rename(dir->d_inode, rep,
 -                               cache->graveyard->d_inode, grave, NULL);
 +                               cache->graveyard->d_inode, grave, NULL, 0);
                if (ret != 0 && ret != -ENOMEM)
                        cachefiles_io_error(cache,
                                            "Rename failed with error %d", ret);
@@@ -779,8 -779,7 +779,7 @@@ struct dentry *cachefiles_get_directory
        }
  
        ret = -EPERM;
-       if (!subdir->d_inode->i_op ||
-           !subdir->d_inode->i_op->setxattr ||
+       if (!subdir->d_inode->i_op->setxattr ||
            !subdir->d_inode->i_op->getxattr ||
            !subdir->d_inode->i_op->lookup ||
            !subdir->d_inode->i_op->mkdir ||
diff --combined fs/ceph/file.c
index 66075a4ad97900edbfaf98775d484c31c7496200,359805b671b92c2732150e9eb39c69b1e5c356e1..39da1c2efa5030216d18bc6bb3020a78afb4c5f6
@@@ -210,7 -210,7 +210,7 @@@ int ceph_open(struct inode *inode, stru
        ihold(inode);
  
        req->r_num_caps = 1;
 -      if (flags & (O_CREAT|O_TRUNC))
 +      if (flags & O_CREAT)
                parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
        err = ceph_mdsc_do_request(mdsc, parent_inode, req);
        iput(parent_inode);
@@@ -291,9 -291,8 +291,9 @@@ int ceph_atomic_open(struct inode *dir
                }
                err = finish_open(file, dentry, ceph_open, opened);
        }
 -
  out_err:
 +      if (!req->r_err && req->r_target_inode)
 +              ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
        ceph_mdsc_put_request(req);
        dout("atomic_open result=%d\n", err);
        return err;
@@@ -601,7 -600,7 +601,7 @@@ ceph_sync_direct_write(struct kiocb *io
                                            false);
                if (IS_ERR(req)) {
                        ret = PTR_ERR(req);
-                       goto out;
+                       break;
                }
  
                num_pages = calc_pages_for(page_align, len);
@@@ -719,7 -718,7 +719,7 @@@ static ssize_t ceph_sync_write(struct k
                                            false);
                if (IS_ERR(req)) {
                        ret = PTR_ERR(req);
-                       goto out;
+                       break;
                }
  
                /*
@@@ -971,7 -970,7 +971,8 @@@ retry_snap
                        goto retry_snap;
                }
        } else {
 +              loff_t old_size = inode->i_size;
+               struct iov_iter from;
                /*
                 * No need to acquire the i_truncate_mutex. Because
                 * the MDS revokes Fwb caps before sending truncate
                 * are pending vmtruncate. So write and vmtruncate
                 * can not run at the same time
                 */
-               written = generic_file_buffered_write(iocb, iov, nr_segs,
-                                                     pos, &iocb->ki_pos,
-                                                     count, 0);
+               iov_iter_init(&from, iov, nr_segs, count, 0);
+               written = generic_perform_write(file, &from, pos);
+               if (likely(written >= 0))
+                       iocb->ki_pos = pos + written;
 +              if (inode->i_size > old_size)
 +                      ceph_fscache_update_objectsize(inode);
                mutex_unlock(&inode->i_mutex);
        }
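
This hunk shows the conversion pattern that kills generic_file_buffered_write() across the tree: build an iov_iter over the caller's iovecs, hand it to the newly exported generic_perform_write(), and update iocb->ki_pos by hand on success. Reduced to its skeleton (i_mutex assumed held by the caller, name hypothetical):

#include <linux/fs.h>
#include <linux/uio.h>

static ssize_t demo_buffered_write(struct kiocb *iocb, const struct iovec *iov,
				   unsigned long nr_segs, loff_t pos,
				   size_t count)
{
	struct iov_iter from;
	ssize_t written;

	iov_iter_init(&from, iov, nr_segs, count, 0);
	written = generic_perform_write(iocb->ki_filp, &from, pos);
	if (written >= 0)
		iocb->ki_pos = pos + written;	/* caller tracks position now */
	return written;
}
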
  
diff --combined fs/cifs/cifsfs.c
index 2c70cbe35d39c3b7df79dd0b7e2b3fde6401f8fe,f31f9d6913b2b0ca42d7a18d5e99a9dd6dd1e333..df9c9141c0998383522b181664b9d57256bc99fa
@@@ -286,7 -286,7 +286,7 @@@ cifs_destroy_inode(struct inode *inode
  static void
  cifs_evict_inode(struct inode *inode)
  {
 -      truncate_inode_pages(&inode->i_data, 0);
 +      truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
        cifs_fscache_release_inode_cookie(inode);
  }
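
truncate_inode_pages_final() is the new required form in eviction paths: besides dropping the pages it clears the workingset shadow entries, the same entries fs/block_dev.c above starts checking through mapping->nrshadows. Every ->evict_inode in this merge converts the same way (sketch):

#include <linux/fs.h>
#include <linux/mm.h>

static void demo_evict_inode(struct inode *inode)
{
	/* must be the _final variant on eviction: it removes page-cache
	 * pages and the shadow entries left behind by reclaim */
	truncate_inode_pages_final(&inode->i_data);
	clear_inode(inode);
	/* ...filesystem-specific cleanup follows... */
}
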
@@@ -541,7 -541,6 +541,7 @@@ static int cifs_show_stats(struct seq_f
  
  static int cifs_remount(struct super_block *sb, int *flags, char *data)
  {
 +      sync_filesystem(sb);
        *flags |= MS_NODIRATIME;
        return 0;
  }
@@@ -850,7 -849,6 +850,6 @@@ const struct inode_operations cifs_file
  /*    revalidate:cifs_revalidate, */
        .setattr = cifs_setattr,
        .getattr = cifs_getattr, /* do we need this anymore? */
-       .rename = cifs_rename,
        .permission = cifs_permission,
  #ifdef CONFIG_CIFS_XATTR
        .setxattr = cifs_setxattr,
@@@ -1006,7 -1004,7 +1005,7 @@@ cifs_init_once(void *inode
        init_rwsem(&cifsi->lock_sem);
  }
  
 -static int
 +static int __init
  cifs_init_inodecache(void)
  {
        cifs_inode_cachep = kmem_cache_create("cifs_inode_cache",
diff --combined fs/cifs/file.c
index 216d7e99f9219317bd0f2567c898925df77ec68d,5bac2763c450514c0fcb887cce0c5fe36bce1fec..8807442c94dd3323cbb7f9f8283c4f1c1a2d1480
@@@ -2579,19 -2579,32 +2579,32 @@@ cifs_writev(struct kiocb *iocb, const s
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        ssize_t rc = -EACCES;
-       loff_t lock_pos = pos;
+       loff_t lock_pos = iocb->ki_pos;
  
-       if (file->f_flags & O_APPEND)
-               lock_pos = i_size_read(inode);
        /*
         * We need to hold the sem to be sure nobody modifies lock list
         * with a brlock that prevents writing.
         */
        down_read(&cinode->lock_sem);
+       mutex_lock(&inode->i_mutex);
+       if (file->f_flags & O_APPEND)
+               lock_pos = i_size_read(inode);
        if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs),
                                     server->vals->exclusive_lock_type, NULL,
-                                    CIFS_WRITE_OP))
-               rc = generic_file_aio_write(iocb, iov, nr_segs, pos);
+                                    CIFS_WRITE_OP)) {
+               rc = __generic_file_aio_write(iocb, iov, nr_segs);
+               mutex_unlock(&inode->i_mutex);
+               if (rc > 0) {
+                       ssize_t err;
+                       err = generic_write_sync(file, iocb->ki_pos - rc, rc);
+                       if (err < 0)
+                               rc = err;
+               }
+       } else {
+               mutex_unlock(&inode->i_mutex);
+       }
        up_read(&cinode->lock_sem);
        return rc;
  }
@@@ -2727,56 -2740,27 +2740,27 @@@ cifs_retry_async_readv(struct cifs_read
  /**
   * cifs_readdata_to_iov - copy data from pages in response to an iovec
   * @rdata:    the readdata response with list of pages holding data
-  * @iov:      vector in which we should copy the data
-  * @nr_segs:  number of segments in vector
-  * @offset:   offset into file of the first iovec
-  * @copied:   used to return the amount of data copied to the iov
+  * @iter:     destination for our data
   *
   * This function copies data from a list of pages in a readdata response into
   * an array of iovecs. It will first calculate where the data should go
   * based on the info in the readdata and then copy the data into that spot.
   */
- static ssize_t
- cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
-                       unsigned long nr_segs, loff_t offset, ssize_t *copied)
+ static int
+ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
  {
-       int rc = 0;
-       struct iov_iter ii;
-       size_t pos = rdata->offset - offset;
-       ssize_t remaining = rdata->bytes;
-       unsigned char *pdata;
+       size_t remaining = rdata->bytes;
        unsigned int i;
  
-       /* set up iov_iter and advance to the correct offset */
-       iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
-       iov_iter_advance(&ii, pos);
-       *copied = 0;
        for (i = 0; i < rdata->nr_pages; i++) {
-               ssize_t copy;
                struct page *page = rdata->pages[i];
-               /* copy a whole page or whatever's left */
-               copy = min_t(ssize_t, remaining, PAGE_SIZE);
-               /* ...but limit it to whatever space is left in the iov */
-               copy = min_t(ssize_t, copy, iov_iter_count(&ii));
-               /* go while there's data to be copied and no errors */
-               if (copy && !rc) {
-                       pdata = kmap(page);
-                       rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
-                                               (int)copy);
-                       kunmap(page);
-                       if (!rc) {
-                               *copied += copy;
-                               remaining -= copy;
-                               iov_iter_advance(&ii, copy);
-                       }
-               }
+               size_t copy = min(remaining, PAGE_SIZE);
+               size_t written = copy_page_to_iter(page, 0, copy, iter);
+               remaining -= written;
+               if (written < copy && iov_iter_count(iter) > 0)
+                       break;
        }
-       return rc;
+       return remaining ? -EFAULT : 0;
  }
  
  static void
@@@ -2837,20 -2821,21 +2821,21 @@@ cifs_uncached_read_into_pages(struct TC
        return total_read > 0 ? total_read : result;
  }
  
- static ssize_t
- cifs_iovec_read(struct file *file, const struct iovec *iov,
-                unsigned long nr_segs, loff_t *poffset)
+ ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
+                              unsigned long nr_segs, loff_t pos)
  {
+       struct file *file = iocb->ki_filp;
        ssize_t rc;
        size_t len, cur_len;
        ssize_t total_read = 0;
-       loff_t offset = *poffset;
+       loff_t offset = pos;
        unsigned int npages;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct cifsFileInfo *open_file;
        struct cifs_readdata *rdata, *tmp;
        struct list_head rdata_list;
+       struct iov_iter to;
        pid_t pid;
  
        if (!nr_segs)
        if (!len)
                return 0;
  
+       iov_iter_init(&to, iov, nr_segs, len, 0);
        INIT_LIST_HEAD(&rdata_list);
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        open_file = file->private_data;
@@@ -2917,55 -2904,44 +2904,44 @@@ error
        if (!list_empty(&rdata_list))
                rc = 0;
  
+       len = iov_iter_count(&to);
        /* the loop below should proceed in the order of increasing offsets */
- restart_loop:
        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
+       again:
                if (!rc) {
-                       ssize_t copied;
                        /* FIXME: freezable sleep too? */
                        rc = wait_for_completion_killable(&rdata->done);
                        if (rc)
                                rc = -EINTR;
-                       else if (rdata->result)
+                       else if (rdata->result) {
                                rc = rdata->result;
-                       else {
-                               rc = cifs_readdata_to_iov(rdata, iov,
-                                                       nr_segs, *poffset,
-                                                       &copied);
-                               total_read += copied;
+                               /* resend call if it's a retryable error */
+                               if (rc == -EAGAIN) {
+                                       rc = cifs_retry_async_readv(rdata);
+                                       goto again;
+                               }
+                       } else {
+                               rc = cifs_readdata_to_iov(rdata, &to);
                        }
  
-                       /* resend call if it's a retryable error */
-                       if (rc == -EAGAIN) {
-                               rc = cifs_retry_async_readv(rdata);
-                               goto restart_loop;
-                       }
                }
                list_del_init(&rdata->list);
                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
        }
  
+       total_read = len - iov_iter_count(&to);
        cifs_stats_bytes_read(tcon, total_read);
-       *poffset += total_read;
  
        /* mask nodata case */
        if (rc == -ENODATA)
                rc = 0;
  
-       return total_read ? total_read : rc;
- }
- ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
-                              unsigned long nr_segs, loff_t pos)
- {
-       ssize_t read;
-       read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
-       if (read > 0)
-               iocb->ki_pos = pos;
-       return read;
+       if (total_read) {
+               iocb->ki_pos = pos + total_read;
+               return total_read;
+       }
+       return rc;
  }
  
  ssize_t
@@@ -3113,7 -3089,6 +3089,7 @@@ cifs_page_mkwrite(struct vm_area_struc
  
  static struct vm_operations_struct cifs_file_vm_ops = {
        .fault = filemap_fault,
 +      .map_pages = filemap_map_pages,
        .page_mkwrite = cifs_page_mkwrite,
        .remap_pages = generic_file_remap_pages,
  };
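
The rewritten cifs_readdata_to_iov() above is the template for the new iov_iter primitives: copy_page_to_iter() advances the iterator itself and returns how much it copied, so a short copy while the iter still has room means the destination faulted. The reusable shape, with a hypothetical name:

#include <linux/kernel.h>
#include <linux/pagemap.h>
#include <linux/uio.h>

static int demo_pages_to_iter(struct page **pages, unsigned int nr,
			      size_t bytes, struct iov_iter *iter)
{
	unsigned int i;

	for (i = 0; i < nr && bytes; i++) {
		size_t copy = min_t(size_t, bytes, PAGE_SIZE);
		size_t written = copy_page_to_iter(pages[i], 0, copy, iter);

		bytes -= written;
		/* short copy with room left in the iter: user buffer faulted */
		if (written < copy && iov_iter_count(iter) > 0)
			break;
	}
	return bytes ? -EFAULT : 0;
}
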
diff --combined fs/exec.c
index 9e81c630dfa76469cdd452e179882403a93b03c8,4cc94534ed5b290f6462563dbb0030d78e05995b..476f3ebf437ef40ddd7432200080825b7e9e992c
+++ b/fs/exec.c
@@@ -26,7 -26,6 +26,7 @@@
  #include <linux/file.h>
  #include <linux/fdtable.h>
  #include <linux/mm.h>
 +#include <linux/vmacache.h>
  #include <linux/stat.h>
  #include <linux/fcntl.h>
  #include <linux/swap.h>
@@@ -98,7 -97,6 +98,7 @@@ static inline void put_binfmt(struct li
        module_put(fmt->module);
  }
  
 +#ifdef CONFIG_USELIB
  /*
   * Note that a shared library must be both readable and executable due to
   * security reasons.
@@@ -158,7 -156,6 +158,7 @@@ exit
  out:
        return error;
  }
 +#endif /* #ifdef CONFIG_USELIB */
  
  #ifdef CONFIG_MMU
  /*
@@@ -813,7 -810,7 +813,7 @@@ EXPORT_SYMBOL(kernel_read)
  
  ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
  {
-       ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos);
+       ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
        if (res > 0)
                flush_icache_range(addr, addr + len);
        return res;
@@@ -823,7 -820,7 +823,7 @@@ EXPORT_SYMBOL(read_code)
  static int exec_mmap(struct mm_struct *mm)
  {
        struct task_struct *tsk;
 -      struct mm_struct * old_mm, *active_mm;
 +      struct mm_struct *old_mm, *active_mm;
  
        /* Notify parent that we're no longer interested in the old VM */
        tsk = current;
        tsk->mm = mm;
        tsk->active_mm = mm;
        activate_mm(active_mm, mm);
 +      tsk->mm->vmacache_seqnum = 0;
 +      vmacache_flush(tsk);
        task_unlock(tsk);
        if (old_mm) {
                up_read(&old_mm->mmap_sem);
@@@ -1046,7 -1041,7 +1046,7 @@@ EXPORT_SYMBOL_GPL(get_task_comm)
   * so that a new one can be started
   */
  
 -void set_task_comm(struct task_struct *tsk, char *buf)
 +void set_task_comm(struct task_struct *tsk, const char *buf)
  {
        task_lock(tsk);
        trace_task_rename(tsk, buf);
        perf_event_comm(tsk);
  }
  
 -static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len)
 -{
 -      int i, ch;
 -
 -      /* Copies the binary name from after last slash */
 -      for (i = 0; (ch = *(fn++)) != '\0';) {
 -              if (ch == '/')
 -                      i = 0; /* overwrite what we wrote */
 -              else
 -                      if (i < len - 1)
 -                              tcomm[i++] = ch;
 -      }
 -      tcomm[i] = '\0';
 -}
 -
  int flush_old_exec(struct linux_binprm * bprm)
  {
        int retval;
                goto out;
  
        set_mm_exe_file(bprm->mm, bprm->file);
 -
 -      filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm));
        /*
         * Release all of the old mmap stuff
         */
@@@ -1110,7 -1122,7 +1110,7 @@@ void setup_new_exec(struct linux_binpr
        else
                set_dumpable(current->mm, suid_dumpable);
  
 -      set_task_comm(current, bprm->tcomm);
 +      set_task_comm(current, kbasename(bprm->filename));
  
        /* Set the new mm task size. We have to do that late because it may
         * depend on TIF_32BIT which is only updated in flush_thread() on
@@@ -1607,9 -1619,9 +1607,9 @@@ SYSCALL_DEFINE3(execve
        return do_execve(getname(filename), argv, envp);
  }
  #ifdef CONFIG_COMPAT
 -asmlinkage long compat_sys_execve(const char __user * filename,
 -      const compat_uptr_t __user * argv,
 -      const compat_uptr_t __user * envp)
 +COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
 +      const compat_uptr_t __user *, argv,
 +      const compat_uptr_t __user *, envp)
  {
        return compat_do_execve(getname(filename), argv, envp);
  }
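
bprm->tcomm and the hand-rolled filename_to_taskname() can go because <linux/string.h> already provides kbasename(), and set_task_comm() bounds the copy to TASK_COMm_LEN on its own. Usage sketch:

#include <linux/string.h>

static void demo(void)
{
	/* everything after the last '/', or the whole string if none */
	const char *comm = kbasename("/usr/bin/sleep");	/* -> "sleep" */

	(void)comm;
}
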
diff --combined fs/ext4/file.c
index 4e508fc83dcf1b0b9b2934e4cf69506e063b0f6e,d564bcfb23c5c88d964ad1472371cbbe181de6f5..ca7502d89fdee07b96585c768854375b207daaf6
@@@ -146,14 -146,14 +146,14 @@@ ext4_file_dio_write(struct kiocb *iocb
                        overwrite = 1;
        }
  
-       ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+       ret = __generic_file_aio_write(iocb, iov, nr_segs);
        mutex_unlock(&inode->i_mutex);
  
        if (ret > 0) {
                ssize_t err;
  
                err = generic_write_sync(file, iocb->ki_pos - ret, ret);
 -              if (err < 0 && ret > 0)
 +              if (err < 0)
                        ret = err;
        }
        blk_finish_plug(&plug);
@@@ -200,7 -200,6 +200,7 @@@ ext4_file_write(struct kiocb *iocb, con
  
  static const struct vm_operations_struct ext4_file_vm_ops = {
        .fault          = filemap_fault,
 +      .map_pages      = filemap_map_pages,
        .page_mkwrite   = ext4_page_mkwrite,
        .remap_pages    = generic_file_remap_pages,
  };
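
The one-line .map_pages additions here, in btrfs and in cifs all hook the new 3.15 faultaround path: on a file fault the VM calls ->map_pages to map the already-uptodate page-cache pages around the faulting address in one go, and filemap_map_pages() is the stock implementation for page-cache-backed mappings. The full hookup (sketch):

#include <linux/fs.h>
#include <linux/mm.h>

static const struct vm_operations_struct demo_file_vm_ops = {
	.fault		= filemap_fault,	/* miss path: read the page in */
	.map_pages	= filemap_map_pages,	/* hit path: batch-map neighbours */
	.remap_pages	= generic_file_remap_pages,
};
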
diff --combined fs/file.c
index b61293badfb1a9c98742a5bcc790751979251741,682103b95f8f1e0dfa28ecfea59d56a877a6203f..8f294cfac69749024c2c2e19d4b156755130e9ed
+++ b/fs/file.c
  
  int sysctl_nr_open __read_mostly = 1024*1024;
  int sysctl_nr_open_min = BITS_PER_LONG;
- int sysctl_nr_open_max = 1024 * 1024; /* raised later */
+ /* our max() is unusable in constant expressions ;-/ */
+ #define __const_max(x, y) ((x) < (y) ? (x) : (y))
+ int sysctl_nr_open_max = __const_max(INT_MAX, ~(size_t)0/sizeof(void *)) &
+                        -BITS_PER_LONG;
  
  static void *alloc_fdmem(size_t size)
  {
@@@ -429,12 -432,6 +432,6 @@@ void exit_files(struct task_struct *tsk
        }
  }
  
- void __init files_defer_init(void)
- {
-       sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
-                            -BITS_PER_LONG;
- }
  struct files_struct init_files = {
        .count          = ATOMIC_INIT(1),
        .fdt            = &init_files.fdtab,
@@@ -497,7 -494,7 +494,7 @@@ repeat
        error = fd;
  #if 1
        /* Sanity check */
 -      if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
 +      if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
                printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
                rcu_assign_pointer(fdt->fd[fd], NULL);
        }
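
__const_max() exists because the kernel's max() expands to a GNU statement expression with typed temporaries, which is not a constant expression and so cannot initialize a variable at file scope; a bare ternary can. Note that as written the comparison picks the smaller operand, exactly the min() that files_defer_init() used to compute at boot. A userspace illustration:

#include <stdio.h>

/* constant-expression variant: legal in a static initializer */
#define __const_max(x, y) ((x) < (y) ? (x) : (y))

/* kernel-style max() for contrast; a statement expression like this
 * would be rejected by the compiler in the initializer below */
#define stmt_max(x, y) ({ typeof(x) _x = (x); typeof(y) _y = (y); \
			_x > _y ? _x : _y; })

static const unsigned long cap =
	__const_max(0x7fffffffUL, ~(unsigned long)0 / sizeof(void *));

int main(void)
{
	printf("cap=%lu (the smaller of the two bounds)\n", cap);
	return 0;
}
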
diff --combined fs/file_table.c
index 01071c4d752e1e41099c8082a31a55305c5c647d,718e8e5224f8fd0652f4ded2d9c99db83c31ed48..a374f5033e97bab814977f8375f5c8cb1c899228
@@@ -52,7 -52,6 +52,6 @@@ static void file_free_rcu(struct rcu_he
  static inline void file_free(struct file *f)
  {
        percpu_counter_dec(&nr_files);
-       file_check_state(f);
        call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
  }
  
@@@ -178,47 -177,12 +177,12 @@@ struct file *alloc_file(struct path *pa
        file->f_mapping = path->dentry->d_inode->i_mapping;
        file->f_mode = mode;
        file->f_op = fop;
-       /*
-        * These mounts don't really matter in practice
-        * for r/o bind mounts.  They aren't userspace-
-        * visible.  We do this for consistency, and so
-        * that we can do debugging checks at __fput()
-        */
-       if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
-               file_take_write(file);
-               WARN_ON(mnt_clone_write(path->mnt));
-       }
        if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
                i_readcount_inc(path->dentry->d_inode);
        return file;
  }
  EXPORT_SYMBOL(alloc_file);
  
- /**
-  * drop_file_write_access - give up ability to write to a file
-  * @file: the file to which we will stop writing
-  *
-  * This is a central place which will give up the ability
-  * to write to @file, along with access to write through
-  * its vfsmount.
-  */
- static void drop_file_write_access(struct file *file)
- {
-       struct vfsmount *mnt = file->f_path.mnt;
-       struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
-       put_write_access(inode);
-       if (special_file(inode->i_mode))
-               return;
-       if (file_check_writeable(file) != 0)
-               return;
-       __mnt_drop_write(mnt);
-       file_release_write(file);
- }
  /* the real guts of fput() - releasing the last reference to file
   */
  static void __fput(struct file *file)
         * in the file cleanup chain.
         */
        eventpoll_release(file);
 -      locks_remove_flock(file);
 +      locks_remove_file(file);
  
        if (unlikely(file->f_flags & FASYNC)) {
                if (file->f_op->fasync)
        put_pid(file->f_owner.pid);
        if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
                i_readcount_dec(inode);
-       if (file->f_mode & FMODE_WRITE)
-               drop_file_write_access(file);
+       if (file->f_mode & FMODE_WRITER) {
+               put_write_access(inode);
+               __mnt_drop_write(mnt);
+       }
        file->f_path.dentry = NULL;
        file->f_path.mnt = NULL;
        file->f_inode = NULL;
@@@ -359,6 -325,5 +325,5 @@@ void __init files_init(unsigned long me
  
        n = (mempages * (PAGE_SIZE / 1024)) / 10;
        files_stat.max_files = max_t(unsigned long, n, NR_FILE);
-       files_defer_init();
        percpu_counter_init(&nr_files, 0);
  } 
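
The file_take_write()/drop_file_write_access() machinery can disappear because one mode bit now records the only fact it tracked: FMODE_WRITER means the open path took write access on the inode and the mount, and the final fput() undoes exactly that pair. A sketch of the release side (__mnt_drop_write is kernel-internal, visible via fs/internal.h):

#include <linux/fs.h>
#include "internal.h"	/* __mnt_drop_write(), not a public export */

static void demo_drop_write(struct file *file)
{
	struct inode *inode = file_inode(file);

	if (file->f_mode & FMODE_WRITER) {
		put_write_access(inode);		/* pairs get_write_access() */
		__mnt_drop_write(file->f_path.mnt);	/* pairs the mount write grab */
	}
}
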
diff --combined fs/fuse/file.c
index 48992cac714b413f644dbdd7c97ba87355831677,fd06d1ebc2ebc1ded380befd984faa6c7ccbaf2f..13f8bdec5110d1a7db12b2a262bb5e2ecb0e4f82
@@@ -188,22 -188,6 +188,22 @@@ int fuse_do_open(struct fuse_conn *fc, 
  }
  EXPORT_SYMBOL_GPL(fuse_do_open);
  
 +static void fuse_link_write_file(struct file *file)
 +{
 +      struct inode *inode = file_inode(file);
 +      struct fuse_conn *fc = get_fuse_conn(inode);
 +      struct fuse_inode *fi = get_fuse_inode(inode);
 +      struct fuse_file *ff = file->private_data;
 +      /*
 +       * file may be written through mmap, so chain it onto the
 +       * inode's write_files list
 +       */
 +      spin_lock(&fc->lock);
 +      if (list_empty(&ff->write_entry))
 +              list_add(&ff->write_entry, &fi->write_files);
 +      spin_unlock(&fc->lock);
 +}
 +
  void fuse_finish_open(struct inode *inode, struct file *file)
  {
        struct fuse_file *ff = file->private_data;
                spin_unlock(&fc->lock);
                fuse_invalidate_attr(inode);
        }
 +      if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
 +              fuse_link_write_file(file);
  }
  
  int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
@@@ -310,15 -292,6 +310,15 @@@ static int fuse_open(struct inode *inod
  
  static int fuse_release(struct inode *inode, struct file *file)
  {
 +      struct fuse_conn *fc = get_fuse_conn(inode);
 +
 +      /* see fuse_vma_close() for !writeback_cache case */
 +      if (fc->writeback_cache)
 +              filemap_write_and_wait(file->f_mapping);
 +
 +      if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state))
 +              fuse_flush_mtime(file, true);
 +
        fuse_release_common(file, FUSE_RELEASE);
  
        /* return value is ignored by VFS */
@@@ -360,13 -333,12 +360,13 @@@ u64 fuse_lock_owner_id(struct fuse_con
  }
  
  /*
 - * Check if page is under writeback
 + * Check if any page in a range is under writeback
   *
   * This is currently done by walking the list of writepage requests
   * for the inode, which can be pretty inefficient.
   */
 -static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
 +static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
 +                                 pgoff_t idx_to)
  {
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
  
                BUG_ON(req->inode != inode);
                curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
 -              if (curr_index <= index &&
 -                  index < curr_index + req->num_pages) {
 +              if (idx_from < curr_index + req->num_pages &&
 +                  curr_index <= idx_to) {
                        found = true;
                        break;
                }
        return found;
  }
  
 +static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
 +{
 +      return fuse_range_is_writeback(inode, index, index);
 +}
 +
  /*
   * Wait for page writeback to be completed.
   *
@@@ -409,21 -376,6 +409,21 @@@ static int fuse_wait_on_page_writeback(
        return 0;
  }
  
 +/*
 + * Wait for all pending writepages on the inode to finish.
 + *
 + * This is currently done by blocking further writes with FUSE_NOWRITE
 + * and waiting for all sent writes to complete.
 + *
 + * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
 + * could conflict with truncation.
 + */
 +static void fuse_sync_writes(struct inode *inode)
 +{
 +      fuse_set_nowrite(inode);
 +      fuse_release_nowrite(inode);
 +}
 +
  static int fuse_flush(struct file *file, fl_owner_t id)
  {
        struct inode *inode = file_inode(file);
        if (fc->no_flush)
                return 0;
  
 +      err = filemap_write_and_wait(file->f_mapping);
 +      if (err)
 +              return err;
 +
 +      mutex_lock(&inode->i_mutex);
 +      fuse_sync_writes(inode);
 +      mutex_unlock(&inode->i_mutex);
 +
        req = fuse_get_req_nofail_nopages(fc, file);
        memset(&inarg, 0, sizeof(inarg));
        inarg.fh = ff->fh;
        return err;
  }
  
 -/*
 - * Wait for all pending writepages on the inode to finish.
 - *
 - * This is currently done by blocking further writes with FUSE_NOWRITE
 - * and waiting for all sent writes to complete.
 - *
 - * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
 - * could conflict with truncation.
 - */
 -static void fuse_sync_writes(struct inode *inode)
 -{
 -      fuse_set_nowrite(inode);
 -      fuse_release_nowrite(inode);
 -}
 -
  int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
                      int datasync, int isdir)
  {
  
        fuse_sync_writes(inode);
  
 +      if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) {
 +              int err = fuse_flush_mtime(file, false);
 +              if (err)
 +                      goto out;
 +      }
 +
        req = fuse_get_req_nopages(fc);
        if (IS_ERR(req)) {
                err = PTR_ERR(req);
@@@ -702,33 -655,7 +702,33 @@@ static void fuse_read_update_size(struc
        spin_unlock(&fc->lock);
  }
  
 -static int fuse_readpage(struct file *file, struct page *page)
 +static void fuse_short_read(struct fuse_req *req, struct inode *inode,
 +                          u64 attr_ver)
 +{
 +      size_t num_read = req->out.args[0].size;
 +      struct fuse_conn *fc = get_fuse_conn(inode);
 +
 +      if (fc->writeback_cache) {
 +              /*
 +               * A hole in the file. Some data after the hole is already in
 +               * the page cache but has not reached the client fs yet, so the
 +               * hole is not present there.
 +               */
 +              int i;
 +              int start_idx = num_read >> PAGE_CACHE_SHIFT;
 +              size_t off = num_read & (PAGE_CACHE_SIZE - 1);
 +
 +              for (i = start_idx; i < req->num_pages; i++) {
 +                      zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE);
 +                      off = 0;
 +              }
 +      } else {
 +              loff_t pos = page_offset(req->pages[0]) + num_read;
 +              fuse_read_update_size(inode, pos, attr_ver);
 +      }
 +}
 +
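
A worked example of the zeroing math in fuse_short_read() above, assuming
4096-byte PAGE_CACHE_SIZE pages (illustrative numbers, not part of the
patch): a three-page read that returns num_read = 5000 gives
start_idx = 5000 >> 12 = 1 and off = 5000 & 4095 = 904, so page 0 is kept
as read, page 1 is zeroed from byte 904 onward, and page 2 is zeroed
entirely.
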
 +static int fuse_do_readpage(struct file *file, struct page *page)
  {
        struct fuse_io_priv io = { .async = 0, .file = file };
        struct inode *inode = page->mapping->host;
        u64 attr_ver;
        int err;
  
 -      err = -EIO;
 -      if (is_bad_inode(inode))
 -              goto out;
 -
        /*
         * Page writeback can extend beyond the lifetime of the
         * page-cache page, so make sure we read a properly synced
        fuse_wait_on_page_writeback(inode, page->index);
  
        req = fuse_get_req(fc, 1);
 -      err = PTR_ERR(req);
        if (IS_ERR(req))
 -              goto out;
 +              return PTR_ERR(req);
  
        attr_ver = fuse_get_attr_version(fc);
  
        req->page_descs[0].length = count;
        num_read = fuse_send_read(req, &io, pos, count, NULL);
        err = req->out.h.error;
 -      fuse_put_request(fc, req);
  
        if (!err) {
                /*
                 * Short read means EOF.  If file size is larger, truncate it
                 */
                if (num_read < count)
 -                      fuse_read_update_size(inode, pos + num_read, attr_ver);
 +                      fuse_short_read(req, inode, attr_ver);
  
                SetPageUptodate(page);
        }
  
 +      fuse_put_request(fc, req);
 +
 +      return err;
 +}
 +
 +static int fuse_readpage(struct file *file, struct page *page)
 +{
 +      struct inode *inode = page->mapping->host;
 +      int err;
 +
 +      err = -EIO;
 +      if (is_bad_inode(inode))
 +              goto out;
 +
 +      err = fuse_do_readpage(file, page);
        fuse_invalidate_atime(inode);
   out:
        unlock_page(page);
@@@ -808,9 -726,13 +808,9 @@@ static void fuse_readpages_end(struct f
                /*
                 * Short read means EOF. If file size is larger, truncate it
                 */
 -              if (!req->out.h.error && num_read < count) {
 -                      loff_t pos;
 +              if (!req->out.h.error && num_read < count)
 +                      fuse_short_read(req, inode, req->misc.read.attr_ver);
  
 -                      pos = page_offset(req->pages[0]) + num_read;
 -                      fuse_read_update_size(inode, pos,
 -                                            req->misc.read.attr_ver);
 -              }
                fuse_invalidate_atime(inode);
        }
  
@@@ -1000,21 -922,16 +1000,21 @@@ static size_t fuse_send_write(struct fu
        return req->misc.write.out.size;
  }
  
 -void fuse_write_update_size(struct inode *inode, loff_t pos)
 +bool fuse_write_update_size(struct inode *inode, loff_t pos)
  {
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
 +      bool ret = false;
  
        spin_lock(&fc->lock);
        fi->attr_version = ++fc->attr_version;
 -      if (pos > inode->i_size)
 +      if (pos > inode->i_size) {
                i_size_write(inode, pos);
 +              ret = true;
 +      }
        spin_unlock(&fc->lock);
 +
 +      return ret;
  }
  
  static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
@@@ -1086,9 -1003,7 +1086,7 @@@ static ssize_t fuse_fill_write_pages(st
                if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);
  
-               pagefault_disable();
                tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
-               pagefault_enable();
                flush_dcache_page(page);
  
                mark_page_accessed(page);
@@@ -1199,15 -1114,6 +1197,15 @@@ static ssize_t fuse_file_aio_write(stru
        struct iov_iter i;
        loff_t endbyte = 0;
  
 +      if (get_fuse_conn(inode)->writeback_cache) {
 +              /* Update size (EOF optimization) and mode (SUID clearing) */
 +              err = fuse_update_attributes(mapping->host, NULL, file, NULL);
 +              if (err)
 +                      return err;
 +
 +              return generic_file_aio_write(iocb, iov, nr_segs, pos);
 +      }
 +
        WARN_ON(iocb->ki_pos != pos);
  
        ocount = 0;
                goto out;
  
        if (file->f_flags & O_DIRECT) {
-               written = generic_file_direct_write(iocb, iov, &nr_segs,
-                                                   pos, &iocb->ki_pos,
+               written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 
                                                    count, ocount);
                if (written < 0 || written == count)
                        goto out;
@@@ -1381,18 -1286,13 +1378,18 @@@ static inline int fuse_iter_npages(cons
  
  ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
                       unsigned long nr_segs, size_t count, loff_t *ppos,
 -                     int write)
 +                     int flags)
  {
 +      int write = flags & FUSE_DIO_WRITE;
 +      int cuse = flags & FUSE_DIO_CUSE;
        struct file *file = io->file;
 +      struct inode *inode = file->f_mapping->host;
        struct fuse_file *ff = file->private_data;
        struct fuse_conn *fc = ff->fc;
        size_t nmax = write ? fc->max_write : fc->max_read;
        loff_t pos = *ppos;
 +      pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT;
 +      pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
        ssize_t res = 0;
        struct fuse_req *req;
        struct iov_iter ii;
        if (IS_ERR(req))
                return PTR_ERR(req);
  
 +      if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
 +              if (!write)
 +                      mutex_lock(&inode->i_mutex);
 +              fuse_sync_writes(inode);
 +              if (!write)
 +                      mutex_unlock(&inode->i_mutex);
 +      }
 +
        while (count) {
                size_t nres;
                fl_owner_t owner = current->files;
@@@ -1502,8 -1394,7 +1499,8 @@@ static ssize_t __fuse_direct_write(stru
  
        res = generic_write_checks(file, ppos, &count, 0);
        if (!res)
 -              res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1);
 +              res = fuse_direct_io(io, iov, nr_segs, count, ppos,
 +                                   FUSE_DIO_WRITE);
  
        fuse_invalidate_attr(inode);
  
        return err;
  }
  
 +/*
 + * It would be worthwhile to make sure that space is reserved on disk for
 + * the write, but how to implement that without killing performance needs
 + * more thought.
 + */
 +static int fuse_write_begin(struct file *file, struct address_space *mapping,
 +              loff_t pos, unsigned len, unsigned flags,
 +              struct page **pagep, void **fsdata)
 +{
 +      pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 +      struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode);
 +      struct page *page;
 +      loff_t fsize;
 +      int err = -ENOMEM;
 +
 +      WARN_ON(!fc->writeback_cache);
 +
 +      page = grab_cache_page_write_begin(mapping, index, flags);
 +      if (!page)
 +              goto error;
 +
 +      fuse_wait_on_page_writeback(mapping->host, page->index);
 +
 +      if (PageUptodate(page) || len == PAGE_CACHE_SIZE)
 +              goto success;
 +      /*
 +       * Check if the start of this page comes after the end of the file,
 +       * in which case the readpage can be optimized away.
 +       */
 +      fsize = i_size_read(mapping->host);
 +      if (fsize <= (pos & PAGE_CACHE_MASK)) {
 +              size_t off = pos & ~PAGE_CACHE_MASK;
 +              if (off)
 +                      zero_user_segment(page, 0, off);
 +              goto success;
 +      }
 +      err = fuse_do_readpage(file, page);
 +      if (err)
 +              goto cleanup;
 +success:
 +      *pagep = page;
 +      return 0;
 +
 +cleanup:
 +      unlock_page(page);
 +      page_cache_release(page);
 +error:
 +      return err;
 +}
 +
 +static int fuse_write_end(struct file *file, struct address_space *mapping,
 +              loff_t pos, unsigned len, unsigned copied,
 +              struct page *page, void *fsdata)
 +{
 +      struct inode *inode = page->mapping->host;
 +
 +      if (!PageUptodate(page)) {
 +              /* Zero any unwritten bytes at the end of the page */
 +              size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK;
 +              if (endoff)
 +                      zero_user_segment(page, endoff, PAGE_CACHE_SIZE);
 +              SetPageUptodate(page);
 +      }
 +
 +      fuse_write_update_size(inode, pos + copied);
 +      set_page_dirty(page);
 +      unlock_page(page);
 +      page_cache_release(page);
 +
 +      return copied;
 +}
 +
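
A worked example of the tail zeroing in fuse_write_end() above
(illustrative, 4096-byte pages): copying 100 bytes at pos = 4096 into a
page that is not yet uptodate gives endoff = (4096 + 100) & 4095 = 100,
so bytes 100..4095 of the page are zeroed before SetPageUptodate(); a
full-page copy gives endoff = 0 and nothing needs zeroing.
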
  static int fuse_launder_page(struct page *page)
  {
        int err = 0;
@@@ -2117,16 -1937,26 +2114,16 @@@ static int fuse_page_mkwrite(struct vm_
  static const struct vm_operations_struct fuse_file_vm_ops = {
        .close          = fuse_vma_close,
        .fault          = filemap_fault,
 +      .map_pages      = filemap_map_pages,
        .page_mkwrite   = fuse_page_mkwrite,
        .remap_pages    = generic_file_remap_pages,
  };
  
  static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
  {
 -      if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
 -              struct inode *inode = file_inode(file);
 -              struct fuse_conn *fc = get_fuse_conn(inode);
 -              struct fuse_inode *fi = get_fuse_inode(inode);
 -              struct fuse_file *ff = file->private_data;
 -              /*
 -               * file may be written through mmap, so chain it onto the
 -               * inodes's write_file list
 -               */
 -              spin_lock(&fc->lock);
 -              if (list_empty(&ff->write_entry))
 -                      list_add(&ff->write_entry, &fi->write_files);
 -              spin_unlock(&fc->lock);
 -      }
 +      if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
 +              fuse_link_write_file(file);
 +
        file_accessed(file);
        vma->vm_ops = &fuse_file_vm_ops;
        return 0;
@@@ -2773,7 -2603,7 +2770,7 @@@ static void fuse_register_polled_file(s
  {
        spin_lock(&fc->lock);
        if (RB_EMPTY_NODE(&ff->polled_node)) {
 -              struct rb_node **link, *parent;
 +              struct rb_node **link, *uninitialized_var(parent);
  
                link = fuse_find_polled_node(fc, ff->kh, &parent);
                BUG_ON(*link);
@@@ -3017,16 -2847,8 +3014,16 @@@ static long fuse_file_fallocate(struct 
                goto out;
  
        /* we could have extended the file */
 -      if (!(mode & FALLOC_FL_KEEP_SIZE))
 -              fuse_write_update_size(inode, offset + length);
 +      if (!(mode & FALLOC_FL_KEEP_SIZE)) {
 +              bool changed = fuse_write_update_size(inode, offset + length);
 +
 +              if (changed && fc->writeback_cache) {
 +                      struct fuse_inode *fi = get_fuse_inode(inode);
 +
 +                      inode->i_mtime = current_fs_time(inode->i_sb);
 +                      set_bit(FUSE_I_MTIME_DIRTY, &fi->state);
 +              }
 +      }
  
        if (mode & FALLOC_FL_PUNCH_HOLE)
                truncate_pagecache_range(inode, offset, offset + length - 1);
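
The size/mtime update above is observable from user space. A minimal
sketch (the mount path is hypothetical; any FUSE filesystem with the
writeback cache enabled would do):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* hypothetical path on a FUSE mount */
            int fd = open("/mnt/fuse/testfile", O_CREAT | O_RDWR, 0644);

            if (fd < 0)
                    return 1;
            /* mode 0 extends i_size, so st_size (and, with writeback
             * cache, st_mtime) change as in the hunk above */
            if (fallocate(fd, 0, 0, 1 << 20) == -1)
                    perror("fallocate");
            /* with FALLOC_FL_KEEP_SIZE instead, st_size stays put */
            close(fd);
            return 0;
    }
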
@@@ -3090,8 -2912,6 +3087,8 @@@ static const struct address_space_opera
        .set_page_dirty = __set_page_dirty_nobuffers,
        .bmap           = fuse_bmap,
        .direct_IO      = fuse_direct_IO,
 +      .write_begin    = fuse_write_begin,
 +      .write_end      = fuse_write_end,
  };
  
  void fuse_init_file_inode(struct inode *inode)
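
The write_begin/write_end path added above only runs when the userspace
filesystem opts in to the writeback cache. A minimal libfuse-side sketch
(assuming FUSE_CAP_WRITEBACK_CACHE as the capability bit in a libfuse
release that tracks this kernel feature):

    #include <fuse_lowlevel.h>

    static void my_init(void *userdata, struct fuse_conn_info *conn)
    {
    #ifdef FUSE_CAP_WRITEBACK_CACHE
            /* ask the kernel to cache writes and flush them later */
            conn->want |= FUSE_CAP_WRITEBACK_CACHE;
    #endif
    }
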
diff --combined fs/namei.c
index 88339f59efb5d9b3691f5ebbad7e5ef6eb59c4db,4fb52f0ca5cbfb831f0e681ee7ab5c4477a04a6e..c6157c894fce234c333d5a2d787f81ee3e5e7ba9
@@@ -358,6 -358,7 +358,7 @@@ int generic_permission(struct inode *in
  
        return -EACCES;
  }
+ EXPORT_SYMBOL(generic_permission);
  
  /*
   * We _really_ want to just do "generic_permission()" without
@@@ -455,6 -456,7 +456,7 @@@ int inode_permission(struct inode *inod
                return retval;
        return __inode_permission(inode, mask);
  }
+ EXPORT_SYMBOL(inode_permission);
  
  /**
   * path_get - get a reference to a path
@@@ -924,6 -926,7 +926,7 @@@ int follow_up(struct path *path
        path->mnt = &parent->mnt;
        return 1;
  }
+ EXPORT_SYMBOL(follow_up);
  
  /*
   * Perform an automount
@@@ -1085,6 -1088,7 +1088,7 @@@ int follow_down_one(struct path *path
        }
        return 0;
  }
+ EXPORT_SYMBOL(follow_down_one);
  
  static inline bool managed_dentry_might_block(struct dentry *dentry)
  {
@@@ -1223,6 -1227,7 +1227,7 @@@ int follow_down(struct path *path
        }
        return 0;
  }
+ EXPORT_SYMBOL(follow_down);
  
  /*
   * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
@@@ -1796,7 -1801,7 +1801,7 @@@ static int link_path_walk(const char *n
                        if (err)
                                return err;
                }
 -              if (!d_is_directory(nd->path.dentry)) {
 +              if (!d_can_lookup(nd->path.dentry)) {
                        err = -ENOTDIR; 
                        break;
                }
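
For reference, the two helpers are related roughly as follows (a sketch
based on include/linux/dcache.h of this era, not part of this diff):

    static inline bool d_can_lookup(const struct dentry *dentry)
    {
            return __d_entry_type(dentry) == DCACHE_DIRECTORY_TYPE;
    }

    static inline bool d_is_dir(const struct dentry *dentry)
    {
            return d_can_lookup(dentry) || d_is_autodir(dentry);
    }

So d_can_lookup() accepts only real directories, while d_is_dir() also
covers automount points.
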
@@@ -1817,7 -1822,7 +1822,7 @@@ static int path_init(int dfd, const cha
                struct dentry *root = nd->root.dentry;
                struct inode *inode = root->d_inode;
                if (*name) {
 -                      if (!d_is_directory(root))
 +                      if (!d_can_lookup(root))
                                return -ENOTDIR;
                        retval = inode_permission(inode, MAY_EXEC);
                        if (retval)
                dentry = f.file->f_path.dentry;
  
                if (*name) {
 -                      if (!d_is_directory(dentry)) {
 +                      if (!d_can_lookup(dentry)) {
                                fdput(f);
                                return -ENOTDIR;
                        }
@@@ -1955,7 -1960,7 +1960,7 @@@ static int path_lookupat(int dfd, cons
                err = complete_walk(nd);
  
        if (!err && nd->flags & LOOKUP_DIRECTORY) {
 -              if (!d_is_directory(nd->path.dentry)) {
 +              if (!d_can_lookup(nd->path.dentry)) {
                        path_put(&nd->path);
                        err = -ENOTDIR;
                }
@@@ -2025,6 -2030,7 +2030,7 @@@ int kern_path(const char *name, unsigne
                *path = nd.path;
        return res;
  }
+ EXPORT_SYMBOL(kern_path);
  
  /**
   * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
@@@ -2049,6 -2055,7 +2055,7 @@@ int vfs_path_lookup(struct dentry *dent
                *path = nd.path;
        return err;
  }
+ EXPORT_SYMBOL(vfs_path_lookup);
  
  /*
   * Restricted form of lookup. Doesn't follow links, single-component only,
@@@ -2111,6 -2118,7 +2118,7 @@@ struct dentry *lookup_one_len(const cha
  
        return __lookup_hash(&this, base, 0);
  }
+ EXPORT_SYMBOL(lookup_one_len);
  
  int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
                 struct path *path, int *empty)
@@@ -2135,6 -2143,7 +2143,7 @@@ int user_path_at(int dfd, const char __
  {
        return user_path_at_empty(dfd, name, flags, path, NULL);
  }
+ EXPORT_SYMBOL(user_path_at);
  
  /*
   * NB: most callers don't do anything directly with the reference to the
@@@ -2414,11 -2423,11 +2423,11 @@@ static int may_delete(struct inode *dir
            IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
                return -EPERM;
        if (isdir) {
 -              if (!d_is_directory(victim) && !d_is_autodir(victim))
 +              if (!d_is_dir(victim))
                        return -ENOTDIR;
                if (IS_ROOT(victim))
                        return -EBUSY;
 -      } else if (d_is_directory(victim) || d_is_autodir(victim))
 +      } else if (d_is_dir(victim))
                return -EISDIR;
        if (IS_DEADDIR(dir))
                return -ENOENT;
@@@ -2477,6 -2486,7 +2486,7 @@@ struct dentry *lock_rename(struct dentr
        mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
        return NULL;
  }
+ EXPORT_SYMBOL(lock_rename);
  
  void unlock_rename(struct dentry *p1, struct dentry *p2)
  {
                mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
        }
  }
+ EXPORT_SYMBOL(unlock_rename);
  
  int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                bool want_excl)
                fsnotify_create(dir, dentry);
        return error;
  }
+ EXPORT_SYMBOL(vfs_create);
  
  static int may_open(struct path *path, int acc_mode, int flag)
  {
@@@ -2569,7 -2581,7 +2581,7 @@@ static int handle_truncate(struct file 
        /*
         * Refuse to truncate files with mandatory locks held on them.
         */
 -      error = locks_verify_locked(inode);
 +      error = locks_verify_locked(filp);
        if (!error)
                error = security_path_truncate(path);
        if (!error) {
@@@ -3016,10 -3028,11 +3028,10 @@@ finish_open
        }
        audit_inode(name, nd->path.dentry, 0);
        error = -EISDIR;
 -      if ((open_flag & O_CREAT) &&
 -          (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry)))
 +      if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
                goto out;
        error = -ENOTDIR;
 -      if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry))
 +      if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
                goto out;
        if (!S_ISREG(nd->inode->i_mode))
                will_truncate = false;
@@@ -3375,6 -3388,7 +3387,7 @@@ int vfs_mknod(struct inode *dir, struc
                fsnotify_create(dir, dentry);
        return error;
  }
+ EXPORT_SYMBOL(vfs_mknod);
  
  static int may_mknod(umode_t mode)
  {
@@@ -3464,6 -3478,7 +3477,7 @@@ int vfs_mkdir(struct inode *dir, struc
                fsnotify_mkdir(dir, dentry);
        return error;
  }
+ EXPORT_SYMBOL(vfs_mkdir);
  
  SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
  {
@@@ -3518,6 -3533,7 +3532,7 @@@ void dentry_unhash(struct dentry *dentr
                __d_drop(dentry);
        spin_unlock(&dentry->d_lock);
  }
+ EXPORT_SYMBOL(dentry_unhash);
  
  int vfs_rmdir(struct inode *dir, struct dentry *dentry)
  {
@@@ -3555,6 -3571,7 +3570,7 @@@ out
                d_delete(dentry);
        return error;
  }
+ EXPORT_SYMBOL(vfs_rmdir);
  
  static long do_rmdir(int dfd, const char __user *pathname)
  {
@@@ -3672,6 -3689,7 +3688,7 @@@ out
  
        return error;
  }
+ EXPORT_SYMBOL(vfs_unlink);
  
  /*
   * Make sure that the actual truncation of the file will occur outside its
@@@ -3743,7 -3761,7 +3760,7 @@@ exit1
  slashes:
        if (d_is_negative(dentry))
                error = -ENOENT;
 -      else if (d_is_directory(dentry) || d_is_autodir(dentry))
 +      else if (d_is_dir(dentry))
                error = -EISDIR;
        else
                error = -ENOTDIR;
@@@ -3785,6 -3803,7 +3802,7 @@@ int vfs_symlink(struct inode *dir, stru
                fsnotify_create(dir, dentry);
        return error;
  }
+ EXPORT_SYMBOL(vfs_symlink);
  
  SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
                int, newdfd, const char __user *, newname)
@@@ -3893,6 -3912,7 +3911,7 @@@ int vfs_link(struct dentry *old_dentry
                fsnotify_link(dir, inode, new_dentry);
        return error;
  }
+ EXPORT_SYMBOL(vfs_link);
  
  /*
   * Hardlinks are often used in delicate situations.  We avoid
@@@ -3973,28 -3993,7 +3992,28 @@@ SYSCALL_DEFINE2(link, const char __use
        return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
  }
  
 -/*
 +/**
 + * vfs_rename - rename a filesystem object
 + * @old_dir:  parent of source
 + * @old_dentry:       source
 + * @new_dir:  parent of destination
 + * @new_dentry:       destination
 + * @delegated_inode: returns an inode needing a delegation break
 + * @flags:    rename flags
 + *
 + * The caller must hold multiple mutexes--see lock_rename().
 + *
 + * If vfs_rename discovers a delegation in need of breaking at either
 + * the source or destination, it will return -EWOULDBLOCK and place a
 + * reference to the inode in delegated_inode.  The caller should then
 + * break the delegation and retry.  Because breaking a delegation may
 + * take a long time, the caller should drop all locks before doing
 + * so.
 + *
 + * Alternatively, a caller may pass NULL for delegated_inode.  This may
 + * be appropriate for callers that expect the underlying filesystem not
 + * to be NFS exported.
 + *
   * The worst of all namespace operations - renaming directory. "Perverted"
   * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
   * Problems:
   *       ->i_mutex on parents, which works but leads to some truly excessive
   *       locking].
   */
 -static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 -                        struct inode *new_dir, struct dentry *new_dentry)
 +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 +             struct inode *new_dir, struct dentry *new_dentry,
 +             struct inode **delegated_inode, unsigned int flags)
  {
 -      int error = 0;
 +      int error;
 +      bool is_dir = d_is_dir(old_dentry);
 +      const unsigned char *old_name;
 +      struct inode *source = old_dentry->d_inode;
        struct inode *target = new_dentry->d_inode;
 +      bool new_is_dir = false;
        unsigned max_links = new_dir->i_sb->s_max_links;
  
 +      if (source == target)
 +              return 0;
 +
 +      error = may_delete(old_dir, old_dentry, is_dir);
 +      if (error)
 +              return error;
 +
 +      if (!target) {
 +              error = may_create(new_dir, new_dentry);
 +      } else {
 +              new_is_dir = d_is_dir(new_dentry);
 +
 +              if (!(flags & RENAME_EXCHANGE))
 +                      error = may_delete(new_dir, new_dentry, is_dir);
 +              else
 +                      error = may_delete(new_dir, new_dentry, new_is_dir);
 +      }
 +      if (error)
 +              return error;
 +
 +      if (!old_dir->i_op->rename)
 +              return -EPERM;
 +
 +      if (flags && !old_dir->i_op->rename2)
 +              return -EINVAL;
 +
        /*
         * If we are going to change the parent - check write permissions,
         * we'll need to flip '..'.
         */
        if (new_dir != old_dir) {
 -              error = inode_permission(old_dentry->d_inode, MAY_WRITE);
 -              if (error)
 -                      return error;
 +              if (is_dir) {
 +                      error = inode_permission(source, MAY_WRITE);
 +                      if (error)
 +                              return error;
 +              }
 +              if ((flags & RENAME_EXCHANGE) && new_is_dir) {
 +                      error = inode_permission(target, MAY_WRITE);
 +                      if (error)
 +                              return error;
 +              }
        }
  
 -      error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
 +      error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry,
 +                                    flags);
        if (error)
                return error;
  
 +      old_name = fsnotify_oldname_init(old_dentry->d_name.name);
        dget(new_dentry);
 -      if (target)
 +      if (!is_dir || (flags & RENAME_EXCHANGE))
 +              lock_two_nondirectories(source, target);
 +      else if (target)
                mutex_lock(&target->i_mutex);
  
        error = -EBUSY;
        if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
                goto out;
  
 -      error = -EMLINK;
 -      if (max_links && !target && new_dir != old_dir &&
 -          new_dir->i_nlink >= max_links)
 -              goto out;
 -
 -      if (target)
 +      if (max_links && new_dir != old_dir) {
 +              error = -EMLINK;
 +              if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links)
 +                      goto out;
 +              if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir &&
 +                  old_dir->i_nlink >= max_links)
 +                      goto out;
 +      }
 +      if (is_dir && !(flags & RENAME_EXCHANGE) && target)
                shrink_dcache_parent(new_dentry);
 -      error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 -      if (error)
 -              goto out;
 -
 -      if (target) {
 -              target->i_flags |= S_DEAD;
 -              dont_mount(new_dentry);
 +      if (!is_dir) {
 +              error = try_break_deleg(source, delegated_inode);
 +              if (error)
 +                      goto out;
        }
 -out:
 -      if (target)
 -              mutex_unlock(&target->i_mutex);
 -      dput(new_dentry);
 -      if (!error)
 -              if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
 -                      d_move(old_dentry,new_dentry);
 -      return error;
 -}
 -
 -static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 -                          struct inode *new_dir, struct dentry *new_dentry,
 -                          struct inode **delegated_inode)
 -{
 -      struct inode *target = new_dentry->d_inode;
 -      struct inode *source = old_dentry->d_inode;
 -      int error;
 -
 -      error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
 -      if (error)
 -              return error;
 -
 -      dget(new_dentry);
 -      lock_two_nondirectories(source, target);
 -
 -      error = -EBUSY;
 -      if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
 -              goto out;
 -
 -      error = try_break_deleg(source, delegated_inode);
 -      if (error)
 -              goto out;
 -      if (target) {
 +      if (target && !new_is_dir) {
                error = try_break_deleg(target, delegated_inode);
                if (error)
                        goto out;
        }
 -      error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 +      if (!flags) {
 +              error = old_dir->i_op->rename(old_dir, old_dentry,
 +                                            new_dir, new_dentry);
 +      } else {
 +              error = old_dir->i_op->rename2(old_dir, old_dentry,
 +                                             new_dir, new_dentry, flags);
 +      }
        if (error)
                goto out;
  
 -      if (target)
 +      if (!(flags & RENAME_EXCHANGE) && target) {
 +              if (is_dir)
 +                      target->i_flags |= S_DEAD;
                dont_mount(new_dentry);
 -      if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
 -              d_move(old_dentry, new_dentry);
 +      }
 +      if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
 +              if (!(flags & RENAME_EXCHANGE))
 +                      d_move(old_dentry, new_dentry);
 +              else
 +                      d_exchange(old_dentry, new_dentry);
 +      }
  out:
 -      unlock_two_nondirectories(source, target);
 +      if (!is_dir || (flags & RENAME_EXCHANGE))
 +              unlock_two_nondirectories(source, target);
 +      else if (target)
 +              mutex_unlock(&target->i_mutex);
        dput(new_dentry);
 -      return error;
 -}
 -
 -/**
 - * vfs_rename - rename a filesystem object
 - * @old_dir:  parent of source
 - * @old_dentry:       source
 - * @new_dir:  parent of destination
 - * @new_dentry:       destination
 - * @delegated_inode: returns an inode needing a delegation break
 - *
 - * The caller must hold multiple mutexes--see lock_rename()).
 - *
 - * If vfs_rename discovers a delegation in need of breaking at either
 - * the source or destination, it will return -EWOULDBLOCK and return a
 - * reference to the inode in delegated_inode.  The caller should then
 - * break the delegation and retry.  Because breaking a delegation may
 - * take a long time, the caller should drop all locks before doing
 - * so.
 - *
 - * Alternatively, a caller may pass NULL for delegated_inode.  This may
 - * be appropriate for callers that expect the underlying filesystem not
 - * to be NFS exported.
 - */
 -int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 -             struct inode *new_dir, struct dentry *new_dentry,
 -             struct inode **delegated_inode)
 -{
 -      int error;
 -      int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
 -      const unsigned char *old_name;
 -
 -      if (old_dentry->d_inode == new_dentry->d_inode)
 -              return 0;
 - 
 -      error = may_delete(old_dir, old_dentry, is_dir);
 -      if (error)
 -              return error;
 -
 -      if (!new_dentry->d_inode)
 -              error = may_create(new_dir, new_dentry);
 -      else
 -              error = may_delete(new_dir, new_dentry, is_dir);
 -      if (error)
 -              return error;
 -
 -      if (!old_dir->i_op->rename)
 -              return -EPERM;
 -
 -      old_name = fsnotify_oldname_init(old_dentry->d_name.name);
 -
 -      if (is_dir)
 -              error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
 -      else
 -              error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
 -      if (!error)
 +      if (!error) {
                fsnotify_move(old_dir, new_dir, old_name, is_dir,
 -                            new_dentry->d_inode, old_dentry);
 +                            !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
 +              if (flags & RENAME_EXCHANGE) {
 +                      fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
 +                                    new_is_dir, NULL, new_dentry);
 +              }
 +      }
        fsnotify_oldname_free(old_name);
  
        return error;
  }
+ EXPORT_SYMBOL(vfs_rename);
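
A minimal sketch of the retry pattern the kernel-doc above describes
(illustrative caller using break_deleg_wait() from linux/fs.h; real
callers such as the rename syscall below also drop and retake locks):

    static int rename_retrying_deleg(struct inode *old_dir,
                                     struct dentry *old_de,
                                     struct inode *new_dir,
                                     struct dentry *new_de)
    {
            struct inode *delegated_inode = NULL;
            int error;
    retry:
            /* parents locked via lock_rename() in real callers */
            error = vfs_rename(old_dir, old_de, new_dir, new_de,
                               &delegated_inode, 0);
            if (delegated_inode) {
                    /* drop locks before waiting in real code */
                    error = break_deleg_wait(&delegated_inode);
                    if (!error)
                            goto retry;
            }
            return error;
    }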
  
 -SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 -              int, newdfd, const char __user *, newname)
 +SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
 +              int, newdfd, const char __user *, newname, unsigned int, flags)
  {
        struct dentry *old_dir, *new_dir;
        struct dentry *old_dentry, *new_dentry;
        unsigned int lookup_flags = 0;
        bool should_retry = false;
        int error;
 +
 +      if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
 +              return -EINVAL;
 +
 +      if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE))
 +              return -EINVAL;
 +
  retry:
        from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags);
        if (IS_ERR(from)) {
                goto exit2;
  
        new_dir = newnd.path.dentry;
 +      if (flags & RENAME_NOREPLACE)
 +              error = -EEXIST;
        if (newnd.last_type != LAST_NORM)
                goto exit2;
  
  
        oldnd.flags &= ~LOOKUP_PARENT;
        newnd.flags &= ~LOOKUP_PARENT;
 -      newnd.flags |= LOOKUP_RENAME_TARGET;
 +      if (!(flags & RENAME_EXCHANGE))
 +              newnd.flags |= LOOKUP_RENAME_TARGET;
  
  retry_deleg:
        trap = lock_rename(new_dir, old_dir);
        error = -ENOENT;
        if (d_is_negative(old_dentry))
                goto exit4;
 +      new_dentry = lookup_hash(&newnd);
 +      error = PTR_ERR(new_dentry);
 +      if (IS_ERR(new_dentry))
 +              goto exit4;
 +      error = -EEXIST;
 +      if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
 +              goto exit5;
 +      if (flags & RENAME_EXCHANGE) {
 +              error = -ENOENT;
 +              if (d_is_negative(new_dentry))
 +                      goto exit5;
 +
 +              if (!d_is_dir(new_dentry)) {
 +                      error = -ENOTDIR;
 +                      if (newnd.last.name[newnd.last.len])
 +                              goto exit5;
 +              }
 +      }
        /* unless the source is a directory trailing slashes give -ENOTDIR */
 -      if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) {
 +      if (!d_is_dir(old_dentry)) {
                error = -ENOTDIR;
                if (oldnd.last.name[oldnd.last.len])
 -                      goto exit4;
 -              if (newnd.last.name[newnd.last.len])
 -                      goto exit4;
 +                      goto exit5;
 +              if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len])
 +                      goto exit5;
        }
        /* source should not be ancestor of target */
        error = -EINVAL;
        if (old_dentry == trap)
 -              goto exit4;
 -      new_dentry = lookup_hash(&newnd);
 -      error = PTR_ERR(new_dentry);
 -      if (IS_ERR(new_dentry))
 -              goto exit4;
 +              goto exit5;
        /* target should not be an ancestor of source */
 -      error = -ENOTEMPTY;
 +      if (!(flags & RENAME_EXCHANGE))
 +              error = -ENOTEMPTY;
        if (new_dentry == trap)
                goto exit5;
  
        error = security_path_rename(&oldnd.path, old_dentry,
 -                                   &newnd.path, new_dentry);
 +                                   &newnd.path, new_dentry, flags);
        if (error)
                goto exit5;
        error = vfs_rename(old_dir->d_inode, old_dentry,
 -                                 new_dir->d_inode, new_dentry,
 -                                 &delegated_inode);
 +                         new_dir->d_inode, new_dentry,
 +                         &delegated_inode, flags);
  exit5:
        dput(new_dentry);
  exit4:
@@@ -4293,22 -4292,14 +4313,20 @@@ exit
        return error;
  }
  
 +SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 +              int, newdfd, const char __user *, newname)
 +{
 +      return sys_renameat2(olddfd, oldname, newdfd, newname, 0);
 +}
 +
  SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
  {
 -      return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname);
 +      return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
  }
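
From user space the new syscall can be exercised as below (a sketch;
SYS_renameat2 needs headers that already know the syscall number, and
the flag values match the uapi definitions in this series):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef RENAME_NOREPLACE
    #define RENAME_NOREPLACE (1 << 0)
    #endif
    #ifndef RENAME_EXCHANGE
    #define RENAME_EXCHANGE  (1 << 1)
    #endif

    int main(void)
    {
    #ifdef SYS_renameat2
            /* atomically swap two paths; fails with EINVAL if the
             * filesystem lacks ->rename2() */
            if (syscall(SYS_renameat2, AT_FDCWD, "a", AT_FDCWD, "b",
                        RENAME_EXCHANGE) == -1)
                    perror("renameat2");
    #endif
            return 0;
    }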
  
- int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
+ int readlink_copy(char __user *buffer, int buflen, const char *link)
  {
-       int len;
-       len = PTR_ERR(link);
+       int len = PTR_ERR(link);
        if (IS_ERR(link))
                goto out;
  
  out:
        return len;
  }
+ EXPORT_SYMBOL(readlink_copy);
  
  /*
   * A helper for ->readlink().  This should be used *ONLY* for symlinks that
@@@ -4337,11 -4329,12 +4356,12 @@@ int generic_readlink(struct dentry *den
        if (IS_ERR(cookie))
                return PTR_ERR(cookie);
  
-       res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
+       res = readlink_copy(buffer, buflen, nd_get_link(&nd));
        if (dentry->d_inode->i_op->put_link)
                dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
        return res;
  }
+ EXPORT_SYMBOL(generic_readlink);
  
  /* get the link contents into pagecache */
  static char *page_getlink(struct dentry * dentry, struct page **ppage)
  int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
  {
        struct page *page = NULL;
-       char *s = page_getlink(dentry, &page);
-       int res = vfs_readlink(dentry,buffer,buflen,s);
+       int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page));
        if (page) {
                kunmap(page);
                page_cache_release(page);
        }
        return res;
  }
+ EXPORT_SYMBOL(page_readlink);
  
  void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
  {
        nd_set_link(nd, page_getlink(dentry, &page));
        return page;
  }
+ EXPORT_SYMBOL(page_follow_link_light);
  
  void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
  {
                page_cache_release(page);
        }
  }
+ EXPORT_SYMBOL(page_put_link);
  
  /*
   * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
@@@ -4423,45 -4418,18 +4445,18 @@@ retry
  fail:
        return err;
  }
+ EXPORT_SYMBOL(__page_symlink);
  
  int page_symlink(struct inode *inode, const char *symname, int len)
  {
        return __page_symlink(inode, symname, len,
                        !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
  }
+ EXPORT_SYMBOL(page_symlink);
  
  const struct inode_operations page_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
  };
- EXPORT_SYMBOL(user_path_at);
- EXPORT_SYMBOL(follow_down_one);
- EXPORT_SYMBOL(follow_down);
- EXPORT_SYMBOL(follow_up);
- EXPORT_SYMBOL(get_write_access); /* nfsd */
- EXPORT_SYMBOL(lock_rename);
- EXPORT_SYMBOL(lookup_one_len);
- EXPORT_SYMBOL(page_follow_link_light);
- EXPORT_SYMBOL(page_put_link);
- EXPORT_SYMBOL(page_readlink);
- EXPORT_SYMBOL(__page_symlink);
- EXPORT_SYMBOL(page_symlink);
  EXPORT_SYMBOL(page_symlink_inode_operations);
- EXPORT_SYMBOL(kern_path);
- EXPORT_SYMBOL(vfs_path_lookup);
- EXPORT_SYMBOL(inode_permission);
- EXPORT_SYMBOL(unlock_rename);
- EXPORT_SYMBOL(vfs_create);
- EXPORT_SYMBOL(vfs_link);
- EXPORT_SYMBOL(vfs_mkdir);
- EXPORT_SYMBOL(vfs_mknod);
- EXPORT_SYMBOL(generic_permission);
- EXPORT_SYMBOL(vfs_readlink);
- EXPORT_SYMBOL(vfs_rename);
- EXPORT_SYMBOL(vfs_rmdir);
- EXPORT_SYMBOL(vfs_symlink);
- EXPORT_SYMBOL(vfs_unlink);
- EXPORT_SYMBOL(dentry_unhash);
- EXPORT_SYMBOL(generic_readlink);
diff --combined fs/ncpfs/inode.c
index 81b4f643ecefda84444cca57481958bfca11ea06,ceeca64f059920606c9047fda5904e4addf173b6..e31e589369a49a930f45be2124114c7252c83723
@@@ -9,8 -9,6 +9,8 @@@
   *
   */
  
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 +
  #include <linux/module.h>
  
  #include <asm/uaccess.h>
@@@ -101,7 -99,6 +101,7 @@@ static void destroy_inodecache(void
  
  static int ncp_remount(struct super_block *sb, int *flags, char* data)
  {
 +      sync_filesystem(sb);
        *flags |= MS_NODIRATIME;
        return 0;
  }
@@@ -135,7 -132,7 +135,7 @@@ void ncp_update_inode(struct inode *ino
        NCP_FINFO(inode)->access = nwinfo->access;
        memcpy(NCP_FINFO(inode)->file_handle, nwinfo->file_handle,
                        sizeof(nwinfo->file_handle));
 -      DPRINTK("ncp_update_inode: updated %s, volnum=%d, dirent=%u\n",
 +      ncp_dbg(1, "updated %s, volnum=%d, dirent=%u\n",
                nwinfo->i.entryName, NCP_FINFO(inode)->volNumber,
                NCP_FINFO(inode)->dirEntNum);
  }
  static void ncp_update_dates(struct inode *inode, struct nw_info_struct *nwi)
  {
        /* NFS namespace mode overrides others if it's set. */
 -      DPRINTK(KERN_DEBUG "ncp_update_dates_and_mode: (%s) nfs.mode=0%o\n",
 -              nwi->entryName, nwi->nfs.mode);
 +      ncp_dbg(1, "(%s) nfs.mode=0%o\n", nwi->entryName, nwi->nfs.mode);
        if (nwi->nfs.mode) {
                /* XXX Security? */
                inode->i_mode = nwi->nfs.mode;
@@@ -231,7 -229,7 +231,7 @@@ static void ncp_set_attr(struct inode *
        
        ncp_update_attrs(inode, nwinfo);
  
 -      DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode);
 +      ncp_dbg(2, "inode->i_mode = %u\n", inode->i_mode);
  
        set_nlink(inode, 1);
        inode->i_uid = server->m.uid;
@@@ -259,7 -257,7 +259,7 @@@ ncp_iget(struct super_block *sb, struc
        struct inode *inode;
  
        if (info == NULL) {
 -              printk(KERN_ERR "ncp_iget: info is NULL\n");
 +              pr_err("%s: info is NULL\n", __func__);
                return NULL;
        }
  
                }
                insert_inode_hash(inode);
        } else
 -              printk(KERN_ERR "ncp_iget: iget failed!\n");
 +              pr_err("%s: iget failed!\n", __func__);
        return inode;
  }
  
  static void
  ncp_evict_inode(struct inode *inode)
  {
 -      truncate_inode_pages(&inode->i_data, 0);
 +      truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
  
        if (S_ISDIR(inode->i_mode)) {
 -              DDPRINTK("ncp_evict_inode: put directory %ld\n", inode->i_ino);
 +              ncp_dbg(2, "put directory %ld\n", inode->i_ino);
        }
  
        if (ncp_make_closed(inode) != 0) {
                /* We can't do anything but complain. */
 -              printk(KERN_ERR "ncp_evict_inode: could not close\n");
 +              pr_err("%s: could not close\n", __func__);
        }
  }
  
@@@ -470,9 -468,7 +470,7 @@@ static int ncp_fill_super(struct super_
  {
        struct ncp_mount_data_kernel data;
        struct ncp_server *server;
-       struct file *ncp_filp;
        struct inode *root_inode;
-       struct inode *sock_inode;
        struct socket *sock;
        int error;
        int default_bufsize;
        if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) ||
            !gid_valid(data.gid))
                goto out;
-       error = -EBADF;
-       ncp_filp = fget(data.ncp_fd);
-       if (!ncp_filp)
-               goto out;
-       error = -ENOTSOCK;
-       sock_inode = file_inode(ncp_filp);
-       if (!S_ISSOCK(sock_inode->i_mode))
-               goto out_fput;
-       sock = SOCKET_I(sock_inode);
+       sock = sockfd_lookup(data.ncp_fd, &error);
        if (!sock)
-               goto out_fput;
-               
+               goto out;
        if (sock->type == SOCK_STREAM)
                default_bufsize = 0xF000;
        else
        if (error)
                goto out_fput;
  
-       server->ncp_filp = ncp_filp;
        server->ncp_sock = sock;
        
        if (data.info_fd != -1) {
-               struct socket *info_sock;
-               error = -EBADF;
-               server->info_filp = fget(data.info_fd);
-               if (!server->info_filp)
-                       goto out_bdi;
-               error = -ENOTSOCK;
-               sock_inode = file_inode(server->info_filp);
-               if (!S_ISSOCK(sock_inode->i_mode))
-                       goto out_fput2;
-               info_sock = SOCKET_I(sock_inode);
+               struct socket *info_sock = sockfd_lookup(data.info_fd, &error);
                if (!info_sock)
-                       goto out_fput2;
+                       goto out_bdi;
+               server->info_sock = info_sock;
                error = -EBADFD;
                if (info_sock->type != SOCK_STREAM)
                        goto out_fput2;
-               server->info_sock = info_sock;
        }
  
  /*    server->lock = 0;       */
           now because of PATH_MAX changes.. */
        if (server->m.time_out < 1) {
                server->m.time_out = 10;
 -              printk(KERN_INFO "You need to recompile your ncpfs utils..\n");
 +              pr_info("You need to recompile your ncpfs utils..\n");
        }
        server->m.time_out = server->m.time_out * HZ / 100;
        server->m.file_mode = (server->m.file_mode & S_IRWXUGO) | S_IFREG;
        ncp_unlock_server(server);
        if (error < 0)
                goto out_rxbuf;
 -      DPRINTK("ncp_fill_super: NCP_SBP(sb) = %x\n", (int) NCP_SBP(sb));
 +      ncp_dbg(1, "NCP_SBP(sb) = %p\n", NCP_SBP(sb));
  
        error = -EMSGSIZE;      /* -EREMOTESIDEINCOMPATIBLE */
  #ifdef CONFIG_NCPFS_PACKET_SIGNING
        if (ncp_negotiate_buffersize(server, default_bufsize,
                                     &(server->buffer_size)) != 0)
                goto out_disconnect;
 -      DPRINTK("ncpfs: bufsize = %d\n", server->buffer_size);
 +      ncp_dbg(1, "bufsize = %d\n", server->buffer_size);
  
        memset(&finfo, 0, sizeof(finfo));
        finfo.i.attributes      = aDIR;
          root_inode = ncp_iget(sb, &finfo);
          if (!root_inode)
                goto out_disconnect;
 -      DPRINTK("ncp_fill_super: root vol=%d\n", NCP_FINFO(root_inode)->volNumber);
 +      ncp_dbg(1, "root vol=%d\n", NCP_FINFO(root_inode)->volNumber);
        sb->s_root = d_make_root(root_inode);
          if (!sb->s_root)
                goto out_disconnect;
@@@ -766,17 -743,12 +745,12 @@@ out_nls
        mutex_destroy(&server->root_setup_lock);
        mutex_destroy(&server->mutex);
  out_fput2:
-       if (server->info_filp)
-               fput(server->info_filp);
+       if (server->info_sock)
+               sockfd_put(server->info_sock);
  out_bdi:
        bdi_destroy(&server->bdi);
  out_fput:
-       /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
-        * 
-        * The previously used put_filp(ncp_filp); was bogus, since
-        * it doesn't perform proper unlocking.
-        */
-       fput(ncp_filp);
+       sockfd_put(sock);
  out:
        put_pid(data.wdog_pid);
        sb->s_fs_info = NULL;
@@@ -809,9 -781,9 +783,9 @@@ static void ncp_put_super(struct super_
        mutex_destroy(&server->root_setup_lock);
        mutex_destroy(&server->mutex);
  
-       if (server->info_filp)
-               fput(server->info_filp);
-       fput(server->ncp_filp);
+       if (server->info_sock)
+               sockfd_put(server->info_sock);
+       sockfd_put(server->ncp_sock);
        kill_pid(server->m.wdog_pid, SIGTERM, 1);
        put_pid(server->m.wdog_pid);
  
@@@ -986,7 -958,8 +960,7 @@@ int ncp_notify_change(struct dentry *de
        if ((attr->ia_valid & ATTR_SIZE) != 0) {
                int written;
  
 -              DPRINTK("ncpfs: trying to change size to %ld\n",
 -                      attr->ia_size);
 +              ncp_dbg(1, "trying to change size to %llu\n", attr->ia_size);
  
                if ((result = ncp_make_open(inode, O_WRONLY)) < 0) {
                        result = -EACCES;
@@@ -1072,7 -1045,7 +1046,7 @@@ MODULE_ALIAS_FS("ncpfs")
  static int __init init_ncp_fs(void)
  {
        int err;
 -      DPRINTK("ncpfs: init_ncp_fs called\n");
 +      ncp_dbg(1, "called\n");
  
        err = init_inodecache();
        if (err)
@@@ -1089,7 -1062,7 +1063,7 @@@ out1
  
  static void __exit exit_ncp_fs(void)
  {
 -      DPRINTK("ncpfs: exit_ncp_fs called\n");
 +      ncp_dbg(1, "called\n");
        unregister_filesystem(&ncp_fs_type);
        destroy_inodecache();
  }
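
The sockfd_lookup()/sockfd_put() pairing used in the ncpfs hunks above
replaces the open-coded fget() + S_ISSOCK() check. A minimal sketch of
the pattern (illustrative helper, not part of the diff):

    static struct socket *grab_socket(int fd)
    {
            int err;
            struct socket *sock = sockfd_lookup(fd, &err);

            if (!sock)
                    return ERR_PTR(err); /* -EBADF or -ENOTSOCK */
            return sock; /* release with sockfd_put(sock) */
    }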
diff --combined fs/ntfs/inode.c
index 9d8153ebacfb1c2784543eb319b4de4cdf5f16e7,4de660fe739c217df511f33088f6e848b756da57..f47af5e6e23037ed0c82b17274b098d00a617517
@@@ -1704,8 -1704,6 +1704,6 @@@ static int ntfs_read_locked_index_inode
        iput(bvi);
  skip_large_index_stuff:
        /* Setup the operations for this index inode. */
-       vi->i_op = NULL;
-       vi->i_fop = NULL;
        vi->i_mapping->a_ops = &ntfs_mst_aops;
        vi->i_blocks = ni->allocated_size >> 9;
        /*
@@@ -2259,7 -2257,7 +2257,7 @@@ void ntfs_evict_big_inode(struct inode 
  {
        ntfs_inode *ni = NTFS_I(vi);
  
 -      truncate_inode_pages(&vi->i_data, 0);
 +      truncate_inode_pages_final(&vi->i_data);
        clear_inode(vi);
  
  #ifdef NTFS_RW
diff --combined fs/ocfs2/cluster/tcp.c
index eb649d23a4de24e4cde6860389528e7b2ef919dc,ea63d6461f55f612d97f2bad7112be5debd5d284..dfda2ffdb16c15aa8338b1de6f58ccc42ff5b9ba
@@@ -262,17 -262,17 +262,17 @@@ static void o2net_update_recv_stats(str
  
  #endif /* CONFIG_OCFS2_FS_STATS */
  
 -static inline int o2net_reconnect_delay(void)
 +static inline unsigned int o2net_reconnect_delay(void)
  {
        return o2nm_single_cluster->cl_reconnect_delay_ms;
  }
  
 -static inline int o2net_keepalive_delay(void)
 +static inline unsigned int o2net_keepalive_delay(void)
  {
        return o2nm_single_cluster->cl_keepalive_delay_ms;
  }
  
 -static inline int o2net_idle_timeout(void)
 +static inline unsigned int o2net_idle_timeout(void)
  {
        return o2nm_single_cluster->cl_idle_timeout_ms;
  }
@@@ -916,57 -916,30 +916,30 @@@ static struct o2net_msg_handler *o2net_
  
  static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
  {
-       int ret;
-       mm_segment_t oldfs;
-       struct kvec vec = {
-               .iov_len = len,
-               .iov_base = data,
-       };
-       struct msghdr msg = {
-               .msg_iovlen = 1,
-               .msg_iov = (struct iovec *)&vec,
-                       .msg_flags = MSG_DONTWAIT,
-       };
-       oldfs = get_fs();
-       set_fs(get_ds());
-       ret = sock_recvmsg(sock, &msg, len, msg.msg_flags);
-       set_fs(oldfs);
-       return ret;
+       struct kvec vec = { .iov_len = len, .iov_base = data, };
+       struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
+       return kernel_recvmsg(sock, &msg, &vec, 1, len, msg.msg_flags);
  }
  
  static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec,
                              size_t veclen, size_t total)
  {
        int ret;
-       mm_segment_t oldfs;
-       struct msghdr msg = {
-               .msg_iov = (struct iovec *)vec,
-               .msg_iovlen = veclen,
-       };
+       struct msghdr msg;
  
        if (sock == NULL) {
                ret = -EINVAL;
                goto out;
        }
  
-       oldfs = get_fs();
-       set_fs(get_ds());
-       ret = sock_sendmsg(sock, &msg, total);
-       set_fs(oldfs);
-       if (ret != total) {
-               mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret,
-                    total);
-               if (ret >= 0)
-                       ret = -EPIPE; /* should be smarter, I bet */
-               goto out;
-       }
-       ret = 0;
+       ret = kernel_sendmsg(sock, &msg, vec, veclen, total);
+       if (likely(ret == total))
+               return 0;
+       mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, total);
+       if (ret >= 0)
+               ret = -EPIPE; /* should be smarter, I bet */
  out:
-       if (ret < 0)
-               mlog(0, "returning error: %d\n", ret);
+       mlog(0, "returning error: %d\n", ret);
        return ret;
  }
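
kernel_sendmsg() and kernel_recvmsg() perform the kernel-space iovec
setup internally, which is what lets the get_fs()/set_fs() dance above
be dropped. A minimal sketch of an equivalent sender (illustrative, not
part of the diff):

    static int send_one_buf(struct socket *sock, void *data, size_t len)
    {
            struct kvec vec = { .iov_base = data, .iov_len = len };
            struct msghdr msg = { .msg_flags = MSG_DONTWAIT };

            /* may return a short count; callers must check ret == len */
            return kernel_sendmsg(sock, &msg, &vec, 1, len);
    }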
  
@@@ -1964,30 -1937,18 +1937,30 @@@ static void o2net_listen_data_ready(str
                goto out;
        }
  
 -      /* ->sk_data_ready is also called for a newly established child socket
 -       * before it has been accepted and the acceptor has set up their
 -       * data_ready.. we only want to queue listen work for our listening
 -       * socket */
 +      /* This callback may be called twice when a new connection
 +       * is being established: a child socket inherits everything
 +       * from its parent LISTEN socket, including the parent's
 +       * data_ready callback. This leads to a hazard: in
 +       * o2net_accept_one() we are still initializing the child
 +       * socket and have not yet replaced the inherited data_ready
 +       * callback when data starts arriving. We avoid the hazard by
 +       * checking the socket state: the listening socket is in
 +       * TCP_LISTEN, while a new child socket is in TCP_ESTABLISHED,
 +       * in which case sk->sk_user_data is not a valid function
 +       * pointer.
 +       */
 +
        if (sk->sk_state == TCP_LISTEN) {
                mlog(ML_TCP, "bytes: %d\n", bytes);
                queue_work(o2net_wq, &o2net_listen_work);
 +      } else {
 +              ready = NULL;
        }
  
  out:
        read_unlock(&sk->sk_callback_lock);
 -      ready(sk, bytes);
 +      if (ready != NULL)
 +              ready(sk, bytes);
  }
  
  static int o2net_open_listening_sock(__be32 addr, __be16 port)
diff --combined fs/ocfs2/file.c
index ff33c5ef87f2b826532af886436be9adcc8f7046,9c27adf4ac7249f48bf548621899006ed152a72f..8970dcf74de53e71539e8fad7042a55f66a2c713
@@@ -175,13 -175,9 +175,13 @@@ static int ocfs2_sync_file(struct file 
                           int datasync)
  {
        int err = 0;
 -      journal_t *journal;
        struct inode *inode = file->f_mapping->host;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 +      struct ocfs2_inode_info *oi = OCFS2_I(inode);
 +      journal_t *journal = osb->journal->j_journal;
 +      int ret;
 +      tid_t commit_tid;
 +      bool needs_barrier = false;
  
        trace_ocfs2_sync_file(inode, file, file->f_path.dentry,
                              OCFS2_I(inode)->ip_blkno,
        if (err)
                return err;
  
 -      /*
 -       * Probably don't need the i_mutex at all in here, just putting it here
 -       * to be consistent with how fsync used to be called, someone more
 -       * familiar with the fs could possibly remove it.
 -       */
 -      mutex_lock(&inode->i_mutex);
 -      if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
 -              /*
 -               * We still have to flush drive's caches to get data to the
 -               * platter
 -               */
 -              if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
 -                      blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
 -              goto bail;
 +      commit_tid = datasync ? oi->i_datasync_tid : oi->i_sync_tid;
 +      if (journal->j_flags & JBD2_BARRIER &&
 +          !jbd2_trans_will_send_data_barrier(journal, commit_tid))
 +              needs_barrier = true;
 +      err = jbd2_complete_transaction(journal, commit_tid);
 +      if (needs_barrier) {
 +              ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
 +              if (!err)
 +                      err = ret;
        }
  
 -      journal = osb->journal->j_journal;
 -      err = jbd2_journal_force_commit(journal);
 -
 -bail:
        if (err)
                mlog_errno(err);
 -      mutex_unlock(&inode->i_mutex);
  
        return (err < 0) ? -EIO : 0;
  }
@@@ -286,7 -292,6 +286,7 @@@ int ocfs2_update_inode_atime(struct ino
        inode->i_atime = CURRENT_TIME;
        di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
        di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
 +      ocfs2_update_inode_fsync_trans(handle, inode, 0);
        ocfs2_journal_dirty(handle, bh);
  
  out_commit:
@@@ -336,7 -341,6 +336,7 @@@ int ocfs2_simple_size_update(struct ino
        if (ret < 0)
                mlog_errno(ret);
  
 +      ocfs2_update_inode_fsync_trans(handle, inode, 0);
        ocfs2_commit_trans(osb, handle);
  out:
        return ret;
@@@ -431,7 -435,6 +431,7 @@@ static int ocfs2_orphan_for_truncate(st
        di->i_size = cpu_to_le64(new_i_size);
        di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
        di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
 +      ocfs2_update_inode_fsync_trans(handle, inode, 0);
  
        ocfs2_journal_dirty(handle, fe_bh);
  
@@@ -647,7 -650,7 +647,7 @@@ restarted_transaction
                        mlog_errno(status);
                goto leave;
        }
 -
 +      ocfs2_update_inode_fsync_trans(handle, inode, 1);
        ocfs2_journal_dirty(handle, bh);
  
        spin_lock(&OCFS2_I(inode)->ip_lock);
@@@ -740,7 -743,6 +740,7 @@@ static handle_t *ocfs2_zero_start_order
                                      OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret)
                mlog_errno(ret);
 +      ocfs2_update_inode_fsync_trans(handle, inode, 1);
  
  out:
        if (ret) {
@@@ -838,7 -840,6 +838,7 @@@ static int ocfs2_write_zero_page(struc
                di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
                di->i_mtime_nsec = di->i_ctime_nsec;
                ocfs2_journal_dirty(handle, di_bh);
 +              ocfs2_update_inode_fsync_trans(handle, inode, 1);
                ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
        }
  
@@@ -1343,7 -1344,6 +1343,7 @@@ static int __ocfs2_write_remove_suid(st
  
        di = (struct ocfs2_dinode *) bh->b_data;
        di->i_mode = cpu_to_le16(inode->i_mode);
 +      ocfs2_update_inode_fsync_trans(handle, inode, 0);
  
        ocfs2_journal_dirty(handle, bh);
  
@@@ -1576,7 -1576,6 +1576,7 @@@ static int ocfs2_zero_partial_clusters(
                if (ret)
                        mlog_errno(ret);
        }
 +      ocfs2_update_inode_fsync_trans(handle, inode, 1);
  
        ocfs2_commit_trans(osb, handle);
  out:
@@@ -2062,6 -2061,13 +2062,6 @@@ out
        return ret;
  }
  
 -static void ocfs2_aiodio_wait(struct inode *inode)
 -{
 -      wait_queue_head_t *wq = ocfs2_ioend_wq(inode);
 -
 -      wait_event(*wq, (atomic_read(&OCFS2_I(inode)->ip_unaligned_aio) == 0));
 -}
 -
  static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
  {
        int blockmask = inode->i_sb->s_blocksize - 1;
@@@ -2339,8 -2345,10 +2339,8 @@@ relock
                 * Wait on previous unaligned aio to complete before
                 * proceeding.
                 */
 -              ocfs2_aiodio_wait(inode);
 -
 -              /* Mark the iocb as needing a decrement in ocfs2_dio_end_io */
 -              atomic_inc(&OCFS2_I(inode)->ip_unaligned_aio);
 +              mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio);
 +              /* Mark the iocb as needing an unlock in ocfs2_dio_end_io */
                ocfs2_iocb_set_unaligned_aio(iocb);
        }
  
  
        if (direct_io) {
                written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
-                                                   ppos, count, ocount);
+                                                   count, ocount);
                if (written < 0) {
                        ret = written;
                        goto out_dio;
                }
        } else {
+               struct iov_iter from;
+               iov_iter_init(&from, iov, nr_segs, count, 0);
                current->backing_dev_info = file->f_mapping->backing_dev_info;
-               written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
-                                                     ppos, count, 0);
+               written = generic_perform_write(file, &from, *ppos);
+               if (likely(written >= 0))
+                       iocb->ki_pos = *ppos + written;
                current->backing_dev_info = NULL;
        }
  
@@@ -2420,7 -2431,7 +2423,7 @@@ out_dio
  
        if (unaligned_dio) {
                ocfs2_iocb_clear_unaligned_aio(iocb);
 -              atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio);
 +              mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
        }
  
  out:
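
The unaligned-AIO gate above also changes shape: the removed atomic
counter plus ocfs2_aiodio_wait() open-coded one-writer-at-a-time
exclusion, which the new ip_unaligned_aio mutex expresses directly. For
AIO the submitter cannot unlock, so the iocb is flagged and completion
drops the mutex; roughly, as a sketch of both halves (assuming the
ocfs2_iocb_*_unaligned_aio helpers from fs/ocfs2/aops.h):

        /* submit side */
        mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio);
        ocfs2_iocb_set_unaligned_aio(iocb);     /* ask ->end_io to unlock */

        /* completion side, in ocfs2_dio_end_io() */
        if (ocfs2_iocb_is_unaligned_aio(iocb)) {
                ocfs2_iocb_clear_unaligned_aio(iocb);
                mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
        }
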
@@@ -2637,16 -2648,7 +2640,16 @@@ static loff_t ocfs2_file_llseek(struct 
        case SEEK_SET:
                break;
        case SEEK_END:
 -              offset += inode->i_size;
 +              /* SEEK_END requires the OCFS2 inode lock for the file
 +               * because it references the file's size.
 +               */
 +              ret = ocfs2_inode_lock(inode, NULL, 0);
 +              if (ret < 0) {
 +                      mlog_errno(ret);
 +                      goto out;
 +              }
 +              offset += i_size_read(inode);
 +              ocfs2_inode_unlock(inode, 0);
                break;
        case SEEK_CUR:
                if (offset == 0) {
diff --combined fs/open.c
index 631aea815def32946433b8aebed9a312d0fc872c,37f65fa44dbfa7c2ce066934449348602a6e27b1..3d30eb1fc95e383e50e91605d3526161bcfdebde
+++ b/fs/open.c
@@@ -231,13 -231,7 +231,13 @@@ int do_fallocate(struct file *file, in
                return -EINVAL;
  
        /* Return error if mode is not supported */
 -      if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
 +      if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
 +                   FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
 +              return -EOPNOTSUPP;
 +
 +      /* Punch hole and zero range are mutually exclusive */
 +      if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
 +          (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
                return -EOPNOTSUPP;
  
        /* Punch hole must have keep size set */
            !(mode & FALLOC_FL_KEEP_SIZE))
                return -EOPNOTSUPP;
  
  +      /* Collapse range must be used on its own; no other mode flags allowed */
 +      if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
 +          (mode & ~FALLOC_FL_COLLAPSE_RANGE))
 +              return -EINVAL;
 +
        if (!(file->f_mode & FMODE_WRITE))
                return -EBADF;
  
 -      /* It's not possible punch hole on append only file */
 -      if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
 +      /*
  +       * It's not possible to punch a hole or collapse a range
  +       * on an append-only file.
 +       */
 +      if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
 +          && IS_APPEND(inode))
                return -EPERM;
  
        if (IS_IMMUTABLE(inode))
        if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
                return -EFBIG;
  
 +      /*
  +       * A collapse range must end before EOF; a range that reaches or
  +       * crosses EOF is effectively a truncate and is rejected.
 +       */
 +      if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
 +          (offset + len >= i_size_read(inode)))
 +              return -EINVAL;
 +
        if (!file->f_op->fallocate)
                return -EOPNOTSUPP;
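
From userspace, the two new modes look like this (a hedged example:
collapse must be block-aligned, used alone, and end before EOF, or the
checks above return EINVAL/EOPNOTSUPP):

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <linux/falloc.h>

        /* Delete [off, off+len) from the file, shifting the tail down. */
        static int collapse(int fd, off_t off, off_t len)
        {
                return fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, off, len);
        }

        /* Zero [off, off+len) in place, keeping the blocks allocated. */
        static int zero_range(int fd, off_t off, off_t len)
        {
                return fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
                                 off, len);
        }
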
  
@@@ -655,35 -632,6 +655,6 @@@ out
        return error;
  }
  
- /*
-  * You have to be very careful that these write
-  * counts get cleaned up in error cases and
-  * upon __fput().  This should probably never
-  * be called outside of __dentry_open().
-  */
- static inline int __get_file_write_access(struct inode *inode,
-                                         struct vfsmount *mnt)
- {
-       int error;
-       error = get_write_access(inode);
-       if (error)
-               return error;
-       /*
-        * Do not take mount writer counts on
-        * special files since no writes to
-        * the mount itself will occur.
-        */
-       if (!special_file(inode->i_mode)) {
-               /*
-                * Balanced in __fput()
-                */
-               error = __mnt_want_write(mnt);
-               if (error)
-                       put_write_access(inode);
-       }
-       return error;
- }
  int open_check_o_direct(struct file *f)
  {
        /* NB: we're sure to have correct a_ops only after f_op->open */
@@@ -708,26 -656,28 +679,28 @@@ static int do_dentry_open(struct file *
        f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
                                FMODE_PREAD | FMODE_PWRITE;
  
-       if (unlikely(f->f_flags & O_PATH))
-               f->f_mode = FMODE_PATH;
        path_get(&f->f_path);
        inode = f->f_inode = f->f_path.dentry->d_inode;
-       if (f->f_mode & FMODE_WRITE) {
-               error = __get_file_write_access(inode, f->f_path.mnt);
-               if (error)
-                       goto cleanup_file;
-               if (!special_file(inode->i_mode))
-                       file_take_write(f);
-       }
        f->f_mapping = inode->i_mapping;
  
-       if (unlikely(f->f_mode & FMODE_PATH)) {
+       if (unlikely(f->f_flags & O_PATH)) {
+               f->f_mode = FMODE_PATH;
                f->f_op = &empty_fops;
                return 0;
        }
  
+       if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
+               error = get_write_access(inode);
+               if (unlikely(error))
+                       goto cleanup_file;
+               error = __mnt_want_write(f->f_path.mnt);
+               if (unlikely(error)) {
+                       put_write_access(inode);
+                       goto cleanup_file;
+               }
+               f->f_mode |= FMODE_WRITER;
+       }
        /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
        if (S_ISREG(inode->i_mode))
                f->f_mode |= FMODE_ATOMIC_POS;
  
  cleanup_all:
        fops_put(f->f_op);
-       if (f->f_mode & FMODE_WRITE) {
+       if (f->f_mode & FMODE_WRITER) {
                put_write_access(inode);
-               if (!special_file(inode->i_mode)) {
-                       /*
-                        * We don't consider this a real
-                        * mnt_want/drop_write() pair
-                        * because it all happenend right
-                        * here, so just reset the state.
-                        */
-                       file_reset_write(f);
-                       __mnt_drop_write(f->f_path.mnt);
-               }
+               __mnt_drop_write(f->f_path.mnt);
        }
  cleanup_file:
        path_put(&f->f_path);
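
Net effect of the open-path rework: write access to the inode and the
mount is taken in exactly one place, recorded as FMODE_WRITER, and
dropped against that single bit, both in cleanup_all here and in
__fput() (fs/file_table.c, converted in this same series). That is what
lets the FILE_MNT_WRITE_* bookkeeping and CONFIG_DEBUG_WRITECOUNT go
away in the include/linux/fs.h and lib/Kconfig.debug hunks further
down. The pairing, as a sketch:

        /* open, regular writable files only */
        get_write_access(inode);
        __mnt_want_write(mnt);
        f->f_mode |= FMODE_WRITER;      /* remember we hold both */

        /* final fput */
        if (file->f_mode & FMODE_WRITER) {
                put_write_access(inode);
                __mnt_drop_write(mnt);
        }
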
diff --combined fs/xfs/xfs_file.c
index 003c0051b62fa34ce086154413095b4fbdb4317d,c3f4289f64976c3630bec8ca9e44f63062ae99f4..79e96ce987331cad3aab2d0a7513ce19cad2fcb4
@@@ -699,7 -699,7 +699,7 @@@ xfs_file_dio_aio_write
  
        trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
        ret = generic_file_direct_write(iocb, iovp,
-                       &nr_segs, pos, &iocb->ki_pos, count, ocount);
+                       &nr_segs, pos, count, ocount);
  
  out:
        xfs_rw_iunlock(ip, iolock);
@@@ -715,7 -715,7 +715,7 @@@ xfs_file_buffered_aio_write
        const struct iovec      *iovp,
        unsigned long           nr_segs,
        loff_t                  pos,
-       size_t                  ocount)
+       size_t                  count)
  {
        struct file             *file = iocb->ki_filp;
        struct address_space    *mapping = file->f_mapping;
        ssize_t                 ret;
        int                     enospc = 0;
        int                     iolock = XFS_IOLOCK_EXCL;
-       size_t                  count = ocount;
+       struct iov_iter         from;
  
        xfs_rw_ilock(ip, iolock);
  
        if (ret)
                goto out;
  
+       iov_iter_init(&from, iovp, nr_segs, count, 0);
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
  
  write_retry:
        trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_buffered_write(iocb, iovp, nr_segs,
-                       pos, &iocb->ki_pos, count, 0);
+       ret = generic_perform_write(file, &from, pos);
+       if (likely(ret >= 0))
+               iocb->ki_pos = pos + ret;
        /*
         * If we just got an ENOSPC, try to write back all dirty inodes to
         * convert delalloc space to free up some of the excess reserved
@@@ -823,8 -824,7 +824,8 @@@ xfs_file_fallocate
  
        if (!S_ISREG(inode->i_mode))
                return -EINVAL;
 -      if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
 +      if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
 +                   FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
                return -EOPNOTSUPP;
  
        xfs_ilock(ip, XFS_IOLOCK_EXCL);
                error = xfs_free_file_space(ip, offset, len);
                if (error)
                        goto out_unlock;
 +      } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
 +              unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
 +
 +              if (offset & blksize_mask || len & blksize_mask) {
 +                      error = -EINVAL;
 +                      goto out_unlock;
 +              }
 +
 +              ASSERT(offset + len < i_size_read(inode));
 +              new_size = i_size_read(inode) - len;
 +
 +              error = xfs_collapse_file_space(ip, offset, len);
 +              if (error)
 +                      goto out_unlock;
        } else {
                if (!(mode & FALLOC_FL_KEEP_SIZE) &&
                    offset + len > i_size_read(inode)) {
                                goto out_unlock;
                }
  
 -              error = xfs_alloc_file_space(ip, offset, len,
 -                                           XFS_BMAPI_PREALLOC);
 +              if (mode & FALLOC_FL_ZERO_RANGE)
 +                      error = xfs_zero_file_space(ip, offset, len);
 +              else
 +                      error = xfs_alloc_file_space(ip, offset, len,
 +                                                   XFS_BMAPI_PREALLOC);
                if (error)
                        goto out_unlock;
        }
        if (ip->i_d.di_mode & S_IXGRP)
                ip->i_d.di_mode &= ~S_ISGID;
  
 -      if (!(mode & FALLOC_FL_PUNCH_HOLE))
 +      if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
                ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
  
        xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@@ -1483,7 -1466,6 +1484,7 @@@ const struct file_operations xfs_dir_fi
  
  static const struct vm_operations_struct xfs_file_vm_ops = {
        .fault          = filemap_fault,
 +      .map_pages      = filemap_map_pages,
        .page_mkwrite   = xfs_vm_page_mkwrite,
        .remap_pages    = generic_file_remap_pages,
  };
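
The buffered-write conversions here and in ocfs2 above (and, per the
commit list, ceph) all reduce to one pattern: build an iov_iter over
the caller's iovec, call generic_perform_write(), and advance ki_pos by
hand, since the new helper no longer takes or updates ppos. A sketch of
the skeleton each filesystem now carries (not any one filesystem's
exact code):

        static ssize_t buffered_write_skeleton(struct kiocb *iocb,
                                               const struct iovec *iov,
                                               unsigned long nr_segs,
                                               loff_t pos, size_t count)
        {
                struct file *file = iocb->ki_filp;
                struct iov_iter from;
                ssize_t ret;

                iov_iter_init(&from, iov, nr_segs, count, 0);

                current->backing_dev_info = file->f_mapping->backing_dev_info;
                ret = generic_perform_write(file, &from, pos);
                if (likely(ret >= 0))
                        iocb->ki_pos = pos + ret;  /* helper no longer does this */
                current->backing_dev_info = NULL;

                return ret;
        }
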
diff --combined include/linux/bio.h
index 5aa372a7380c6f26ccb77e523a5fb28367a6beed,21e27208316ce9ee847a9d32da595184eddbabd0..bba5508269219a0726ede9f97cc3d22afa97f213
@@@ -216,9 -216,9 +216,9 @@@ static inline void bvec_iter_advance(st
  }
  
  #define for_each_bvec(bvl, bio_vec, iter, start)                      \
 -      for ((iter) = start;                                            \
 -           (bvl) = bvec_iter_bvec((bio_vec), (iter)),                 \
 -              (iter).bi_size;                                         \
 +      for (iter = (start);                                            \
 +           (iter).bi_size &&                                          \
  +              ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1);         \
             bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
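
The for_each_bvec() reorder is a correctness fix, not churn: the old
form evaluated bvec_iter_bvec() before testing bi_size, so iterating an
already-exhausted iterator still read a bvec entry once. Testing
(iter).bi_size first short-circuits the comma expression, and an empty
iterator never touches the array. Call sites are unchanged:

        struct bio_vec bv;
        struct bvec_iter iter;

        /* bvec/start supplied by the caller; consume() is a stand-in */
        for_each_bvec(bv, bvec, iter, start)
                consume(bv.bv_page, bv.bv_offset, bv.bv_len);
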
  
  
@@@ -388,7 -388,7 +388,7 @@@ struct sg_iovec
  struct rq_map_data;
  extern struct bio *bio_map_user_iov(struct request_queue *,
                                    struct block_device *,
-                                   struct sg_iovec *, int, int, gfp_t);
+                                   const struct sg_iovec *, int, int, gfp_t);
  extern void bio_unmap_user(struct bio *);
  extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
                                gfp_t);
@@@ -414,7 -414,8 +414,8 @@@ extern int bio_alloc_pages(struct bio *
  extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
                                 unsigned long, unsigned int, int, gfp_t);
  extern struct bio *bio_copy_user_iov(struct request_queue *,
-                                    struct rq_map_data *, struct sg_iovec *,
+                                    struct rq_map_data *,
+                                    const struct sg_iovec *,
                                     int, int, gfp_t);
  extern int bio_uncopy_user(struct bio *);
  void zero_fill_bio(struct bio *bio);
diff --combined include/linux/blkdev.h
index 99617cf7dd1a5bd29866e33e0ced51ae28279b3b,a639fd8a6d7b184cfeccf6c9805f5b3d8b14d3ce..0d84981ee03fc1c9d7bd5b656611b8b87af696e0
@@@ -99,7 -99,6 +99,7 @@@ struct request 
        union {
                struct call_single_data csd;
                struct work_struct mq_flush_work;
 +              unsigned long fifo_time;
        };
  
        struct request_queue *q;
        struct bio *bio;
        struct bio *biotail;
  
 -      struct hlist_node hash; /* merge hash */
 +      /*
 +       * The hash is used inside the scheduler, and killed once the
 +       * request reaches the dispatch list. The ipi_list is only used
 +       * to queue the request for softirq completion, which is long
 +       * after the request has been unhashed (and even removed from
 +       * the dispatch list).
 +       */
 +      union {
 +              struct hlist_node hash; /* merge hash */
 +              struct list_head ipi_list;
 +      };
 +
        /*
         * The rb_node is only used inside the io scheduler, requests
         * are pruned when moved to the dispatch queue. So let the
@@@ -835,8 -823,8 +835,8 @@@ extern int blk_rq_map_user(struct reque
  extern int blk_rq_unmap_user(struct bio *);
  extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
  extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
-                              struct rq_map_data *, struct sg_iovec *, int,
-                              unsigned int, gfp_t);
+                              struct rq_map_data *, const struct sg_iovec *,
+                              int, unsigned int, gfp_t);
  extern int blk_execute_rq(struct request_queue *, struct gendisk *,
                          struct request *, int);
  extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
diff --combined include/linux/fs.h
index 81048f9bc7837e3ce32fb12dddf158a09fbaf302,c309b7a0da2d6a5363c52153dd0e016a074493b1..7a9c5bca2b7694f5496dbcf793eea2920fd37af9
@@@ -48,6 -48,7 +48,7 @@@ struct cred
  struct swap_info_struct;
  struct seq_file;
  struct workqueue_struct;
+ struct iov_iter;
  
  extern void __init inode_init(void);
  extern void __init inode_init_early(void);
@@@ -125,6 -126,8 +126,8 @@@ typedef void (dio_iodone_t)(struct kioc
  
  /* File needs atomic accesses to f_pos */
  #define FMODE_ATOMIC_POS      ((__force fmode_t)0x8000)
+ /* Write access to underlying fs */
+ #define FMODE_WRITER          ((__force fmode_t)0x10000)
  
  /* File was opened by fanotify and shouldn't generate fanotify events */
  #define FMODE_NONOTIFY                ((__force fmode_t)0x1000000)
@@@ -293,38 -296,6 +296,6 @@@ struct page
  struct address_space;
  struct writeback_control;
  
- struct iov_iter {
-       const struct iovec *iov;
-       unsigned long nr_segs;
-       size_t iov_offset;
-       size_t count;
- };
- size_t iov_iter_copy_from_user_atomic(struct page *page,
-               struct iov_iter *i, unsigned long offset, size_t bytes);
- size_t iov_iter_copy_from_user(struct page *page,
-               struct iov_iter *i, unsigned long offset, size_t bytes);
- void iov_iter_advance(struct iov_iter *i, size_t bytes);
- int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
- size_t iov_iter_single_seg_count(const struct iov_iter *i);
- static inline void iov_iter_init(struct iov_iter *i,
-                       const struct iovec *iov, unsigned long nr_segs,
-                       size_t count, size_t written)
- {
-       i->iov = iov;
-       i->nr_segs = nr_segs;
-       i->iov_offset = 0;
-       i->count = count + written;
-       iov_iter_advance(i, written);
- }
- static inline size_t iov_iter_count(struct iov_iter *i)
- {
-       return i->count;
- }
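
The iterator API does not disappear; it moves. The helpers now live in
mm/iov_iter.c (see the mm/Makefile hunk below), and the struct plus
prototypes are, as far as I can tell from the series, now found in
include/linux/uio.h, which is what users include instead:

        #include <linux/uio.h>  /* struct iov_iter, iov_iter_init(), ... */
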
  /*
   * "descriptor" for what we're up to with a read.
   * This allows us to use the same read code yet
@@@ -383,7 -354,7 +354,7 @@@ struct address_space_operations 
        int (*migratepage) (struct address_space *,
                        struct page *, struct page *, enum migrate_mode);
        int (*launder_page) (struct page *);
-       int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
+       int (*is_partially_uptodate) (struct page *, unsigned long,
                                        unsigned long);
        void (*is_dirty_writeback) (struct page *, bool *, bool *);
        int (*error_remove_page)(struct address_space *, struct page *);
@@@ -419,7 -390,6 +390,7 @@@ struct address_space 
        struct mutex            i_mmap_mutex;   /* protect tree, count, list */
        /* Protected by tree_lock together with the radix tree */
        unsigned long           nrpages;        /* number of total pages */
 +      unsigned long           nrshadows;      /* number of shadow entries */
        pgoff_t                 writeback_index;/* writeback starts here */
        const struct address_space_operations *a_ops;   /* methods */
        unsigned long           flags;          /* error bits/gfp mask */
@@@ -590,9 -560,6 +561,9 @@@ struct inode 
        atomic_t                i_count;
        atomic_t                i_dio_count;
        atomic_t                i_writecount;
 +#ifdef CONFIG_IMA
 +      atomic_t                i_readcount; /* struct files open RO */
 +#endif
        const struct file_operations    *i_fop; /* former ->i_op->default_file_ops */
        struct file_lock        *i_flock;
        struct address_space    i_data;
        struct hlist_head       i_fsnotify_marks;
  #endif
  
 -#ifdef CONFIG_IMA
 -      atomic_t                i_readcount; /* struct files open RO */
 -#endif
        void                    *i_private; /* fs or device private pointer */
  };
  
@@@ -770,9 -740,6 +741,6 @@@ static inline int ra_has_index(struct f
                index <  ra->start + ra->size);
  }
  
- #define FILE_MNT_WRITE_TAKEN  1
- #define FILE_MNT_WRITE_RELEASED       2
  struct file {
        union {
                struct llist_node       fu_llist;
        struct list_head        f_tfile_llink;
  #endif /* #ifdef CONFIG_EPOLL */
        struct address_space    *f_mapping;
- #ifdef CONFIG_DEBUG_WRITECOUNT
-       unsigned long f_mnt_write_state;
- #endif
  } __attribute__((aligned(4)));        /* lest something weird decides that 2 is OK */
  
  struct file_handle {
@@@ -830,49 -794,6 +795,6 @@@ static inline struct file *get_file(str
  #define fput_atomic(x)        atomic_long_add_unless(&(x)->f_count, -1, 1)
  #define file_count(x) atomic_long_read(&(x)->f_count)
  
- #ifdef CONFIG_DEBUG_WRITECOUNT
- static inline void file_take_write(struct file *f)
- {
-       WARN_ON(f->f_mnt_write_state != 0);
-       f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
- }
- static inline void file_release_write(struct file *f)
- {
-       f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED;
- }
- static inline void file_reset_write(struct file *f)
- {
-       f->f_mnt_write_state = 0;
- }
- static inline void file_check_state(struct file *f)
- {
-       /*
-        * At this point, either both or neither of these bits
-        * should be set.
-        */
-       WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN);
-       WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED);
- }
- static inline int file_check_writeable(struct file *f)
- {
-       if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN)
-               return 0;
-       printk(KERN_WARNING "writeable file with no "
-                           "mnt_want_write()\n");
-       WARN_ON(1);
-       return -EINVAL;
- }
- #else /* !CONFIG_DEBUG_WRITECOUNT */
- static inline void file_take_write(struct file *filp) {}
- static inline void file_release_write(struct file *filp) {}
- static inline void file_reset_write(struct file *filp) {}
- static inline void file_check_state(struct file *filp) {}
- static inline int file_check_writeable(struct file *filp)
- {
-       return 0;
- }
- #endif /* CONFIG_DEBUG_WRITECOUNT */
  #define       MAX_NON_LFS     ((1UL<<31) - 1)
  
  /* Page cache limit. The filesystems should put that into their s_maxbytes 
  #define FL_SLEEP      128     /* A blocking lock */
  #define FL_DOWNGRADE_PENDING  256 /* Lease is being downgraded */
  #define FL_UNLOCK_PENDING     512 /* Lease is being broken */
 +#define FL_FILE_PVT   1024    /* lock is private to the file */
  
  /*
   * Special return value from posix_lock_file() and vfs_lock_file() for
@@@ -998,12 -918,12 +920,12 @@@ struct file_lock 
  extern void send_sigio(struct fown_struct *fown, int fd, int band);
  
  #ifdef CONFIG_FILE_LOCKING
 -extern int fcntl_getlk(struct file *, struct flock __user *);
 +extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *);
  extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
                        struct flock __user *);
  
  #if BITS_PER_LONG == 32
 -extern int fcntl_getlk64(struct file *, struct flock64 __user *);
 +extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *);
  extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
                        struct flock64 __user *);
  #endif
@@@ -1018,7 -938,7 +940,7 @@@ extern struct file_lock * locks_alloc_l
  extern void locks_copy_lock(struct file_lock *, struct file_lock *);
  extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
  extern void locks_remove_posix(struct file *, fl_owner_t);
 -extern void locks_remove_flock(struct file *);
 +extern void locks_remove_file(struct file *);
  extern void locks_release_private(struct file_lock *);
  extern void posix_test_lock(struct file *, struct file_lock *);
  extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
@@@ -1036,8 -956,7 +958,8 @@@ extern int lease_modify(struct file_loc
  extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
  extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
  #else /* !CONFIG_FILE_LOCKING */
 -static inline int fcntl_getlk(struct file *file, struct flock __user *user)
 +static inline int fcntl_getlk(struct file *file, unsigned int cmd,
 +                            struct flock __user *user)
  {
        return -EINVAL;
  }
@@@ -1049,8 -968,7 +971,8 @@@ static inline int fcntl_setlk(unsigned 
  }
  
  #if BITS_PER_LONG == 32
 -static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user)
 +static inline int fcntl_getlk64(struct file *file, unsigned int cmd,
 +                              struct flock64 __user *user)
  {
        return -EINVAL;
  }
@@@ -1091,7 -1009,7 +1013,7 @@@ static inline void locks_remove_posix(s
        return;
  }
  
 -static inline void locks_remove_flock(struct file *filp)
 +static inline void locks_remove_file(struct file *filp)
  {
        return;
  }
@@@ -1464,7 -1382,7 +1386,7 @@@ extern int vfs_symlink(struct inode *, 
  extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
  extern int vfs_rmdir(struct inode *, struct dentry *);
  extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
 -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **);
 +extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
  
  /*
   * VFS dentry helper functions.
@@@ -1575,8 -1493,6 +1497,8 @@@ struct inode_operations 
        int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
        int (*rename) (struct inode *, struct dentry *,
                        struct inode *, struct dentry *);
 +      int (*rename2) (struct inode *, struct dentry *,
 +                      struct inode *, struct dentry *, unsigned int);
        int (*setattr) (struct dentry *, struct iattr *);
        int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
        int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@@ -1919,11 -1835,6 +1841,11 @@@ extern int current_umask(void)
  extern void ihold(struct inode * inode);
  extern void iput(struct inode *);
  
 +static inline struct inode *file_inode(struct file *f)
 +{
 +      return f->f_inode;
 +}
 +
  /* /sys/fs */
  extern struct kobject *fs_kobj;
  
  #define FLOCK_VERIFY_WRITE 2
  
  #ifdef CONFIG_FILE_LOCKING
 -extern int locks_mandatory_locked(struct inode *);
 +extern int locks_mandatory_locked(struct file *);
  extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
  
  /*
@@@ -1956,10 -1867,10 +1878,10 @@@ static inline int mandatory_lock(struc
        return IS_MANDLOCK(ino) && __mandatory_lock(ino);
  }
  
 -static inline int locks_verify_locked(struct inode *inode)
 +static inline int locks_verify_locked(struct file *file)
  {
 -      if (mandatory_lock(inode))
 -              return locks_mandatory_locked(inode);
 +      if (mandatory_lock(file_inode(file)))
 +              return locks_mandatory_locked(file);
        return 0;
  }
  
@@@ -1979,12 -1890,6 +1901,12 @@@ static inline int locks_verify_truncate
  
  static inline int break_lease(struct inode *inode, unsigned int mode)
  {
 +      /*
 +       * Since this check is lockless, we must ensure that any refcounts
 +       * taken are done before checking inode->i_flock. Otherwise, we could
 +       * end up racing with tasks trying to set a new lease on this file.
 +       */
 +      smp_mb();
        if (inode->i_flock)
                return __break_lease(inode, mode, FL_LEASE);
        return 0;
@@@ -2020,7 -1925,7 +1942,7 @@@ static inline int break_deleg_wait(stru
  }
  
  #else /* !CONFIG_FILE_LOCKING */
 -static inline int locks_mandatory_locked(struct inode *inode)
 +static inline int locks_mandatory_locked(struct file *file)
  {
        return 0;
  }
@@@ -2042,7 -1947,7 +1964,7 @@@ static inline int mandatory_lock(struc
        return 0;
  }
  
 -static inline int locks_verify_locked(struct inode *inode)
 +static inline int locks_verify_locked(struct file *file)
  {
        return 0;
  }
@@@ -2316,6 -2221,11 +2238,6 @@@ static inline bool execute_ok(struct in
        return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
  }
  
 -static inline struct inode *file_inode(struct file *f)
 -{
 -      return f->f_inode;
 -}
 -
  static inline void file_start_write(struct file *file)
  {
        if (!S_ISREG(file_inode(file)->i_mode))
@@@ -2481,16 -2391,13 +2403,13 @@@ extern int generic_file_mmap(struct fil
  extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
  extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
                unsigned long size, pgoff_t pgoff);
- extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
  int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
  extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
- extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long,
-               loff_t *);
+ extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
  extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
  extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
-               unsigned long *, loff_t, loff_t *, size_t, size_t);
- extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
-               unsigned long, loff_t, loff_t *, size_t, ssize_t);
+               unsigned long *, loff_t, size_t, size_t);
+ extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
  extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
  extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
  extern int generic_segment_checks(const struct iovec *iov,
@@@ -2550,9 -2457,6 +2469,9 @@@ enum 
  
        /* filesystem does not support filling holes */
        DIO_SKIP_HOLES  = 0x02,
 +
 +      /* filesystem can handle aio writes beyond i_size */
 +      DIO_ASYNC_EXTEND = 0x04,
  };
  
  void dio_end_io(struct bio *bio, int error);
@@@ -2575,14 -2479,11 +2494,14 @@@ static inline ssize_t blockdev_direct_I
  void inode_dio_wait(struct inode *inode);
  void inode_dio_done(struct inode *inode);
  
 +extern void inode_set_flags(struct inode *inode, unsigned int flags,
 +                          unsigned int mask);
 +
  extern const struct file_operations generic_ro_fops;
  
  #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
  
- extern int vfs_readlink(struct dentry *, char __user *, int, const char *);
+ extern int readlink_copy(char __user *, int, const char *);
  extern int page_readlink(struct dentry *, char __user *, int);
  extern void *page_follow_link_light(struct dentry *, struct nameidata *);
  extern void page_put_link(struct dentry *, struct nameidata *, void *);
index 4d9389c79e61b4abe20666aa9f9c89d676a8c6b1,6dffcebe6105a5b372bcccb89bb248996908cf55..eb8b8ac6df3c844e2bd84903e0a50ff07f1575fe
@@@ -35,7 -35,7 +35,7 @@@ struct pipe_buffer 
   *    @tmp_page: cached released page
   *    @readers: number of current readers of this pipe
   *    @writers: number of current writers of this pipe
 - *    @files: number of struct file refering this pipe (protected by ->i_lock)
 + *    @files: number of struct file referring this pipe (protected by ->i_lock)
   *    @waiting_writers: number of writers blocked waiting for room
   *    @r_counter: reader counter
   *    @w_counter: writer counter
@@@ -82,23 -82,6 +82,6 @@@ struct pipe_buf_operations 
         */
        int can_merge;
  
-       /*
-        * ->map() returns a virtual address mapping of the pipe buffer.
-        * The last integer flag reflects whether this should be an atomic
-        * mapping or not. The atomic map is faster, however you can't take
-        * page faults before calling ->unmap() again. So if you need to eg
-        * access user data through copy_to/from_user(), then you must get
-        * a non-atomic map. ->map() uses the kmap_atomic slot for
-        * atomic maps, you have to be careful if mapping another page as
-        * source or destination for a copy.
-        */
-       void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int);
-       /*
-        * Undoes ->map(), finishes the virtual mapping of the pipe buffer.
-        */
-       void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *);
        /*
         * ->confirm() verifies that the data in the pipe buffer is there
         * and that the contents are good. If the pages in the pipe belong
@@@ -150,8 -133,6 +133,6 @@@ struct pipe_inode_info *alloc_pipe_info
  void free_pipe_info(struct pipe_inode_info *);
  
  /* Generic pipe buffer ops functions */
- void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
- void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
  void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
  int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
  int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
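
Killing ->map()/->unmap() removes an indirection that every
pipe_buf_operations instance filled with the same generic helpers.
Callers now map the buffer page themselves, picking an atomic or
sleeping kmap as their context allows; roughly:

        /* before: through the (now gone) ops, atomic variant */
        src = buf->ops->map(pipe, buf, 1);
        memcpy(dst, src + buf->offset, len);
        buf->ops->unmap(pipe, buf, src);

        /* after: map the page directly */
        src = kmap_atomic(buf->page);
        memcpy(dst, src + buf->offset, len);
        kunmap_atomic(src);
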
diff --combined kernel/relay.c
index 52d6a6f56261d26dabc458684a3529d15710e30b,7d38607649a3298f4a84efcd12208e43acd4b5fc..5a56d3c8dc03a799e5f53169c85c7f33f05727eb
@@@ -227,7 -227,7 +227,7 @@@ static void relay_destroy_buf(struct rc
   *    relay_remove_buf - remove a channel buffer
   *    @kref: target kernel reference that contains the relay buffer
   *
 - *    Removes the file from the fileystem, which also frees the
 + *    Removes the file from the filesystem, which also frees the
   *    rchan_buf_struct and the channel buffer.  Should only be called from
   *    kref_put().
   */
@@@ -1195,8 -1195,6 +1195,6 @@@ static void relay_pipe_buf_release(stru
  
  static const struct pipe_buf_operations relay_pipe_buf_ops = {
        .can_merge = 0,
-       .map = generic_pipe_buf_map,
-       .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = relay_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
@@@ -1253,7 -1251,7 +1251,7 @@@ static ssize_t subbuf_splice_actor(stru
        subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
        pidx = (read_start / PAGE_SIZE) % subbuf_pages;
        poff = read_start & ~PAGE_MASK;
-       nr_pages = min_t(unsigned int, subbuf_pages, pipe->buffers);
+       nr_pages = min_t(unsigned int, subbuf_pages, spd.nr_pages_max);
  
        for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) {
                unsigned int this_len, this_end, private;
diff --combined kernel/trace/trace.c
index e3e665685ee5286ff47d3f570ef7418a9d56072b,27924caaa124397754854385c635d5c374006269..737b0efa1a624aae606a0c50406de5f652e51862
@@@ -73,8 -73,7 +73,8 @@@ static struct tracer_flags dummy_tracer
        .opts = dummy_tracer_opt
  };
  
 -static int dummy_set_flag(u32 old_flags, u32 bit, int set)
 +static int
 +dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
  {
        return 0;
  }
@@@ -119,7 -118,7 +119,7 @@@ enum ftrace_dump_mode ftrace_dump_on_oo
  /* When set, tracing will stop when a WARN*() is hit */
  int __disable_trace_on_warning;
  
 -static int tracing_set_tracer(const char *buf);
 +static int tracing_set_tracer(struct trace_array *tr, const char *buf);
  
  #define MAX_TRACER_SIZE               100
  static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
@@@ -181,17 -180,6 +181,17 @@@ static int __init set_trace_boot_option
  }
  __setup("trace_options=", set_trace_boot_options);
  
 +static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
 +static char *trace_boot_clock __initdata;
 +
 +static int __init set_trace_boot_clock(char *str)
 +{
 +      strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
 +      trace_boot_clock = trace_boot_clock_buf;
 +      return 0;
 +}
 +__setup("trace_clock=", set_trace_boot_clock);
 +
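
This makes the trace clock selectable before tracing starts, in the
same way trace_options= already works; for example, booting with
"trace_clock=global" (any name from trace_clocks[] is accepted, and an
unknown name falls back to the default with a warning, per the
tracer_alloc_buffers() hunk below).
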
  
  unsigned long long ns2usecs(cycle_t nsec)
  {
@@@ -1242,7 -1230,7 +1242,7 @@@ int register_tracer(struct tracer *type
  
        printk(KERN_INFO "Starting tracer '%s'\n", type->name);
        /* Do we want this tracer to start on bootup? */
 -      tracing_set_tracer(type->name);
 +      tracing_set_tracer(&global_trace, type->name);
        default_bootup_tracer = NULL;
        /* disable other selftests, since this will break it. */
        tracing_selftest_disabled = true;
@@@ -3149,52 -3137,27 +3149,52 @@@ static int tracing_open(struct inode *i
        return ret;
  }
  
 +/*
 + * Some tracers are not suitable for instance buffers.
 + * A tracer is always available for the global array (toplevel)
 + * or if it explicitly states that it is.
 + */
 +static bool
 +trace_ok_for_array(struct tracer *t, struct trace_array *tr)
 +{
 +      return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
 +}
 +
 +/* Find the next tracer that this trace array may use */
 +static struct tracer *
 +get_tracer_for_array(struct trace_array *tr, struct tracer *t)
 +{
 +      while (t && !trace_ok_for_array(t, tr))
 +              t = t->next;
 +
 +      return t;
 +}
 +
  static void *
  t_next(struct seq_file *m, void *v, loff_t *pos)
  {
 +      struct trace_array *tr = m->private;
        struct tracer *t = v;
  
        (*pos)++;
  
        if (t)
 -              t = t->next;
 +              t = get_tracer_for_array(tr, t->next);
  
        return t;
  }
  
  static void *t_start(struct seq_file *m, loff_t *pos)
  {
 +      struct trace_array *tr = m->private;
        struct tracer *t;
        loff_t l = 0;
  
        mutex_lock(&trace_types_lock);
 -      for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
 -              ;
 +
 +      t = get_tracer_for_array(tr, trace_types);
 +      for (; t && l < *pos; t = t_next(m, t, &l))
  +              ;
  
        return t;
  }
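
Tracer visibility is now filtered per trace_array: the top-level buffer
still sees every registered tracer, but an instance only sees tracers
that opt in. Opting in is a one-line flag in struct tracer, added
elsewhere in this series; a sketch with placeholder names:

        static struct tracer example_tracer __read_mostly = {
                .name            = "example",
                .init            = example_init,
                .reset           = example_reset,
                .allow_instances = true,        /* usable in instances/<name>/ */
        };
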
@@@ -3229,21 -3192,10 +3229,21 @@@ static const struct seq_operations show
  
  static int show_traces_open(struct inode *inode, struct file *file)
  {
 +      struct trace_array *tr = inode->i_private;
 +      struct seq_file *m;
 +      int ret;
 +
        if (tracing_disabled)
                return -ENODEV;
  
 -      return seq_open(file, &show_traces_seq_ops);
 +      ret = seq_open(file, &show_traces_seq_ops);
 +      if (ret)
 +              return ret;
 +
 +      m = file->private_data;
 +      m->private = tr;
 +
 +      return 0;
  }
  
  static ssize_t
@@@ -3403,14 -3355,13 +3403,14 @@@ static int tracing_trace_options_show(s
        return 0;
  }
  
 -static int __set_tracer_option(struct tracer *trace,
 +static int __set_tracer_option(struct trace_array *tr,
                               struct tracer_flags *tracer_flags,
                               struct tracer_opt *opts, int neg)
  {
 +      struct tracer *trace = tr->current_trace;
        int ret;
  
 -      ret = trace->set_flag(tracer_flags->val, opts->bit, !neg);
 +      ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
        if (ret)
                return ret;
  
  }
  
  /* Try to assign a tracer specific option */
 -static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
 +static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
  {
 +      struct tracer *trace = tr->current_trace;
        struct tracer_flags *tracer_flags = trace->flags;
        struct tracer_opt *opts = NULL;
        int i;
                opts = &tracer_flags->opts[i];
  
                if (strcmp(cmp, opts->name) == 0)
 -                      return __set_tracer_option(trace, trace->flags,
 -                                                 opts, neg);
 +                      return __set_tracer_option(tr, trace->flags, opts, neg);
        }
  
        return -EINVAL;
@@@ -3456,7 -3407,7 +3456,7 @@@ int set_tracer_flag(struct trace_array 
  
        /* Give the tracer a chance to approve the change */
        if (tr->current_trace->flag_changed)
 -              if (tr->current_trace->flag_changed(tr->current_trace, mask, !!enabled))
 +              if (tr->current_trace->flag_changed(tr, mask, !!enabled))
                        return -EINVAL;
  
        if (enabled)
@@@ -3505,7 -3456,7 +3505,7 @@@ static int trace_set_options(struct tra
  
        /* If no option could be set, test the specific tracer options */
        if (!trace_options[i])
 -              ret = set_tracer_option(tr->current_trace, cmp, neg);
 +              ret = set_tracer_option(tr, cmp, neg);
  
        mutex_unlock(&trace_types_lock);
  
@@@ -3611,8 -3562,6 +3611,8 @@@ static const char readme_msg[] 
  #ifdef CONFIG_TRACER_SNAPSHOT
        "\t\t      snapshot\n"
  #endif
 +      "\t\t      dump\n"
 +      "\t\t      cpudump\n"
        "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
        "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
        "\t     The first one will disable tracing every time do_fault is hit\n"
@@@ -3936,26 -3885,10 +3936,26 @@@ create_trace_option_files(struct trace_
  static void
  destroy_trace_option_files(struct trace_option_dentry *topts);
  
 -static int tracing_set_tracer(const char *buf)
 +/*
 + * Used to clear out the tracer before deletion of an instance.
 + * Must have trace_types_lock held.
 + */
 +static void tracing_set_nop(struct trace_array *tr)
 +{
 +      if (tr->current_trace == &nop_trace)
 +              return;
  +
 +      tr->current_trace->enabled--;
 +
 +      if (tr->current_trace->reset)
 +              tr->current_trace->reset(tr);
 +
 +      tr->current_trace = &nop_trace;
 +}
 +
 +static int tracing_set_tracer(struct trace_array *tr, const char *buf)
  {
        static struct trace_option_dentry *topts;
 -      struct trace_array *tr = &global_trace;
        struct tracer *t;
  #ifdef CONFIG_TRACER_MAX_TRACE
        bool had_max_tr;
        if (t == tr->current_trace)
                goto out;
  
 +      /* Some tracers are only allowed for the top level buffer */
 +      if (!trace_ok_for_array(t, tr)) {
 +              ret = -EINVAL;
 +              goto out;
 +      }
 +
        trace_branch_disable();
  
 -      tr->current_trace->enabled = false;
 +      tr->current_trace->enabled--;
  
        if (tr->current_trace->reset)
                tr->current_trace->reset(tr);
                free_snapshot(tr);
        }
  #endif
 -      destroy_trace_option_files(topts);
 -
 -      topts = create_trace_option_files(tr, t);
 +      /* Currently, only the top instance has options */
 +      if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
 +              destroy_trace_option_files(topts);
 +              topts = create_trace_option_files(tr, t);
 +      }
  
  #ifdef CONFIG_TRACER_MAX_TRACE
        if (t->use_max_tr && !had_max_tr) {
        }
  
        tr->current_trace = t;
 -      tr->current_trace->enabled = true;
 +      tr->current_trace->enabled++;
        trace_branch_enable(tr);
   out:
        mutex_unlock(&trace_types_lock);
@@@ -4047,7 -3972,6 +4047,7 @@@ static ssize_
  tracing_set_trace_write(struct file *filp, const char __user *ubuf,
                        size_t cnt, loff_t *ppos)
  {
 +      struct trace_array *tr = filp->private_data;
        char buf[MAX_TRACER_SIZE+1];
        int i;
        size_t ret;
        for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
                buf[i] = 0;
  
 -      err = tracing_set_tracer(buf);
 +      err = tracing_set_tracer(tr, buf);
        if (err)
                return err;
  
@@@ -4392,8 -4316,6 +4392,6 @@@ static void tracing_spd_release_pipe(st
  
  static const struct pipe_buf_operations tracing_pipe_buf_ops = {
        .can_merge              = 0,
-       .map                    = generic_pipe_buf_map,
-       .unmap                  = generic_pipe_buf_unmap,
        .confirm                = generic_pipe_buf_confirm,
        .release                = generic_pipe_buf_release,
        .steal                  = generic_pipe_buf_steal,
@@@ -4488,7 -4410,7 +4486,7 @@@ static ssize_t tracing_splice_read_pipe
        trace_access_lock(iter->cpu_file);
  
        /* Fill as many pages as possible. */
-       for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
+       for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
                spd.pages[i] = alloc_page(GFP_KERNEL);
                if (!spd.pages[i])
                        break;
@@@ -4775,10 -4697,25 +4773,10 @@@ static int tracing_clock_show(struct se
        return 0;
  }
  
 -static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 -                                 size_t cnt, loff_t *fpos)
 +static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
  {
 -      struct seq_file *m = filp->private_data;
 -      struct trace_array *tr = m->private;
 -      char buf[64];
 -      const char *clockstr;
        int i;
  
 -      if (cnt >= sizeof(buf))
 -              return -EINVAL;
 -
 -      if (copy_from_user(&buf, ubuf, cnt))
 -              return -EFAULT;
 -
 -      buf[cnt] = 0;
 -
 -      clockstr = strstrip(buf);
 -
        for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
                if (strcmp(trace_clocks[i].name, clockstr) == 0)
                        break;
  
        mutex_unlock(&trace_types_lock);
  
 +      return 0;
 +}
 +
 +static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 +                                 size_t cnt, loff_t *fpos)
 +{
 +      struct seq_file *m = filp->private_data;
 +      struct trace_array *tr = m->private;
 +      char buf[64];
 +      const char *clockstr;
 +      int ret;
 +
 +      if (cnt >= sizeof(buf))
 +              return -EINVAL;
 +
 +      if (copy_from_user(&buf, ubuf, cnt))
 +              return -EFAULT;
 +
 +      buf[cnt] = 0;
 +
 +      clockstr = strstrip(buf);
 +
 +      ret = tracing_set_clock(tr, clockstr);
 +      if (ret)
 +              return ret;
 +
        *fpos += cnt;
  
        return cnt;
@@@ -5281,8 -5192,6 +5279,6 @@@ static void buffer_pipe_buf_get(struct 
  /* Pipe buffer operations for a buffer. */
  static const struct pipe_buf_operations buffer_pipe_buf_ops = {
        .can_merge              = 0,
-       .map                    = generic_pipe_buf_map,
-       .unmap                  = generic_pipe_buf_unmap,
        .confirm                = generic_pipe_buf_confirm,
        .release                = buffer_pipe_buf_release,
        .steal                  = generic_pipe_buf_steal,
@@@ -5358,7 -5267,7 +5354,7 @@@ tracing_buffers_splice_read(struct fil
        trace_access_lock(iter->cpu_file);
        entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
  
-       for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
+       for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
                struct page *page;
                int r;
  
@@@ -5792,7 -5701,7 +5788,7 @@@ trace_options_write(struct file *filp, 
  
        if (!!(topt->flags->val & topt->opt->bit) != val) {
                mutex_lock(&trace_types_lock);
 -              ret = __set_tracer_option(topt->tr->current_trace, topt->flags,
 +              ret = __set_tracer_option(topt->tr, topt->flags,
                                          topt->opt, !val);
                mutex_unlock(&trace_types_lock);
                if (ret)
@@@ -6199,9 -6108,7 +6195,9 @@@ static int instance_delete(const char *
  
        list_del(&tr->list);
  
 +      tracing_set_nop(tr);
        event_trace_del_tracer(tr);
 +      ftrace_destroy_function_files(tr);
        debugfs_remove_recursive(tr->dir);
        free_percpu(tr->trace_buffer.data);
        ring_buffer_free(tr->trace_buffer.buffer);
@@@ -6296,12 -6203,6 +6292,12 @@@ init_tracer_debugfs(struct trace_array 
  {
        int cpu;
  
 +      trace_create_file("available_tracers", 0444, d_tracer,
 +                      tr, &show_traces_fops);
 +
 +      trace_create_file("current_tracer", 0644, d_tracer,
 +                      tr, &set_tracer_fops);
 +
        trace_create_file("tracing_cpumask", 0644, d_tracer,
                          tr, &tracing_cpumask_fops);
  
        trace_create_file("tracing_on", 0644, d_tracer,
                          tr, &rb_simple_fops);
  
 +      if (ftrace_create_function_files(tr, d_tracer))
 +              WARN(1, "Could not allocate function filter files");
 +
  #ifdef CONFIG_TRACER_SNAPSHOT
        trace_create_file("snapshot", 0644, d_tracer,
                          tr, &snapshot_fops);
@@@ -6357,6 -6255,12 +6353,6 @@@ static __init int tracer_init_debugfs(v
  
        init_tracer_debugfs(&global_trace, d_tracer);
  
 -      trace_create_file("available_tracers", 0444, d_tracer,
 -                      &global_trace, &show_traces_fops);
 -
 -      trace_create_file("current_tracer", 0644, d_tracer,
 -                      &global_trace, &set_tracer_fops);
 -
  #ifdef CONFIG_TRACER_MAX_TRACE
        trace_create_file("tracing_max_latency", 0644, d_tracer,
                        &tracing_max_latency, &tracing_max_lat_fops);
@@@ -6619,13 -6523,6 +6615,13 @@@ __init static int tracer_alloc_buffers(
  
        trace_init_cmdlines();
  
 +      if (trace_boot_clock) {
 +              ret = tracing_set_clock(&global_trace, trace_boot_clock);
 +              if (ret < 0)
 +                      pr_warning("Trace clock %s not defined, going back to default\n",
 +                                 trace_boot_clock);
 +      }
 +
        /*
         * register_tracer() might reference current_trace, so it
         * needs to be set before we register anything. This is
diff --combined lib/Kconfig.debug
index dd7f8858188a6ac92ac19bb7ae032d62786ee612,7a0859314bdf19ba9bb780579981e711d545186e..140b66a874c1d5e9a069eade2fa6aebc6d65013d
@@@ -980,21 -980,6 +980,21 @@@ config DEBUG_LOCKING_API_SELFTEST
          The following locking APIs are covered: spinlocks, rwlocks,
          mutexes and rwsems.
  
 +config LOCK_TORTURE_TEST
 +      tristate "torture tests for locking"
 +      depends on DEBUG_KERNEL
 +      select TORTURE_TEST
 +      default n
 +      help
 +        This option provides a kernel module that runs torture tests
 +        on kernel locking primitives.  The kernel module may be built
 +        after the fact on the running kernel to be tested, if desired.
 +
 +        Say Y here if you want kernel locking-primitive torture tests
 +        to be built into the kernel.
 +        Say M if you want these torture tests to build as a module.
 +        Say N if you are unsure.
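
Built as a module, locktorture is driven purely by module parameters;
for instance "modprobe locktorture torture_type=spin_lock
nwriters_stress=4", then "rmmod locktorture" to stop and print the
pass/fail summary to dmesg (parameter names per the module this option
builds; spin_lock is the initially supported type).
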
 +
  endmenu # lock debugging
  
  config TRACE_IRQFLAGS
@@@ -1045,16 -1030,6 +1045,6 @@@ config DEBUG_BUGVERBOS
          of the BUG call as well as the EIP and oops trace.  This aids
          debugging but costs about 70-100K of memory.
  
- config DEBUG_WRITECOUNT
-       bool "Debug filesystem writers count"
-       depends on DEBUG_KERNEL
-       help
-         Enable this to catch wrong use of the writers count in struct
-         vfsmount.  This will increase the size of each file struct by
-         32 bits.
-         If unsure, say N.
  config DEBUG_LIST
        bool "Debug linked list manipulation"
        depends on DEBUG_KERNEL
@@@ -1156,14 -1131,9 +1146,14 @@@ config SPARSE_RCU_POINTE
  
         Say N if you are unsure.
  
 +config TORTURE_TEST
 +      tristate
 +      default n
 +
  config RCU_TORTURE_TEST
        tristate "torture tests for RCU"
        depends on DEBUG_KERNEL
 +      select TORTURE_TEST
        default n
        help
          This option provides a kernel module that runs torture tests
diff --combined mm/Makefile
index 9e5aaf92197d3fcc7038e86d9769dc0def5bf1c3,178a43406b0cc5cb787c80fb2143ed0c06a53446..b484452dac57ea5e531918837d658c9d5d159ea6
@@@ -16,8 -16,8 +16,9 @@@ obj-y                 := filemap.o mempool.o oom_kill
                           readahead.o swap.o truncate.o vmscan.o shmem.o \
                           util.o mmzone.o vmstat.o backing-dev.o \
                           mm_init.o mmu_context.o percpu.o slab_common.o \
 -                         compaction.o balloon_compaction.o \
 -                         interval_tree.o list_lru.o iov_iter.o $(mmu-y)
 +                         compaction.o balloon_compaction.o vmacache.o \
-                          interval_tree.o list_lru.o workingset.o $(mmu-y)
++                         interval_tree.o list_lru.o workingset.o \
++                         iov_iter.o $(mmu-y)
  
  obj-y += init-mm.o
  
@@@ -61,4 -61,3 +62,4 @@@ obj-$(CONFIG_CLEANCACHE) += cleancache.
  obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
  obj-$(CONFIG_ZBUD)    += zbud.o
  obj-$(CONFIG_ZSMALLOC)        += zsmalloc.o
 +obj-$(CONFIG_GENERIC_EARLY_IOREMAP) += early_ioremap.o
diff --combined mm/filemap.c
index 27ebc0c9571bb8831ceb38c4e71e1bf57227f098,1a455142784db7a69f76287360ade1e276cf3c0c..a82fbe4c9e8e1c1d5a3eed5e2649ec87a7bfd16d
@@@ -33,7 -33,6 +33,7 @@@
  #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
  #include <linux/memcontrol.h>
  #include <linux/cleancache.h>
 +#include <linux/rmap.h>
  #include "internal.h"
  
  #define CREATE_TRACE_POINTS
@@@ -77,7 -76,7 +77,7 @@@
   *  ->mmap_sem
   *    ->lock_page             (access_process_vm)
   *
-  *  ->i_mutex                 (generic_file_buffered_write)
+  *  ->i_mutex                 (generic_perform_write)
   *    ->mmap_sem              (fault_in_pages_readable->do_page_fault)
   *
   *  bdi->wb.list_lock
   *   ->tasklist_lock            (memory_failure, collect_procs_ao)
   */
  
 +static void page_cache_tree_delete(struct address_space *mapping,
 +                                 struct page *page, void *shadow)
 +{
 +      struct radix_tree_node *node;
 +      unsigned long index;
 +      unsigned int offset;
 +      unsigned int tag;
 +      void **slot;
 +
 +      VM_BUG_ON(!PageLocked(page));
 +
 +      __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
 +
 +      if (shadow) {
 +              mapping->nrshadows++;
 +              /*
 +               * Make sure the nrshadows update is committed before
 +               * the nrpages update so that final truncate racing
 +               * with reclaim does not see both counters 0 at the
 +               * same time and miss a shadow entry.
 +               */
 +              smp_wmb();
 +      }
 +      mapping->nrpages--;
 +
 +      if (!node) {
 +              /* Clear direct pointer tags in root node */
 +              mapping->page_tree.gfp_mask &= __GFP_BITS_MASK;
 +              radix_tree_replace_slot(slot, shadow);
 +              return;
 +      }
 +
 +      /* Clear tree tags for the removed page */
 +      index = page->index;
 +      offset = index & RADIX_TREE_MAP_MASK;
 +      for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
 +              if (test_bit(offset, node->tags[tag]))
 +                      radix_tree_tag_clear(&mapping->page_tree, index, tag);
 +      }
 +
 +      /* Delete page, swap shadow entry */
 +      radix_tree_replace_slot(slot, shadow);
 +      workingset_node_pages_dec(node);
 +      if (shadow)
 +              workingset_node_shadows_inc(node);
 +      else
 +              if (__radix_tree_delete_node(&mapping->page_tree, node))
 +                      return;
 +
 +      /*
 +       * Track node that only contains shadow entries.
 +       *
 +       * Avoid acquiring the list_lru lock if already tracked.  The
 +       * list_empty() test is safe as node->private_list is
 +       * protected by mapping->tree_lock.
 +       */
 +      if (!workingset_node_pages(node) &&
 +          list_empty(&node->private_list)) {
 +              node->private_data = mapping;
 +              list_lru_add(&workingset_shadow_nodes, &node->private_list);
 +      }
 +}
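
The smp_wmb() in page_cache_tree_delete() above orders the nrshadows increment before the nrpages decrement, so a truncate racing with reclaim can never observe both counters at zero and miss a shadow entry. As a rough userspace analog (hypothetical code, not part of this patch), C11 release/acquire atomics express the same publish-before-retire pattern:

    /*
     * Hypothetical userspace analog of the nrshadows/nrpages ordering:
     * publish the shadow count with release semantics before retiring
     * the page count, so an acquire reader that sees nrpages == 0 is
     * guaranteed to also see the shadow it must process.
     */
    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic unsigned long nrshadows;
    static _Atomic unsigned long nrpages = 1;

    static void evict_page_to_shadow(void)
    {
        atomic_fetch_add_explicit(&nrshadows, 1, memory_order_release);
        atomic_fetch_sub_explicit(&nrpages, 1, memory_order_release);
    }

    static int truncate_view_consistent(void)
    {
        if (atomic_load_explicit(&nrpages, memory_order_acquire) == 0)
            return atomic_load_explicit(&nrshadows, memory_order_acquire) != 0;
        return 1;
    }

    int main(void)
    {
        evict_page_to_shadow();
        printf("consistent: %d\n", truncate_view_consistent());
        return 0;
    }

Only the ordering argument carries over; the kernel pairs smp_wmb() with its own read side rather than C11 atomics.
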
 +
  /*
   * Delete a page from the page cache and free it. Caller has to make
   * sure the page is locked and that nobody else uses it - or that usage
   * is safe.  The caller must hold the mapping's tree_lock.
   */
 -void __delete_from_page_cache(struct page *page)
 +void __delete_from_page_cache(struct page *page, void *shadow)
  {
        struct address_space *mapping = page->mapping;
  
        else
                cleancache_invalidate_page(mapping, page);
  
 -      radix_tree_delete(&mapping->page_tree, page->index);
 +      page_cache_tree_delete(mapping, page, shadow);
 +
        page->mapping = NULL;
        /* Leave page->index set: truncation lookup relies upon it */
 -      mapping->nrpages--;
 +
        __dec_zone_page_state(page, NR_FILE_PAGES);
        if (PageSwapBacked(page))
                __dec_zone_page_state(page, NR_SHMEM);
@@@ -231,7 -166,7 +231,7 @@@ void delete_from_page_cache(struct pag
  
        freepage = mapping->a_ops->freepage;
        spin_lock_irq(&mapping->tree_lock);
 -      __delete_from_page_cache(page);
 +      __delete_from_page_cache(page, NULL);
        spin_unlock_irq(&mapping->tree_lock);
        mem_cgroup_uncharge_cache_page(page);
  
@@@ -491,7 -426,7 +491,7 @@@ int replace_page_cache_page(struct pag
                new->index = offset;
  
                spin_lock_irq(&mapping->tree_lock);
 -              __delete_from_page_cache(old);
 +              __delete_from_page_cache(old, NULL);
                error = radix_tree_insert(&mapping->page_tree, offset, new);
                BUG_ON(error);
                mapping->nrpages++;
  }
  EXPORT_SYMBOL_GPL(replace_page_cache_page);
  
 -/**
 - * add_to_page_cache_locked - add a locked page to the pagecache
 - * @page:     page to add
 - * @mapping:  the page's address_space
 - * @offset:   page index
 - * @gfp_mask: page allocation mode
 - *
 - * This function is used to add a page to the pagecache. It must be locked.
 - * This function does not add the page to the LRU.  The caller must do that.
 - */
 -int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 -              pgoff_t offset, gfp_t gfp_mask)
 +static int page_cache_tree_insert(struct address_space *mapping,
 +                                struct page *page, void **shadowp)
 +{
 +      struct radix_tree_node *node;
 +      void **slot;
 +      int error;
 +
 +      error = __radix_tree_create(&mapping->page_tree, page->index,
 +                                  &node, &slot);
 +      if (error)
 +              return error;
 +      if (*slot) {
 +              void *p;
 +
 +              p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
 +              if (!radix_tree_exceptional_entry(p))
 +                      return -EEXIST;
 +              if (shadowp)
 +                      *shadowp = p;
 +              mapping->nrshadows--;
 +              if (node)
 +                      workingset_node_shadows_dec(node);
 +      }
 +      radix_tree_replace_slot(slot, page);
 +      mapping->nrpages++;
 +      if (node) {
 +              workingset_node_pages_inc(node);
 +              /*
 +               * Don't track node that contains actual pages.
 +               *
 +               * Avoid acquiring the list_lru lock if already
 +               * untracked.  The list_empty() test is safe as
 +               * node->private_list is protected by
 +               * mapping->tree_lock.
 +               */
 +              if (!list_empty(&node->private_list))
 +                      list_lru_del(&workingset_shadow_nodes,
 +                                   &node->private_list);
 +      }
 +      return 0;
 +}
 +
 +static int __add_to_page_cache_locked(struct page *page,
 +                                    struct address_space *mapping,
 +                                    pgoff_t offset, gfp_t gfp_mask,
 +                                    void **shadowp)
  {
        int error;
  
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageSwapBacked(page), page);
  
 -      error = mem_cgroup_cache_charge(page, current->mm,
 +      error = mem_cgroup_charge_file(page, current->mm,
                                        gfp_mask & GFP_RECLAIM_MASK);
        if (error)
                return error;
        page->index = offset;
  
        spin_lock_irq(&mapping->tree_lock);
 -      error = radix_tree_insert(&mapping->page_tree, offset, page);
 +      error = page_cache_tree_insert(mapping, page, shadowp);
        radix_tree_preload_end();
        if (unlikely(error))
                goto err_insert;
 -      mapping->nrpages++;
        __inc_zone_page_state(page, NR_FILE_PAGES);
        spin_unlock_irq(&mapping->tree_lock);
        trace_mm_filemap_add_to_page_cache(page);
@@@ -595,49 -497,16 +595,49 @@@ err_insert
        page_cache_release(page);
        return error;
  }
 +
 +/**
 + * add_to_page_cache_locked - add a locked page to the pagecache
 + * @page:     page to add
 + * @mapping:  the page's address_space
 + * @offset:   page index
 + * @gfp_mask: page allocation mode
 + *
 + * This function is used to add a page to the pagecache. It must be locked.
 + * This function does not add the page to the LRU.  The caller must do that.
 + */
 +int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 +              pgoff_t offset, gfp_t gfp_mask)
 +{
 +      return __add_to_page_cache_locked(page, mapping, offset,
 +                                        gfp_mask, NULL);
 +}
  EXPORT_SYMBOL(add_to_page_cache_locked);
  
  int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
                                pgoff_t offset, gfp_t gfp_mask)
  {
 +      void *shadow = NULL;
        int ret;
  
 -      ret = add_to_page_cache(page, mapping, offset, gfp_mask);
 -      if (ret == 0)
 -              lru_cache_add_file(page);
 +      __set_page_locked(page);
 +      ret = __add_to_page_cache_locked(page, mapping, offset,
 +                                       gfp_mask, &shadow);
 +      if (unlikely(ret))
 +              __clear_page_locked(page);
 +      else {
 +              /*
 +               * The page might have been evicted from cache only
 +               * recently, in which case it should be activated like
 +               * any other repeatedly accessed page.
 +               */
 +              if (shadow && workingset_refault(shadow)) {
 +                      SetPageActive(page);
 +                      workingset_activation(page);
 +              } else
 +                      ClearPageActive(page);
 +              lru_cache_add(page);
 +      }
        return ret;
  }
  EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
@@@ -651,10 -520,10 +651,10 @@@ struct page *__page_cache_alloc(gfp_t g
        if (cpuset_do_page_mem_spread()) {
                unsigned int cpuset_mems_cookie;
                do {
 -                      cpuset_mems_cookie = get_mems_allowed();
 +                      cpuset_mems_cookie = read_mems_allowed_begin();
                        n = cpuset_mem_spread_node();
                        page = alloc_pages_exact_node(n, gfp, 0);
 -              } while (!put_mems_allowed(cpuset_mems_cookie) && !page);
 +              } while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
  
                return page;
        }
@@@ -817,101 -686,14 +817,101 @@@ int __lock_page_or_retry(struct page *p
  }
  
  /**
 - * find_get_page - find and get a page reference
 + * page_cache_next_hole - find the next hole (not-present entry)
 + * @mapping: mapping
 + * @index: index
 + * @max_scan: maximum range to search
 + *
 + * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the
 + * lowest indexed hole.
 + *
 + * Returns: the index of the hole if found, otherwise returns an index
 + * outside of the set specified (in which case 'return - index >=
 + * max_scan' will be true). In rare cases of index wrap-around, 0 will
 + * be returned.
 + *
 + * page_cache_next_hole may be called under rcu_read_lock. However,
 + * like radix_tree_gang_lookup, this will not atomically search a
 + * snapshot of the tree at a single point in time. For example, if a
 + * hole is created at index 5, then subsequently a hole is created at
 + * index 10, page_cache_next_hole covering both indexes may return 10
 + * if called under rcu_read_lock.
 + */
 +pgoff_t page_cache_next_hole(struct address_space *mapping,
 +                           pgoff_t index, unsigned long max_scan)
 +{
 +      unsigned long i;
 +
 +      for (i = 0; i < max_scan; i++) {
 +              struct page *page;
 +
 +              page = radix_tree_lookup(&mapping->page_tree, index);
 +              if (!page || radix_tree_exceptional_entry(page))
 +                      break;
 +              index++;
 +              if (index == 0)
 +                      break;
 +      }
 +
 +      return index;
 +}
 +EXPORT_SYMBOL(page_cache_next_hole);
 +
 +/**
 + * page_cache_prev_hole - find the prev hole (not-present entry)
 + * @mapping: mapping
 + * @index: index
 + * @max_scan: maximum range to search
 + *
 + * Search backwards in the range [max(index-max_scan+1, 0), index] for
 + * the first hole.
 + *
 + * Returns: the index of the hole if found, otherwise returns an index
 + * outside of the set specified (in which case 'index - return >=
 + * max_scan' will be true). In rare cases of wrap-around, ULONG_MAX
 + * will be returned.
 + *
 + * page_cache_prev_hole may be called under rcu_read_lock. However,
 + * like radix_tree_gang_lookup, this will not atomically search a
 + * snapshot of the tree at a single point in time. For example, if a
 + * hole is created at index 10, then subsequently a hole is created at
 + * index 5, page_cache_prev_hole covering both indexes may return 5 if
 + * called under rcu_read_lock.
 + */
 +pgoff_t page_cache_prev_hole(struct address_space *mapping,
 +                           pgoff_t index, unsigned long max_scan)
 +{
 +      unsigned long i;
 +
 +      for (i = 0; i < max_scan; i++) {
 +              struct page *page;
 +
 +              page = radix_tree_lookup(&mapping->page_tree, index);
 +              if (!page || radix_tree_exceptional_entry(page))
 +                      break;
 +              index--;
 +              if (index == ULONG_MAX)
 +                      break;
 +      }
 +
 +      return index;
 +}
 +EXPORT_SYMBOL(page_cache_prev_hole);
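
Both scanners treat shadow (exceptional) entries as holes, and the wrap-around corner cases documented above (0 for a forward scan, ULONG_MAX for a backward one) are easy to mishandle. A hypothetical flat-array analog (names invented here) shows the forward loop shape:

    /*
     * Hypothetical flat-array analog of page_cache_next_hole(); the
     * kernel version also treats shadow entries as holes.
     */
    #include <stdio.h>

    #define NSLOTS 16UL

    static void *slots[NSLOTS];

    static unsigned long next_hole(unsigned long index, unsigned long max_scan)
    {
        unsigned long i;

        for (i = 0; i < max_scan; i++) {
            if (index >= NSLOTS || !slots[index])
                break;                /* empty slot: this is the hole */
            index++;
            if (index == 0)           /* mirrors the kernel's wrap check */
                break;
        }
        return index;
    }

    int main(void)
    {
        slots[3] = slots[4] = (void *)1UL;
        printf("first hole at or after 3: %lu\n", next_hole(3, NSLOTS));
        return 0;
    }
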
 +
 +/**
 + * find_get_entry - find and get a page cache entry
   * @mapping: the address_space to search
 - * @offset: the page index
 + * @offset: the page cache index
 + *
 + * Looks up the page cache slot at @mapping & @offset.  If there is a
 + * page cache page, it is returned with an increased refcount.
 + *
 + * If the slot holds a shadow entry of a previously evicted page, it
 + * is returned.
   *
 - * Is there a pagecache struct page at the given (mapping, offset) tuple?
 - * If yes, increment its refcount and return it; if no, return NULL.
 + * Otherwise, %NULL is returned.
   */
 -struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
 +struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
  {
        void **pagep;
        struct page *page;
@@@ -952,50 -734,24 +952,50 @@@ out
  
        return page;
  }
 -EXPORT_SYMBOL(find_get_page);
 +EXPORT_SYMBOL(find_get_entry);
  
  /**
 - * find_lock_page - locate, pin and lock a pagecache page
 + * find_get_page - find and get a page reference
   * @mapping: the address_space to search
   * @offset: the page index
   *
 - * Locates the desired pagecache page, locks it, increments its reference
 - * count and returns its address.
 + * Looks up the page cache slot at @mapping & @offset.  If there is a
 + * page cache page, it is returned with an increased refcount.
   *
 - * Returns zero if the page was not present. find_lock_page() may sleep.
 + * Otherwise, %NULL is returned.
   */
 -struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
 +struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
 +{
 +      struct page *page = find_get_entry(mapping, offset);
 +
 +      if (radix_tree_exceptional_entry(page))
 +              page = NULL;
 +      return page;
 +}
 +EXPORT_SYMBOL(find_get_page);
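
find_get_page() is now just find_get_entry() minus exceptional entries. Shadow entries can share the radix tree with page pointers because real pointers are word-aligned, leaving low bits free for tagging; a hypothetical userspace sketch of that low-bit tagging trick (the kernel's actual bit assignment differs):

    /*
     * Hypothetical sketch of low-bit pointer tagging, the idea behind
     * radix tree exceptional (shadow) entries: real pointers are at
     * least word-aligned, so a set low bit marks a non-pointer value.
     * The kernel uses a different bit layout for exceptional entries.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define ENTRY_EXCEPTIONAL 1UL

    static void *make_shadow(unsigned long cookie)
    {
        return (void *)((cookie << 1) | ENTRY_EXCEPTIONAL);
    }

    static int entry_is_exceptional(void *entry)
    {
        return ((uintptr_t)entry & ENTRY_EXCEPTIONAL) != 0;
    }

    int main(void)
    {
        int page = 42;                 /* stand-in for a struct page */
        void *slot_a = &page;
        void *slot_b = make_shadow(1234);

        printf("a exceptional: %d\n", entry_is_exceptional(slot_a));
        printf("b exceptional: %d\n", entry_is_exceptional(slot_b));
        return 0;
    }
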
 +
 +/**
 + * find_lock_entry - locate, pin and lock a page cache entry
 + * @mapping: the address_space to search
 + * @offset: the page cache index
 + *
 + * Looks up the page cache slot at @mapping & @offset.  If there is a
 + * page cache page, it is returned locked and with an increased
 + * refcount.
 + *
 + * If the slot holds a shadow entry of a previously evicted page, it
 + * is returned.
 + *
 + * Otherwise, %NULL is returned.
 + *
 + * find_lock_entry() may sleep.
 + */
 +struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset)
  {
        struct page *page;
  
  repeat:
 -      page = find_get_page(mapping, offset);
 +      page = find_get_entry(mapping, offset);
        if (page && !radix_tree_exception(page)) {
                lock_page(page);
                /* Has the page been truncated? */
        }
        return page;
  }
 +EXPORT_SYMBOL(find_lock_entry);
 +
 +/**
 + * find_lock_page - locate, pin and lock a pagecache page
 + * @mapping: the address_space to search
 + * @offset: the page index
 + *
 + * Looks up the page cache slot at @mapping & @offset.  If there is a
 + * page cache page, it is returned locked and with an increased
 + * refcount.
 + *
 + * Otherwise, %NULL is returned.
 + *
 + * find_lock_page() may sleep.
 + */
 +struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
 +{
 +      struct page *page = find_lock_entry(mapping, offset);
 +
 +      if (radix_tree_exceptional_entry(page))
 +              page = NULL;
 +      return page;
 +}
  EXPORT_SYMBOL(find_lock_page);
  
  /**
   * @index: the page's index into the mapping
   * @gfp_mask: page allocation mode
   *
 - * Locates a page in the pagecache.  If the page is not present, a new page
 - * is allocated using @gfp_mask and is added to the pagecache and to the VM's
 - * LRU list.  The returned page is locked and has its reference count
 - * incremented.
 + * Looks up the page cache slot at @mapping & @offset.  If there is a
 + * page cache page, it is returned locked and with an increased
 + * refcount.
   *
 - * find_or_create_page() may sleep, even if @gfp_flags specifies an atomic
 - * allocation!
 + * If the page is not present, a new page is allocated using @gfp_mask
 + * and added to the page cache and the VM's LRU list.  The page is
 + * returned locked and with an increased refcount.
   *
 - * find_or_create_page() returns the desired page's address, or zero on
 - * memory exhaustion.
 + * On memory exhaustion, %NULL is returned.
 + *
 + * find_or_create_page() may sleep, even if @gfp_flags specifies an
 + * atomic allocation!
   */
  struct page *find_or_create_page(struct address_space *mapping,
                pgoff_t index, gfp_t gfp_mask)
@@@ -1082,76 -813,6 +1082,76 @@@ repeat
  }
  EXPORT_SYMBOL(find_or_create_page);
  
 +/**
 + * find_get_entries - gang pagecache lookup
 + * @mapping:  The address_space to search
 + * @start:    The starting page cache index
 + * @nr_entries:       The maximum number of entries
 + * @entries:  Where the resulting entries are placed
 + * @indices:  The cache indices corresponding to the entries in @entries
 + *
 + * find_get_entries() will search for and return a group of up to
 + * @nr_entries entries in the mapping.  The entries are placed at
 + * @entries.  find_get_entries() takes a reference against any actual
 + * pages it returns.
 + *
 + * The search returns a group of mapping-contiguous page cache entries
 + * with ascending indexes.  There may be holes in the indices due to
 + * not-present pages.
 + *
 + * Any shadow entries of evicted pages are included in the returned
 + * array.
 + *
 + * find_get_entries() returns the number of pages and shadow entries
 + * which were found.
 + */
 +unsigned find_get_entries(struct address_space *mapping,
 +                        pgoff_t start, unsigned int nr_entries,
 +                        struct page **entries, pgoff_t *indices)
 +{
 +      void **slot;
 +      unsigned int ret = 0;
 +      struct radix_tree_iter iter;
 +
 +      if (!nr_entries)
 +              return 0;
 +
 +      rcu_read_lock();
 +restart:
 +      radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
 +              struct page *page;
 +repeat:
 +              page = radix_tree_deref_slot(slot);
 +              if (unlikely(!page))
 +                      continue;
 +              if (radix_tree_exception(page)) {
 +                      if (radix_tree_deref_retry(page))
 +                              goto restart;
 +                      /*
 +                       * Otherwise, we must be storing a swap entry
 +                       * here as an exceptional entry: so return it
 +                       * without attempting to raise page count.
 +                       */
 +                      goto export;
 +              }
 +              if (!page_cache_get_speculative(page))
 +                      goto repeat;
 +
 +              /* Has the page moved? */
 +              if (unlikely(page != *slot)) {
 +                      page_cache_release(page);
 +                      goto repeat;
 +              }
 +export:
 +              indices[ret] = iter.index;
 +              entries[ret] = page;
 +              if (++ret == nr_entries)
 +                      break;
 +      }
 +      rcu_read_unlock();
 +      return ret;
 +}
 +
  /**
   * find_get_pages - gang pagecache lookup
   * @mapping:  The address_space to search
@@@ -1428,7 -1089,8 +1428,8 @@@ static void shrink_readahead_size_eio(s
   * do_generic_file_read - generic file read routine
   * @filp:     the file to read
   * @ppos:     current file position
-  * @desc:     read_descriptor
+  * @iter:     data destination
+  * @written:  already copied
   *
   * This is a generic file read routine, and uses the
   * mapping->a_ops->readpage() function for the actual low-level stuff.
   * This is really ugly. But the goto's actually try to clarify some
   * of the logic when it comes to error handling etc.
   */
- static void do_generic_file_read(struct file *filp, loff_t *ppos,
-               read_descriptor_t *desc)
+ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
+               struct iov_iter *iter, ssize_t written)
  {
        struct address_space *mapping = filp->f_mapping;
        struct inode *inode = mapping->host;
        pgoff_t prev_index;
        unsigned long offset;      /* offset into pagecache page */
        unsigned int prev_offset;
-       int error;
+       int error = 0;
  
        index = *ppos >> PAGE_CACHE_SHIFT;
        prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
        prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
-       last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
+       last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
        offset = *ppos & ~PAGE_CACHE_MASK;
  
        for (;;) {
@@@ -1487,7 -1149,7 +1488,7 @@@ find_page
                        if (!page->mapping)
                                goto page_not_up_to_date_locked;
                        if (!mapping->a_ops->is_partially_uptodate(page,
-                                                               desc, offset))
+                                                       offset, iter->count))
                                goto page_not_up_to_date_locked;
                        unlock_page(page);
                }
@@@ -1537,24 -1199,23 +1538,23 @@@ page_ok
                /*
                 * Ok, we have the page, and it's up-to-date, so
                 * now we can copy it to user space...
-                *
-                * The file_read_actor routine returns how many bytes were
-                * actually used..
-                * NOTE! This may not be the same as how much of a user buffer
-                * we filled up (we may be padding etc), so we can only update
-                * "pos" here (the actor routine has to update the user buffer
-                * pointers and the remaining count).
                 */
-               ret = file_read_actor(desc, page, offset, nr);
+               ret = copy_page_to_iter(page, offset, nr, iter);
                offset += ret;
                index += offset >> PAGE_CACHE_SHIFT;
                offset &= ~PAGE_CACHE_MASK;
                prev_offset = offset;
  
                page_cache_release(page);
-               if (ret == nr && desc->count)
-                       continue;
-               goto out;
+               written += ret;
+               if (!iov_iter_count(iter))
+                       goto out;
+               if (ret < nr) {
+                       error = -EFAULT;
+                       goto out;
+               }
+               continue;
  
  page_not_up_to_date:
                /* Get exclusive access to the page ... */
@@@ -1589,6 -1250,7 +1589,7 @@@ readpage
                if (unlikely(error)) {
                        if (error == AOP_TRUNCATED_PAGE) {
                                page_cache_release(page);
+                               error = 0;
                                goto find_page;
                        }
                        goto readpage_error;
  
  readpage_error:
                /* UHHUH! A synchronous read error occurred. Report it */
-               desc->error = error;
                page_cache_release(page);
                goto out;
  
@@@ -1630,16 -1291,17 +1630,17 @@@ no_cached_page
                 */
                page = page_cache_alloc_cold(mapping);
                if (!page) {
-                       desc->error = -ENOMEM;
+                       error = -ENOMEM;
                        goto out;
                }
                error = add_to_page_cache_lru(page, mapping,
                                                index, GFP_KERNEL);
                if (error) {
                        page_cache_release(page);
-                       if (error == -EEXIST)
+                       if (error == -EEXIST) {
+                               error = 0;
                                goto find_page;
-                       desc->error = error;
+                       }
                        goto out;
                }
                goto readpage;
@@@ -1652,44 -1314,7 +1653,7 @@@ out
  
        *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
        file_accessed(filp);
- }
- int file_read_actor(read_descriptor_t *desc, struct page *page,
-                       unsigned long offset, unsigned long size)
- {
-       char *kaddr;
-       unsigned long left, count = desc->count;
-       if (size > count)
-               size = count;
-       /*
-        * Faults on the destination of a read are common, so do it before
-        * taking the kmap.
-        */
-       if (!fault_in_pages_writeable(desc->arg.buf, size)) {
-               kaddr = kmap_atomic(page);
-               left = __copy_to_user_inatomic(desc->arg.buf,
-                                               kaddr + offset, size);
-               kunmap_atomic(kaddr);
-               if (left == 0)
-                       goto success;
-       }
-       /* Do it the slow way */
-       kaddr = kmap(page);
-       left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
-       kunmap(page);
-       if (left) {
-               size -= left;
-               desc->error = -EFAULT;
-       }
- success:
-       desc->count = count - size;
-       desc->written += size;
-       desc->arg.buf += size;
-       return size;
+       return written ? written : error;
  }
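
With copy_page_to_iter(), a short copy while the iterator still has room now maps directly to -EFAULT instead of the old desc->error bookkeeping. The user-visible behavior is easy to reproduce (hypothetical demo; any readable file works): point read(2) at an unwritable buffer and the copy-out faults.

    /*
     * Demonstrates the -EFAULT path: reading file data into an
     * unwritable destination makes the copy stop short and the
     * syscall report EFAULT (or a short count if some bytes were
     * copied first).
     */
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        long page = sysconf(_SC_PAGESIZE);
        char *buf = mmap(NULL, page, PROT_NONE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        int fd = open("/etc/hostname", O_RDONLY);
        ssize_t n;

        if (buf == MAP_FAILED || fd < 0) { perror("setup"); return 1; }
        n = read(fd, buf, page);       /* destination is not writable */
        printf("read: %zd (%s)\n", n, n < 0 ? strerror(errno) : "ok");
        close(fd);
        return 0;
    }
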
  
  /*
@@@ -1747,14 -1372,15 +1711,15 @@@ generic_file_aio_read(struct kiocb *ioc
  {
        struct file *filp = iocb->ki_filp;
        ssize_t retval;
-       unsigned long seg = 0;
        size_t count;
        loff_t *ppos = &iocb->ki_pos;
+       struct iov_iter i;
  
        count = 0;
        retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
        if (retval)
                return retval;
+       iov_iter_init(&i, iov, nr_segs, count, 0);
  
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
        if (filp->f_flags & O_DIRECT) {
                if (retval > 0) {
                        *ppos = pos + retval;
                        count -= retval;
+                       /*
+                        * If we did a short DIO read we need to skip the
+                        * section of the iov that we've already read data into.
+                        */
+                       iov_iter_advance(&i, retval);
                }
  
                /*
                }
        }
  
-       count = retval;
-       for (seg = 0; seg < nr_segs; seg++) {
-               read_descriptor_t desc;
-               loff_t offset = 0;
-               /*
-                * If we did a short DIO read we need to skip the section of the
-                * iov that we've already read data into.
-                */
-               if (count) {
-                       if (count > iov[seg].iov_len) {
-                               count -= iov[seg].iov_len;
-                               continue;
-                       }
-                       offset = count;
-                       count = 0;
-               }
-               desc.written = 0;
-               desc.arg.buf = iov[seg].iov_base + offset;
-               desc.count = iov[seg].iov_len - offset;
-               if (desc.count == 0)
-                       continue;
-               desc.error = 0;
-               do_generic_file_read(filp, ppos, &desc);
-               retval += desc.written;
-               if (desc.error) {
-                       retval = retval ?: desc.error;
-                       break;
-               }
-               if (desc.count > 0)
-                       break;
-       }
+       retval = do_generic_file_read(filp, ppos, &i, retval);
  out:
        return retval;
  }
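
generic_file_aio_read() now wraps the whole iovec array in one iov_iter up front, and a short direct read is handled with a single iov_iter_advance() instead of the removed per-segment skip loop. From userspace this is the plain readv(2) scatter pattern (hypothetical demo):

    /*
     * Hypothetical readv(2) demo: one file read scattered across two
     * buffers, the userspace view of what an iov_iter walks in-kernel.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void)
    {
        char hdr[8], body[56];
        struct iovec iov[2] = {
            { .iov_base = hdr,  .iov_len = sizeof(hdr)  },
            { .iov_base = body, .iov_len = sizeof(body) },
        };
        int fd = open("/etc/hostname", O_RDONLY);
        ssize_t n;

        if (fd < 0) { perror("open"); return 1; }
        n = readv(fd, iov, 2);    /* fills hdr, then spills into body */
        printf("read %zd bytes across 2 iovecs\n", n);
        close(fd);
        return 0;
    }
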
@@@ -1953,11 -1552,11 +1891,11 @@@ int filemap_fault(struct vm_area_struc
        struct inode *inode = mapping->host;
        pgoff_t offset = vmf->pgoff;
        struct page *page;
 -      pgoff_t size;
 +      loff_t size;
        int ret = 0;
  
 -      size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 -      if (offset >= size)
 +      size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
 +      if (offset >= size >> PAGE_CACHE_SHIFT)
                return VM_FAULT_SIGBUS;
  
        /*
@@@ -2006,8 -1605,8 +1944,8 @@@ retry_find
         * Found the page and have a reference on it.
         * We must recheck i_size under page lock.
         */
 -      size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 -      if (unlikely(offset >= size)) {
 +      size = round_up(i_size_read(inode), PAGE_CACHE_SIZE);
 +      if (unlikely(offset >= size >> PAGE_CACHE_SHIFT)) {
                unlock_page(page);
                page_cache_release(page);
                return VM_FAULT_SIGBUS;
@@@ -2065,78 -1664,6 +2003,78 @@@ page_not_uptodate
  }
  EXPORT_SYMBOL(filemap_fault);
  
 +void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
 +{
 +      struct radix_tree_iter iter;
 +      void **slot;
 +      struct file *file = vma->vm_file;
 +      struct address_space *mapping = file->f_mapping;
 +      loff_t size;
 +      struct page *page;
 +      unsigned long address = (unsigned long) vmf->virtual_address;
 +      unsigned long addr;
 +      pte_t *pte;
 +
 +      rcu_read_lock();
 +      radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, vmf->pgoff) {
 +              if (iter.index > vmf->max_pgoff)
 +                      break;
 +repeat:
 +              page = radix_tree_deref_slot(slot);
 +              if (unlikely(!page))
 +                      goto next;
 +              if (radix_tree_exception(page)) {
 +                      if (radix_tree_deref_retry(page))
 +                              break;
 +                      else
 +                              goto next;
 +              }
 +
 +              if (!page_cache_get_speculative(page))
 +                      goto repeat;
 +
 +              /* Has the page moved? */
 +              if (unlikely(page != *slot)) {
 +                      page_cache_release(page);
 +                      goto repeat;
 +              }
 +
 +              if (!PageUptodate(page) ||
 +                              PageReadahead(page) ||
 +                              PageHWPoison(page))
 +                      goto skip;
 +              if (!trylock_page(page))
 +                      goto skip;
 +
 +              if (page->mapping != mapping || !PageUptodate(page))
 +                      goto unlock;
 +
 +              size = round_up(i_size_read(mapping->host), PAGE_CACHE_SIZE);
 +              if (page->index >= size >> PAGE_CACHE_SHIFT)
 +                      goto unlock;
 +
 +              pte = vmf->pte + page->index - vmf->pgoff;
 +              if (!pte_none(*pte))
 +                      goto unlock;
 +
 +              if (file->f_ra.mmap_miss > 0)
 +                      file->f_ra.mmap_miss--;
 +              addr = address + (page->index - vmf->pgoff) * PAGE_SIZE;
 +              do_set_pte(vma, addr, page, pte, false, false);
 +              unlock_page(page);
 +              goto next;
 +unlock:
 +              unlock_page(page);
 +skip:
 +              page_cache_release(page);
 +next:
 +              if (iter.index == vmf->max_pgoff)
 +                      break;
 +      }
 +      rcu_read_unlock();
 +}
 +EXPORT_SYMBOL(filemap_map_pages);
 +
  int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
  {
        struct page *page = vmf->page;
@@@ -2166,7 -1693,6 +2104,7 @@@ EXPORT_SYMBOL(filemap_page_mkwrite)
  
  const struct vm_operations_struct generic_file_vm_ops = {
        .fault          = filemap_fault,
 +      .map_pages      = filemap_map_pages,
        .page_mkwrite   = filemap_page_mkwrite,
        .remap_pages    = generic_file_remap_pages,
  };
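
filemap_map_pages() lets a single fault map every already-uptodate page in the window around vmf->pgoff, so sequential reads through an mmap take far fewer faults. A hypothetical demo of the access pattern that benefits:

    /*
     * Hypothetical demo: map a file and touch it sequentially; with
     * .map_pages wired up, neighbouring resident pages get mapped on
     * the first fault instead of one fault per page.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>

    int main(void)
    {
        struct stat st;
        int fd = open("/etc/services", O_RDONLY);
        unsigned long sum = 0;
        char *p;
        off_t i;

        if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0)
            return 1;
        p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
        if (p == MAP_FAILED) { perror("mmap"); return 1; }
        for (i = 0; i < st.st_size; i++)       /* sequential touch */
            sum += (unsigned char)p[i];
        printf("checksum %lu over %lld bytes\n", sum, (long long)st.st_size);
        munmap(p, st.st_size);
        close(fd);
        return 0;
    }
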
@@@ -2207,18 -1733,6 +2145,18 @@@ int generic_file_readonly_mmap(struct f
  EXPORT_SYMBOL(generic_file_mmap);
  EXPORT_SYMBOL(generic_file_readonly_mmap);
  
 +static struct page *wait_on_page_read(struct page *page)
 +{
 +      if (!IS_ERR(page)) {
 +              wait_on_page_locked(page);
 +              if (!PageUptodate(page)) {
 +                      page_cache_release(page);
 +                      page = ERR_PTR(-EIO);
 +              }
 +      }
 +      return page;
 +}
 +
  static struct page *__read_cache_page(struct address_space *mapping,
                                pgoff_t index,
                                int (*filler)(void *, struct page *),
@@@ -2245,8 -1759,6 +2183,8 @@@ repeat
                if (err < 0) {
                        page_cache_release(page);
                        page = ERR_PTR(err);
 +              } else {
 +                      page = wait_on_page_read(page);
                }
        }
        return page;
@@@ -2283,10 -1795,6 +2221,10 @@@ retry
        if (err < 0) {
                page_cache_release(page);
                return ERR_PTR(err);
 +      } else {
 +              page = wait_on_page_read(page);
 +              if (IS_ERR(page))
 +                      return page;
        }
  out:
        mark_page_accessed(page);
  }
  
  /**
 - * read_cache_page_async - read into page cache, fill it if needed
 + * read_cache_page - read into page cache, fill it if needed
   * @mapping:  the page's address_space
   * @index:    the page index
   * @filler:   function to perform the read
   * @data:     first arg to filler(data, page) function, often left as NULL
   *
 - * Same as read_cache_page, but don't wait for page to become unlocked
 - * after submitting it to the filler.
 - *
   * Read into the page cache. If a page already exists, and PageUptodate() is
 - * not set, try to fill the page but don't wait for it to become unlocked.
 + * not set, try to fill the page and wait for it to become unlocked.
   *
   * If the page does not get brought uptodate, return -EIO.
   */
 -struct page *read_cache_page_async(struct address_space *mapping,
 +struct page *read_cache_page(struct address_space *mapping,
                                pgoff_t index,
                                int (*filler)(void *, struct page *),
                                void *data)
  {
        return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping));
  }
 -EXPORT_SYMBOL(read_cache_page_async);
 -
 -static struct page *wait_on_page_read(struct page *page)
 -{
 -      if (!IS_ERR(page)) {
 -              wait_on_page_locked(page);
 -              if (!PageUptodate(page)) {
 -                      page_cache_release(page);
 -                      page = ERR_PTR(-EIO);
 -              }
 -      }
 -      return page;
 -}
 +EXPORT_SYMBOL(read_cache_page);
  
  /**
   * read_cache_page_gfp - read into page cache, using specified page allocation flags.
@@@ -2331,154 -1854,31 +2269,10 @@@ struct page *read_cache_page_gfp(struc
  {
        filler_t *filler = (filler_t *)mapping->a_ops->readpage;
  
 -      return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp));
 +      return do_read_cache_page(mapping, index, filler, NULL, gfp);
  }
  EXPORT_SYMBOL(read_cache_page_gfp);
  
- static size_t __iovec_copy_from_user_inatomic(char *vaddr,
-                       const struct iovec *iov, size_t base, size_t bytes)
- {
-       size_t copied = 0, left = 0;
-       while (bytes) {
-               char __user *buf = iov->iov_base + base;
-               int copy = min(bytes, iov->iov_len - base);
-               base = 0;
-               left = __copy_from_user_inatomic(vaddr, buf, copy);
-               copied += copy;
-               bytes -= copy;
-               vaddr += copy;
-               iov++;
-               if (unlikely(left))
-                       break;
-       }
-       return copied - left;
- }
- /*
-  * Copy as much as we can into the page and return the number of bytes which
-  * were successfully copied.  If a fault is encountered then return the number of
-  * bytes which were copied.
-  */
- size_t iov_iter_copy_from_user_atomic(struct page *page,
-               struct iov_iter *i, unsigned long offset, size_t bytes)
- {
-       char *kaddr;
-       size_t copied;
-       BUG_ON(!in_atomic());
-       kaddr = kmap_atomic(page);
-       if (likely(i->nr_segs == 1)) {
-               int left;
-               char __user *buf = i->iov->iov_base + i->iov_offset;
-               left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
-               copied = bytes - left;
-       } else {
-               copied = __iovec_copy_from_user_inatomic(kaddr + offset,
-                                               i->iov, i->iov_offset, bytes);
-       }
-       kunmap_atomic(kaddr);
-       return copied;
- }
- EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
- /*
-  * This has the same sideeffects and return value as
-  * iov_iter_copy_from_user_atomic().
-  * The difference is that it attempts to resolve faults.
-  * Page must not be locked.
-  */
- size_t iov_iter_copy_from_user(struct page *page,
-               struct iov_iter *i, unsigned long offset, size_t bytes)
- {
-       char *kaddr;
-       size_t copied;
-       kaddr = kmap(page);
-       if (likely(i->nr_segs == 1)) {
-               int left;
-               char __user *buf = i->iov->iov_base + i->iov_offset;
-               left = __copy_from_user(kaddr + offset, buf, bytes);
-               copied = bytes - left;
-       } else {
-               copied = __iovec_copy_from_user_inatomic(kaddr + offset,
-                                               i->iov, i->iov_offset, bytes);
-       }
-       kunmap(page);
-       return copied;
- }
- EXPORT_SYMBOL(iov_iter_copy_from_user);
- void iov_iter_advance(struct iov_iter *i, size_t bytes)
- {
-       BUG_ON(i->count < bytes);
-       if (likely(i->nr_segs == 1)) {
-               i->iov_offset += bytes;
-               i->count -= bytes;
-       } else {
-               const struct iovec *iov = i->iov;
-               size_t base = i->iov_offset;
-               unsigned long nr_segs = i->nr_segs;
-               /*
-                * The !iov->iov_len check ensures we skip over unlikely
-                * zero-length segments (without overruning the iovec).
-                */
-                * zero-length segments (without overrunning the iovec).
-                       int copy;
-                       copy = min(bytes, iov->iov_len - base);
-                       BUG_ON(!i->count || i->count < copy);
-                       i->count -= copy;
-                       bytes -= copy;
-                       base += copy;
-                       if (iov->iov_len == base) {
-                               iov++;
-                               nr_segs--;
-                               base = 0;
-                       }
-               }
-               i->iov = iov;
-               i->iov_offset = base;
-               i->nr_segs = nr_segs;
-       }
- }
- EXPORT_SYMBOL(iov_iter_advance);
- /*
-  * Fault in the first iovec of the given iov_iter, to a maximum length
-  * of bytes. Returns 0 on success, or non-zero if the memory could not be
-  * accessed (ie. because it is an invalid address).
 -/**
 - * read_cache_page - read into page cache, fill it if needed
 - * @mapping:  the page's address_space
 - * @index:    the page index
 - * @filler:   function to perform the read
 - * @data:     first arg to filler(data, page) function, often left as NULL
-- *
-  * writev-intensive code may want this to prefault several iovecs -- that
-  * would be possible (callers must not rely on the fact that _only_ the
-  * first iovec will be faulted with the current implementation).
-  */
- int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
- {
-       char __user *buf = i->iov->iov_base + i->iov_offset;
-       bytes = min(bytes, i->iov->iov_len - i->iov_offset);
-       return fault_in_pages_readable(buf, bytes);
- }
- EXPORT_SYMBOL(iov_iter_fault_in_readable);
- /*
-  * Return the count of just the current iov_iter segment.
 - * Read into the page cache. If a page already exists, and PageUptodate() is
 - * not set, try to fill the page then wait for it to become unlocked.
 - *
 - * If the page does not get brought uptodate, return -EIO.
-- */
- size_t iov_iter_single_seg_count(const struct iov_iter *i)
 -struct page *read_cache_page(struct address_space *mapping,
 -                              pgoff_t index,
 -                              int (*filler)(void *, struct page *),
 -                              void *data)
--{
-       const struct iovec *iov = i->iov;
-       if (i->nr_segs == 1)
-               return i->count;
-       else
-               return min(i->count, iov->iov_len - i->iov_offset);
 -      return wait_on_page_read(read_cache_page_async(mapping, index, filler, data));
--}
- EXPORT_SYMBOL(iov_iter_single_seg_count);
 -EXPORT_SYMBOL(read_cache_page);
--
  /*
   * Performs necessary checks before doing a write
   *
@@@ -2585,7 -1985,7 +2379,7 @@@ EXPORT_SYMBOL(pagecache_write_end)
  
  ssize_t
  generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long *nr_segs, loff_t pos, loff_t *ppos,
+               unsigned long *nr_segs, loff_t pos,
                size_t count, size_t ocount)
  {
        struct file     *file = iocb->ki_filp;
                        i_size_write(inode, pos);
                        mark_inode_dirty(inode);
                }
-               *ppos = pos;
+               iocb->ki_pos = pos;
        }
  out:
        return written;
@@@ -2692,7 -2092,7 +2486,7 @@@ found
  }
  EXPORT_SYMBOL(grab_cache_page_write_begin);
  
- static ssize_t generic_perform_write(struct file *file,
+ ssize_t generic_perform_write(struct file *file,
                                struct iov_iter *i, loff_t pos)
  {
        struct address_space *mapping = file->f_mapping;
@@@ -2742,9 -2142,7 +2536,7 @@@ again
                if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);
  
-               pagefault_disable();
                copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
-               pagefault_enable();
                flush_dcache_page(page);
  
                mark_page_accessed(page);
  
        return written ? written : status;
  }
- ssize_t
- generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t pos, loff_t *ppos,
-               size_t count, ssize_t written)
- {
-       struct file *file = iocb->ki_filp;
-       ssize_t status;
-       struct iov_iter i;
-       iov_iter_init(&i, iov, nr_segs, count, written);
-       status = generic_perform_write(file, &i, pos);
-       if (likely(status >= 0)) {
-               written += status;
-               *ppos = pos + status;
-       }
-       
-       return written ? written : status;
- }
- EXPORT_SYMBOL(generic_file_buffered_write);
+ EXPORT_SYMBOL(generic_perform_write);
  
  /**
   * __generic_file_aio_write - write data to a file
   * avoid syncing under i_mutex.
   */
  ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                                unsigned long nr_segs, loff_t *ppos)
+                                unsigned long nr_segs)
  {
        struct file *file = iocb->ki_filp;
        struct address_space * mapping = file->f_mapping;
        size_t ocount;          /* original count */
        size_t count;           /* after file limit checks */
        struct inode    *inode = mapping->host;
-       loff_t          pos;
-       ssize_t         written;
+       loff_t          pos = iocb->ki_pos;
+       ssize_t         written = 0;
        ssize_t         err;
+       ssize_t         status;
+       struct iov_iter from;
  
        ocount = 0;
        err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
                return err;
  
        count = ocount;
-       pos = *ppos;
  
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
-       written = 0;
        err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
        if (err)
                goto out;
        if (err)
                goto out;
  
+       iov_iter_init(&from, iov, nr_segs, count, 0);
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
        if (unlikely(file->f_flags & O_DIRECT)) {
                loff_t endbyte;
-               ssize_t written_buffered;
  
-               written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
-                                                       ppos, count, ocount);
+               written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
+                                                       count, ocount);
                if (written < 0 || written == count)
                        goto out;
+               iov_iter_advance(&from, written);
                /*
                 * direct-io write to a hole: fall through to buffered I/O
                 * for completing the rest of the request.
                 */
                pos += written;
                count -= written;
-               written_buffered = generic_file_buffered_write(iocb, iov,
-                                               nr_segs, pos, ppos, count,
-                                               written);
+               status = generic_perform_write(file, &from, pos);
                /*
-                * If generic_file_buffered_write() retuned a synchronous error
+                * If generic_perform_write() returned a synchronous error
                 * then we want to return the number of bytes which were
                 * direct-written, or the error code if that was zero.  Note
                 * that this differs from normal direct-io semantics, which
                 * will return -EFOO even if some bytes were written.
                 */
-               if (written_buffered < 0) {
-                       err = written_buffered;
+               if (unlikely(status < 0) && !written) {
+                       err = status;
                        goto out;
                }
+               iocb->ki_pos = pos + status;
                /*
                 * We need to ensure that the page cache pages are written to
                 * disk and invalidated to preserve the expected O_DIRECT
                 * semantics.
                 */
-               endbyte = pos + written_buffered - written - 1;
+               endbyte = pos + status - 1;
                err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
                if (err == 0) {
-                       written = written_buffered;
+                       written += status;
                        invalidate_mapping_pages(mapping,
                                                 pos >> PAGE_CACHE_SHIFT,
                                                 endbyte >> PAGE_CACHE_SHIFT);
                         */
                }
        } else {
-               written = generic_file_buffered_write(iocb, iov, nr_segs,
-                               pos, ppos, count, written);
+               written = generic_perform_write(file, &from, pos);
+               if (likely(written >= 0))
+                       iocb->ki_pos = pos + written;
        }
  out:
        current->backing_dev_info = NULL;
@@@ -2941,7 -2321,7 +2715,7 @@@ ssize_t generic_file_aio_write(struct k
        BUG_ON(iocb->ki_pos != pos);
  
        mutex_lock(&inode->i_mutex);
-       ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+       ret = __generic_file_aio_write(iocb, iov, nr_segs);
        mutex_unlock(&inode->i_mutex);
  
        if (ret > 0) {
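
__generic_file_aio_write() now keeps one iov_iter for the whole request: a short O_DIRECT write advances the iterator and generic_perform_write() finishes the tail through the page cache, after which the buffered range is flushed and invalidated to preserve O_DIRECT semantics. The userspace contract being preserved (hypothetical demo; O_DIRECT requires an aligned buffer and a filesystem that supports it):

    /*
     * Hypothetical O_DIRECT write demo: the buffer, offset and length
     * must all be block-aligned; the kernel-side fallback above is
     * what keeps mixed direct/buffered writes coherent.
     */
    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        void *buf;
        int fd = open("odirect.tmp", O_WRONLY | O_CREAT | O_DIRECT, 0644);

        if (fd < 0) { perror("open"); return 1; }
        if (posix_memalign(&buf, 4096, 4096)) return 1;
        memset(buf, 'x', 4096);
        if (pwrite(fd, buf, 4096, 0) != 4096)
            perror("pwrite");     /* support varies by filesystem */
        free(buf);
        close(fd);
        return 0;
    }
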
diff --combined mm/process_vm_access.c
index cb79065c19e55f054888430555b44199425dd72b,d6bd3fdd692510539402b1313ca49c4da99c7fad..8505c9262b35853e22580c6c9b74c4d12bc86acc
  
  /**
   * process_vm_rw_pages - read/write pages from task specified
-  * @task: task to read/write from
-  * @mm: mm for task
-  * @process_pages: struct pages area that can store at least
-  *  nr_pages_to_copy struct page pointers
-  * @pa: address of page in task to start copying from/to
+  * @pages: array of pointers to pages we want to copy
   * @start_offset: offset in page to start copying from/to
   * @len: number of bytes to copy
-  * @lvec: iovec array specifying where to copy to/from
-  * @lvec_cnt: number of elements in iovec array
-  * @lvec_current: index in iovec array we are up to
-  * @lvec_offset: offset in bytes from current iovec iov_base we are up to
+  * @iter: where to copy to/from locally
   * @vm_write: 0 means copy from, 1 means copy to
-  * @nr_pages_to_copy: number of pages to copy
-  * @bytes_copied: returns number of bytes successfully copied
   * Returns 0 on success, error code otherwise
   */
- static int process_vm_rw_pages(struct task_struct *task,
-                              struct mm_struct *mm,
-                              struct page **process_pages,
-                              unsigned long pa,
-                              unsigned long start_offset,
-                              unsigned long len,
-                              const struct iovec *lvec,
-                              unsigned long lvec_cnt,
-                              unsigned long *lvec_current,
-                              size_t *lvec_offset,
-                              int vm_write,
-                              unsigned int nr_pages_to_copy,
-                              ssize_t *bytes_copied)
+ static int process_vm_rw_pages(struct page **pages,
+                              unsigned offset,
+                              size_t len,
+                              struct iov_iter *iter,
+                              int vm_write)
  {
-       int pages_pinned;
-       void *target_kaddr;
-       int pgs_copied = 0;
-       int j;
-       int ret;
-       ssize_t bytes_to_copy;
-       ssize_t rc = 0;
-       *bytes_copied = 0;
-       /* Get the pages we're interested in */
-       down_read(&mm->mmap_sem);
-       pages_pinned = get_user_pages(task, mm, pa,
-                                     nr_pages_to_copy,
-                                     vm_write, 0, process_pages, NULL);
-       up_read(&mm->mmap_sem);
-       if (pages_pinned != nr_pages_to_copy) {
-               rc = -EFAULT;
-               goto end;
-       }
        /* Do the copy for each page */
-       for (pgs_copied = 0;
-            (pgs_copied < nr_pages_to_copy) && (*lvec_current < lvec_cnt);
-            pgs_copied++) {
-               /* Make sure we have a non zero length iovec */
-               while (*lvec_current < lvec_cnt
-                      && lvec[*lvec_current].iov_len == 0)
-                       (*lvec_current)++;
-               if (*lvec_current == lvec_cnt)
-                       break;
-               /*
-                * Will copy smallest of:
-                * - bytes remaining in page
-                * - bytes remaining in destination iovec
-                */
-               bytes_to_copy = min_t(ssize_t, PAGE_SIZE - start_offset,
-                                     len - *bytes_copied);
-               bytes_to_copy = min_t(ssize_t, bytes_to_copy,
-                                     lvec[*lvec_current].iov_len
-                                     - *lvec_offset);
-               target_kaddr = kmap(process_pages[pgs_copied]) + start_offset;
-               if (vm_write)
-                       ret = copy_from_user(target_kaddr,
-                                            lvec[*lvec_current].iov_base
-                                            + *lvec_offset,
-                                            bytes_to_copy);
-               else
-                       ret = copy_to_user(lvec[*lvec_current].iov_base
-                                          + *lvec_offset,
-                                          target_kaddr, bytes_to_copy);
-               kunmap(process_pages[pgs_copied]);
-               if (ret) {
-                       *bytes_copied += bytes_to_copy - ret;
-                       pgs_copied++;
-                       rc = -EFAULT;
-                       goto end;
-               }
-               *bytes_copied += bytes_to_copy;
-               *lvec_offset += bytes_to_copy;
-               if (*lvec_offset == lvec[*lvec_current].iov_len) {
-                       /*
-                        * Need to copy remaining part of page into the
-                        * next iovec if there are any bytes left in page
-                        */
-                       (*lvec_current)++;
-                       *lvec_offset = 0;
-                       start_offset = (start_offset + bytes_to_copy)
-                               % PAGE_SIZE;
-                       if (start_offset)
-                               pgs_copied--;
+       while (len && iov_iter_count(iter)) {
+               struct page *page = *pages++;
+               size_t copy = PAGE_SIZE - offset;
+               size_t copied;
+               if (copy > len)
+                       copy = len;
+               if (vm_write) {
+                       if (copy > iov_iter_count(iter))
+                               copy = iov_iter_count(iter);
+                       copied = iov_iter_copy_from_user(page, iter,
+                                       offset, copy);
+                       iov_iter_advance(iter, copied);
+                       set_page_dirty_lock(page);
                } else {
-                       start_offset = 0;
-               }
-       }
- end:
-       if (vm_write) {
-               for (j = 0; j < pages_pinned; j++) {
-                       if (j < pgs_copied)
-                               set_page_dirty_lock(process_pages[j]);
-                       put_page(process_pages[j]);
+                       copied = copy_page_to_iter(page, offset, copy, iter);
                }
-       } else {
-               for (j = 0; j < pages_pinned; j++)
-                       put_page(process_pages[j]);
+               len -= copied;
+               if (copied < copy && iov_iter_count(iter))
+                       return -EFAULT;
+               offset = 0;
        }
-       return rc;
+       return 0;
  }
  
  /* Maximum number of pages kmalloc'd to hold struct page's during copy */
   * process_vm_rw_single_vec - read/write pages from task specified
   * @addr: start memory address of target process
   * @len: size of area to copy to/from
-  * @lvec: iovec array specifying where to copy to/from locally
-  * @lvec_cnt: number of elements in iovec array
-  * @lvec_current: index in iovec array we are up to
-  * @lvec_offset: offset in bytes from current iovec iov_base we are up to
+  * @iter: where to copy to/from locally
   * @process_pages: struct pages area that can store at least
   *  nr_pages_to_copy struct page pointers
   * @mm: mm for task
   * @task: task to read/write from
   * @vm_write: 0 means copy from, 1 means copy to
-  * @bytes_copied: returns number of bytes successfully copied
   * Returns 0 on success or on failure error code
   */
  static int process_vm_rw_single_vec(unsigned long addr,
                                    unsigned long len,
-                                   const struct iovec *lvec,
-                                   unsigned long lvec_cnt,
-                                   unsigned long *lvec_current,
-                                   size_t *lvec_offset,
+                                   struct iov_iter *iter,
                                    struct page **process_pages,
                                    struct mm_struct *mm,
                                    struct task_struct *task,
-                                   int vm_write,
-                                   ssize_t *bytes_copied)
+                                   int vm_write)
  {
        unsigned long pa = addr & PAGE_MASK;
        unsigned long start_offset = addr - pa;
        unsigned long nr_pages;
-       ssize_t bytes_copied_loop;
        ssize_t rc = 0;
-       unsigned long nr_pages_copied = 0;
-       unsigned long nr_pages_to_copy;
        unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
                / sizeof(struct pages *);
  
-       *bytes_copied = 0;
        /* Work out address and page range required */
        if (len == 0)
                return 0;
        nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;
  
-       while ((nr_pages_copied < nr_pages) && (*lvec_current < lvec_cnt)) {
-               nr_pages_to_copy = min(nr_pages - nr_pages_copied,
-                                      max_pages_per_loop);
+       while (!rc && nr_pages && iov_iter_count(iter)) {
+               int pages = min(nr_pages, max_pages_per_loop);
+               size_t bytes;
  
-               rc = process_vm_rw_pages(task, mm, process_pages, pa,
-                                        start_offset, len,
-                                        lvec, lvec_cnt,
-                                        lvec_current, lvec_offset,
-                                        vm_write, nr_pages_to_copy,
-                                        &bytes_copied_loop);
-               start_offset = 0;
-               *bytes_copied += bytes_copied_loop;
+               /* Get the pages we're interested in */
+               down_read(&mm->mmap_sem);
+               pages = get_user_pages(task, mm, pa, pages,
+                                     vm_write, 0, process_pages, NULL);
+               up_read(&mm->mmap_sem);
  
-               if (rc < 0) {
-                       return rc;
-               } else {
-                       len -= bytes_copied_loop;
-                       nr_pages_copied += nr_pages_to_copy;
-                       pa += nr_pages_to_copy * PAGE_SIZE;
-               }
+               if (pages <= 0)
+                       return -EFAULT;
+               bytes = pages * PAGE_SIZE - start_offset;
+               if (bytes > len)
+                       bytes = len;
+               rc = process_vm_rw_pages(process_pages,
+                                        start_offset, bytes, iter,
+                                        vm_write);
+               len -= bytes;
+               start_offset = 0;
+               nr_pages -= pages;
+               pa += pages * PAGE_SIZE;
+               while (pages)
+                       put_page(process_pages[--pages]);
        }
  
        return rc;
  /**
   * process_vm_rw_core - core of reading/writing pages from task specified
   * @pid: PID of process to read/write from/to
-  * @lvec: iovec array specifying where to copy to/from locally
-  * @liovcnt: size of lvec array
+  * @iter: where to copy to/from locally
   * @rvec: iovec array specifying where to copy to/from in the other process
   * @riovcnt: size of rvec array
   * @flags: currently unused
   *  return fewer bytes than expected if an error occurs during the copying
   *  process.
   */
- static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
-                                 unsigned long liovcnt,
+ static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter,
                                  const struct iovec *rvec,
                                  unsigned long riovcnt,
                                  unsigned long flags, int vm_write)
        struct mm_struct *mm;
        unsigned long i;
        ssize_t rc = 0;
-       ssize_t bytes_copied_loop;
-       ssize_t bytes_copied = 0;
        unsigned long nr_pages = 0;
        unsigned long nr_pages_iov;
-       unsigned long iov_l_curr_idx = 0;
-       size_t iov_l_curr_offset = 0;
        ssize_t iov_len;
+       size_t total_len = iov_iter_count(iter);
  
        /*
         * Work out how many pages of struct pages we're going to need
                goto put_task_struct;
        }
  
-       for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) {
+       for (i = 0; i < riovcnt && iov_iter_count(iter) && !rc; i++)
                rc = process_vm_rw_single_vec(
                        (unsigned long)rvec[i].iov_base, rvec[i].iov_len,
-                       lvec, liovcnt, &iov_l_curr_idx, &iov_l_curr_offset,
-                       process_pages, mm, task, vm_write, &bytes_copied_loop);
-               bytes_copied += bytes_copied_loop;
-               if (rc != 0) {
-                       /* If we have managed to copy any data at all then
-                          we return the number of bytes copied. Otherwise
-                          we return the error code */
-                       if (bytes_copied)
-                               rc = bytes_copied;
-                       goto put_mm;
-               }
-       }
+                       iter, process_pages, mm, task, vm_write);
+       /* copied = space before - space after */
+       total_len -= iov_iter_count(iter);
+       /* If we have managed to copy any data at all then
+          we return the number of bytes copied. Otherwise
+          we return the error code */
+       if (total_len)
+               rc = total_len;
  
-       rc = bytes_copied;
- put_mm:
        mmput(mm);
  
  put_task_struct:
@@@ -363,6 -262,7 +262,7 @@@ static ssize_t process_vm_rw(pid_t pid
        struct iovec iovstack_r[UIO_FASTIOV];
        struct iovec *iov_l = iovstack_l;
        struct iovec *iov_r = iovstack_r;
+       struct iov_iter iter;
        ssize_t rc;
  
        if (flags != 0)
        if (rc <= 0)
                goto free_iovecs;
  
+       iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
        rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
                                   iovstack_r, &iov_r);
        if (rc <= 0)
                goto free_iovecs;
  
-       rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags,
-                               vm_write);
+       rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
  
  free_iovecs:
        if (iov_r != iovstack_r)
@@@ -412,7 -313,7 +313,7 @@@ SYSCALL_DEFINE6(process_vm_writev, pid_
  
  #ifdef CONFIG_COMPAT
  
 -asmlinkage ssize_t
 +static ssize_t
  compat_process_vm_rw(compat_pid_t pid,
                     const struct compat_iovec __user *lvec,
                     unsigned long liovcnt,
        struct iovec iovstack_r[UIO_FASTIOV];
        struct iovec *iov_l = iovstack_l;
        struct iovec *iov_r = iovstack_r;
+       struct iov_iter iter;
        ssize_t rc = -EFAULT;
  
        if (flags != 0)
                                                  &iov_l);
        if (rc <= 0)
                goto free_iovecs;
+       iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
        rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
                                          UIO_FASTIOV, iovstack_r,
                                          &iov_r);
        if (rc <= 0)
                goto free_iovecs;
  
-       rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags,
-                          vm_write);
+       rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
  
  free_iovecs:
        if (iov_r != iovstack_r)
        return rc;
  }
  
 -asmlinkage ssize_t
 -compat_sys_process_vm_readv(compat_pid_t pid,
 -                          const struct compat_iovec __user *lvec,
 -                          unsigned long liovcnt,
 -                          const struct compat_iovec __user *rvec,
 -                          unsigned long riovcnt,
 -                          unsigned long flags)
 +COMPAT_SYSCALL_DEFINE6(process_vm_readv, compat_pid_t, pid,
 +                     const struct compat_iovec __user *, lvec,
 +                     compat_ulong_t, liovcnt,
 +                     const struct compat_iovec __user *, rvec,
 +                     compat_ulong_t, riovcnt,
 +                     compat_ulong_t, flags)
  {
        return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
                                    riovcnt, flags, 0);
  }
  
 -asmlinkage ssize_t
 -compat_sys_process_vm_writev(compat_pid_t pid,
 -                           const struct compat_iovec __user *lvec,
 -                           unsigned long liovcnt,
 -                           const struct compat_iovec __user *rvec,
 -                           unsigned long riovcnt,
 -                           unsigned long flags)
 +COMPAT_SYSCALL_DEFINE6(process_vm_writev, compat_pid_t, pid,
 +                     const struct compat_iovec __user *, lvec,
 +                     compat_ulong_t, liovcnt,
 +                     const struct compat_iovec __user *, rvec,
 +                     compat_ulong_t, riovcnt,
 +                     compat_ulong_t, flags)
  {
        return compat_process_vm_rw(pid, lvec, liovcnt, rvec,
                                    riovcnt, flags, 1);
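
A quick userspace view of the syscall pair whose plumbing is reworked above
(process_vm_readv()/process_vm_writev(), exposed by glibc since 2.15). This
demo is independent of the patch; reading our own address space via getpid()
just keeps it self-contained, where a real caller would name another process
it is allowed to ptrace:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
        char src[] = "data in the \"remote\" address space";
        char dst[sizeof(src)];
        struct iovec local  = { .iov_base = dst, .iov_len = sizeof(dst) };
        struct iovec remote = { .iov_base = src, .iov_len = sizeof(src) };
        ssize_t n;

        /* One local and one remote iovec; the kernel walks both sides. */
        n = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
        if (n < 0) {
                perror("process_vm_readv");
                return 1;
        }
        /* A short count signals a partial copy - the "return bytes copied
         * if any, else the error" rule that the iov_iter accounting above
         * preserves via total_len -= iov_iter_count(iter). */
        printf("copied %zd bytes: %s\n", n, dst);
        return 0;
}
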
diff --combined mm/shmem.c
index 70273f8df5867a33aeea7267e4268a949a35aaaa,37400a148f29e5bf99611ecdf8dbed44212248fd..8f1a95406bae6a61b20be247b50603cdedc30398
@@@ -242,17 -242,19 +242,17 @@@ static int shmem_radix_tree_replace(str
                        pgoff_t index, void *expected, void *replacement)
  {
        void **pslot;
 -      void *item = NULL;
 +      void *item;
  
        VM_BUG_ON(!expected);
 +      VM_BUG_ON(!replacement);
        pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
 -      if (pslot)
 -              item = radix_tree_deref_slot_protected(pslot,
 -                                                      &mapping->tree_lock);
 +      if (!pslot)
 +              return -ENOENT;
 +      item = radix_tree_deref_slot_protected(pslot, &mapping->tree_lock);
        if (item != expected)
                return -ENOENT;
 -      if (replacement)
 -              radix_tree_replace_slot(pslot, replacement);
 -      else
 -              radix_tree_delete(&mapping->page_tree, index);
 +      radix_tree_replace_slot(pslot, replacement);
        return 0;
  }
  
@@@ -328,21 -330,85 +328,21 @@@ static void shmem_delete_from_page_cach
        BUG_ON(error);
  }
  
 -/*
 - * Like find_get_pages, but collecting swap entries as well as pages.
 - */
 -static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
 -                                      pgoff_t start, unsigned int nr_pages,
 -                                      struct page **pages, pgoff_t *indices)
 -{
 -      void **slot;
 -      unsigned int ret = 0;
 -      struct radix_tree_iter iter;
 -
 -      if (!nr_pages)
 -              return 0;
 -
 -      rcu_read_lock();
 -restart:
 -      radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
 -              struct page *page;
 -repeat:
 -              page = radix_tree_deref_slot(slot);
 -              if (unlikely(!page))
 -                      continue;
 -              if (radix_tree_exception(page)) {
 -                      if (radix_tree_deref_retry(page))
 -                              goto restart;
 -                      /*
 -                       * Otherwise, we must be storing a swap entry
 -                       * here as an exceptional entry: so return it
 -                       * without attempting to raise page count.
 -                       */
 -                      goto export;
 -              }
 -              if (!page_cache_get_speculative(page))
 -                      goto repeat;
 -
 -              /* Has the page moved? */
 -              if (unlikely(page != *slot)) {
 -                      page_cache_release(page);
 -                      goto repeat;
 -              }
 -export:
 -              indices[ret] = iter.index;
 -              pages[ret] = page;
 -              if (++ret == nr_pages)
 -                      break;
 -      }
 -      rcu_read_unlock();
 -      return ret;
 -}
 -
  /*
   * Remove swap entry from radix tree, free the swap and its page cache.
   */
  static int shmem_free_swap(struct address_space *mapping,
                           pgoff_t index, void *radswap)
  {
 -      int error;
 +      void *old;
  
        spin_lock_irq(&mapping->tree_lock);
 -      error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
 +      old = radix_tree_delete_item(&mapping->page_tree, index, radswap);
        spin_unlock_irq(&mapping->tree_lock);
 -      if (!error)
 -              free_swap_and_cache(radix_to_swp_entry(radswap));
 -      return error;
 -}
 -
 -/*
 - * Pagevec may contain swap entries, so shuffle up pages before releasing.
 - */
 -static void shmem_deswap_pagevec(struct pagevec *pvec)
 -{
 -      int i, j;
 -
 -      for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
 -              struct page *page = pvec->pages[i];
 -              if (!radix_tree_exceptional_entry(page))
 -                      pvec->pages[j++] = page;
 -      }
 -      pvec->nr = j;
 +      if (old != radswap)
 +              return -ENOENT;
 +      free_swap_and_cache(radix_to_swp_entry(radswap));
 +      return 0;
  }
  
  /*
@@@ -363,12 -429,12 +363,12 @@@ void shmem_unlock_mapping(struct addres
                 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
                 * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
                 */
 -              pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
 -                                      PAGEVEC_SIZE, pvec.pages, indices);
 +              pvec.nr = find_get_entries(mapping, index,
 +                                         PAGEVEC_SIZE, pvec.pages, indices);
                if (!pvec.nr)
                        break;
                index = indices[pvec.nr - 1] + 1;
 -              shmem_deswap_pagevec(&pvec);
 +              pagevec_remove_exceptionals(&pvec);
                check_move_unevictable_pages(pvec.pages, pvec.nr);
                pagevec_release(&pvec);
                cond_resched();
@@@ -400,9 -466,9 +400,9 @@@ static void shmem_undo_range(struct ino
        pagevec_init(&pvec, 0);
        index = start;
        while (index < end) {
 -              pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
 -                              min(end - index, (pgoff_t)PAGEVEC_SIZE),
 -                                                      pvec.pages, indices);
 +              pvec.nr = find_get_entries(mapping, index,
 +                      min(end - index, (pgoff_t)PAGEVEC_SIZE),
 +                      pvec.pages, indices);
                if (!pvec.nr)
                        break;
                mem_cgroup_uncharge_start();
                        }
                        unlock_page(page);
                }
 -              shmem_deswap_pagevec(&pvec);
 +              pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
                mem_cgroup_uncharge_end();
                cond_resched();
        index = start;
        for ( ; ; ) {
                cond_resched();
 -              pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
 +
 +              pvec.nr = find_get_entries(mapping, index,
                                min(end - index, (pgoff_t)PAGEVEC_SIZE),
 -                                                      pvec.pages, indices);
 +                              pvec.pages, indices);
                if (!pvec.nr) {
                        if (index == start || unfalloc)
                                break;
                        continue;
                }
                if ((index == start || unfalloc) && indices[0] >= end) {
 -                      shmem_deswap_pagevec(&pvec);
 +                      pagevec_remove_exceptionals(&pvec);
                        pagevec_release(&pvec);
                        break;
                }
                        }
                        unlock_page(page);
                }
 -              shmem_deswap_pagevec(&pvec);
 +              pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
                mem_cgroup_uncharge_end();
                index++;
@@@ -683,7 -748,7 +683,7 @@@ int shmem_unuse(swp_entry_t swap, struc
         * the shmem_swaplist_mutex which might hold up shmem_writepage().
         * Charged back to the user (not to caller) when swap account is used.
         */
 -      error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
 +      error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL);
        if (error)
                goto out;
        /* No radix_tree_preload: swap entry keeps a place for page in tree */
@@@ -1015,7 -1080,7 +1015,7 @@@ static int shmem_getpage_gfp(struct ino
                return -EFBIG;
  repeat:
        swap.val = 0;
 -      page = find_lock_page(mapping, index);
 +      page = find_lock_entry(mapping, index);
        if (radix_tree_exceptional_entry(page)) {
                swap = radix_to_swp_entry(page);
                page = NULL;
                                goto failed;
                }
  
 -              error = mem_cgroup_cache_charge(page, current->mm,
 +              error = mem_cgroup_charge_file(page, current->mm,
                                                gfp & GFP_RECLAIM_MASK);
                if (!error) {
                        error = shmem_add_to_page_cache(page, mapping, index,
  
                SetPageSwapBacked(page);
                __set_page_locked(page);
 -              error = mem_cgroup_cache_charge(page, current->mm,
 +              error = mem_cgroup_charge_file(page, current->mm,
                                                gfp & GFP_RECLAIM_MASK);
                if (error)
                        goto decused;
@@@ -1352,11 -1417,6 +1352,11 @@@ static struct inode *shmem_get_inode(st
        return inode;
  }
  
 +bool shmem_mapping(struct address_space *mapping)
 +{
 +      return mapping->backing_dev_info == &shmem_backing_dev_info;
 +}
 +
  #ifdef CONFIG_TMPFS
  static const struct inode_operations shmem_symlink_inode_operations;
  static const struct inode_operations shmem_short_symlink_operations;
@@@ -1402,13 -1462,25 +1402,25 @@@ shmem_write_end(struct file *file, stru
        return copied;
  }
  
- static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
+ static ssize_t shmem_file_aio_read(struct kiocb *iocb,
+               const struct iovec *iov, unsigned long nr_segs, loff_t pos)
  {
-       struct inode *inode = file_inode(filp);
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
        struct address_space *mapping = inode->i_mapping;
        pgoff_t index;
        unsigned long offset;
        enum sgp_type sgp = SGP_READ;
+       int error;
+       ssize_t retval;
+       size_t count;
+       loff_t *ppos = &iocb->ki_pos;
+       struct iov_iter iter;
+       retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
+       if (retval)
+               return retval;
+       iov_iter_init(&iter, iov, nr_segs, count, 0);
  
        /*
         * Might this read be for a stacking filesystem?  Then when reading
                                break;
                }
  
-               desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
-               if (desc->error) {
-                       if (desc->error == -EINVAL)
-                               desc->error = 0;
+               error = shmem_getpage(inode, index, &page, sgp, NULL);
+               if (error) {
+                       if (error == -EINVAL)
+                               error = 0;
                        break;
                }
                if (page)
                /*
                 * Ok, we have the page, and it's up-to-date, so
                 * now we can copy it to user space...
-                *
-                * The actor routine returns how many bytes were actually used..
-                * NOTE! This may not be the same as how much of a user buffer
-                * we filled up (we may be padding etc), so we can only update
-                * "pos" here (the actor routine has to update the user buffer
-                * pointers and the remaining count).
                 */
-               ret = actor(desc, page, offset, nr);
+               ret = copy_page_to_iter(page, offset, nr, &iter);
+               retval += ret;
                offset += ret;
                index += offset >> PAGE_CACHE_SHIFT;
                offset &= ~PAGE_CACHE_MASK;
  
                page_cache_release(page);
-               if (ret != nr || !desc->count)
+               if (!iov_iter_count(&iter))
                        break;
+               if (ret < nr) {
+                       error = -EFAULT;
+                       break;
+               }
                cond_resched();
        }
  
        *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
-       file_accessed(filp);
- }
- static ssize_t shmem_file_aio_read(struct kiocb *iocb,
-               const struct iovec *iov, unsigned long nr_segs, loff_t pos)
- {
-       struct file *filp = iocb->ki_filp;
-       ssize_t retval;
-       unsigned long seg;
-       size_t count;
-       loff_t *ppos = &iocb->ki_pos;
-       retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-       if (retval)
-               return retval;
-       for (seg = 0; seg < nr_segs; seg++) {
-               read_descriptor_t desc;
-               desc.written = 0;
-               desc.arg.buf = iov[seg].iov_base;
-               desc.count = iov[seg].iov_len;
-               if (desc.count == 0)
-                       continue;
-               desc.error = 0;
-               do_shmem_file_read(filp, ppos, &desc, file_read_actor);
-               retval += desc.written;
-               if (desc.error) {
-                       retval = retval ?: desc.error;
-                       break;
-               }
-               if (desc.count > 0)
-                       break;
-       }
-       return retval;
+       file_accessed(file);
+       return retval ? retval : error;
  }
  
  static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
        index = *ppos >> PAGE_CACHE_SHIFT;
        loff = *ppos & ~PAGE_CACHE_MASK;
        req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-       nr_pages = min(req_pages, pipe->buffers);
+       nr_pages = min(req_pages, spd.nr_pages_max);
  
        spd.nr_pages = find_get_pages_contig(mapping, index,
                                                nr_pages, spd.pages);
@@@ -1669,7 -1706,7 +1646,7 @@@ static pgoff_t shmem_seek_hole_data(str
        pagevec_init(&pvec, 0);
        pvec.nr = 1;            /* start small: we may be there already */
        while (!done) {
 -              pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
 +              pvec.nr = find_get_entries(mapping, index,
                                        pvec.nr, pvec.pages, indices);
                if (!pvec.nr) {
                        if (whence == SEEK_DATA)
                                break;
                        }
                }
 -              shmem_deswap_pagevec(&pvec);
 +              pagevec_remove_exceptionals(&pvec);
                pagevec_release(&pvec);
                pvec.nr = PAGEVEC_SIZE;
                cond_resched();
@@@ -2723,7 -2760,6 +2700,7 @@@ static const struct super_operations sh
  
  static const struct vm_operations_struct shmem_vm_ops = {
        .fault          = shmem_fault,
 +      .map_pages      = filemap_map_pages,
  #ifdef CONFIG_NUMA
        .set_policy     = shmem_set_policy,
        .get_policy     = shmem_get_policy,
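
Both conversions in this pile (mm/process_vm_access.c earlier and
shmem_file_aio_read() in the hunks just shown) hand the same bookkeeping to
the iterator: a single cursor holding the current segment, the intra-segment
offset and the remaining byte count replaces the old lvec_current/lvec_offset
pair and the per-segment read_descriptor_t loop. The userspace toy below is
an illustration only, not the kernel API (toy_iter and toy_copy_to_iter are
invented names); it shows why "bytes copied = count before - count after" is
all a caller needs:

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

struct toy_iter {
        const struct iovec *iov;        /* current segment */
        unsigned long nr_segs;          /* segments left, incl. current */
        size_t iov_offset;              /* offset into current segment */
        size_t count;                   /* total bytes still wanted */
};

/* Scatter bytes from src across the iovec array, advancing the cursor
 * the way copy_page_to_iter() advances a struct iov_iter. */
static size_t toy_copy_to_iter(const char *src, size_t bytes,
                               struct toy_iter *i)
{
        size_t done = 0;

        if (bytes > i->count)
                bytes = i->count;
        while (done < bytes) {
                size_t room = i->iov->iov_len - i->iov_offset;
                size_t n = bytes - done < room ? bytes - done : room;

                memcpy((char *)i->iov->iov_base + i->iov_offset,
                       src + done, n);
                done += n;
                i->iov_offset += n;
                if (i->iov_offset == i->iov->iov_len) {
                        i->iov++;               /* segment exhausted: */
                        i->nr_segs--;           /* move to the next one */
                        i->iov_offset = 0;
                }
        }
        i->count -= done;
        return done;
}

int main(void)
{
        char a[5], b[16];
        struct iovec iov[2] = {
                { .iov_base = a, .iov_len = sizeof(a) },
                { .iov_base = b, .iov_len = sizeof(b) },
        };
        struct toy_iter it = { iov, 2, 0, sizeof(a) + sizeof(b) };
        size_t before = it.count;       /* "space before" */

        toy_copy_to_iter("hello world, iov_iter", 21, &it);
        /* copied = space before - space after, as in the patches above */
        printf("copied %zu bytes across 2 segments\n", before - it.count);
        return 0;
}
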
index babd8626bf968f584153518d7ecb60e9a4616d39,e90ab0e20db8c90c41bb7339872558c06477da40..6b540f1822e0b43c175466615ff78db50b0df0f5
@@@ -13,8 -13,6 +13,8 @@@
   *     Using root's kernel master key (kmk), calculate the HMAC
   */
  
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 +
  #include <linux/module.h>
  #include <linux/crypto.h>
  #include <linux/xattr.h>
@@@ -105,13 -103,13 +105,13 @@@ static void hmac_add_misc(struct shash_
                umode_t mode;
        } hmac_misc;
  
 -      memset(&hmac_misc, 0, sizeof hmac_misc);
 +      memset(&hmac_misc, 0, sizeof(hmac_misc));
        hmac_misc.ino = inode->i_ino;
        hmac_misc.generation = inode->i_generation;
        hmac_misc.uid = from_kuid(&init_user_ns, inode->i_uid);
        hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid);
        hmac_misc.mode = inode->i_mode;
 -      crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof hmac_misc);
 +      crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
        if (evm_hmac_version > 1)
                crypto_shash_update(desc, inode->i_sb->s_uuid,
                                    sizeof(inode->i_sb->s_uuid));
@@@ -139,7 -137,7 +139,7 @@@ static int evm_calc_hmac_or_hash(struc
        int error;
        int size;
  
-       if (!inode->i_op || !inode->i_op->getxattr)
+       if (!inode->i_op->getxattr)
                return -EOPNOTSUPP;
        desc = init_desc(type);
        if (IS_ERR(desc))
@@@ -223,7 -221,7 +223,7 @@@ int evm_init_hmac(struct inode *inode, 
  
        desc = init_desc(EVM_XATTR_HMAC);
        if (IS_ERR(desc)) {
 -              printk(KERN_INFO "init_desc failed\n");
 +              pr_info("init_desc failed\n");
                return PTR_ERR(desc);
        }
  
index 996092f21b64a4b71ee1a46acc49a94802e7618e,bab1c39ffcaf234f4889587b8a69afd1432f5af3..6e0bd933b6a9a8a815f5d57c147f1d18dfbfec36
@@@ -14,8 -14,6 +14,8 @@@
   *    evm_inode_removexattr, and evm_verifyxattr
   */
  
 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 +
  #include <linux/module.h>
  #include <linux/crypto.h>
  #include <linux/audit.h>
@@@ -64,7 -62,7 +64,7 @@@ static int evm_find_protected_xattrs(st
        int error;
        int count = 0;
  
-       if (!inode->i_op || !inode->i_op->getxattr)
+       if (!inode->i_op->getxattr)
                return -EOPNOTSUPP;
  
        for (xattr = evm_config_xattrnames; *xattr != NULL; xattr++) {
@@@ -434,7 -432,7 +434,7 @@@ static int __init init_evm(void
  
        error = evm_init_secfs();
        if (error < 0) {
 -              printk(KERN_INFO "EVM: Error registering secfs\n");
 +              pr_info("Error registering secfs\n");
                goto err;
        }
  
@@@ -451,7 -449,7 +451,7 @@@ static int __init evm_display_config(vo
        char **xattrname;
  
        for (xattrname = evm_config_xattrnames; *xattrname != NULL; xattrname++)
 -              printk(KERN_INFO "EVM: %s\n", *xattrname);
 +              pr_info("%s\n", *xattrname);
        return 0;
  }