Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 12 Apr 2014 21:49:50 +0000 (14:49 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 12 Apr 2014 21:49:50 +0000 (14:49 -0700)
Pull vfs updates from Al Viro:
 "The first vfs pile, with deep apologies for being very late in this
  window.

  Assorted cleanups and fixes, plus a large preparatory part of iov_iter
  work.  There's a lot more of that, but it'll probably go into the next
  merge window - it *does* shape up nicely, removes a lot of
  boilerplate, gets rid of locking inconsistencies between aio_write and
  splice_write and I hope to get Kent's direct-io rewrite merged into
  the same queue, but some of the stuff after this point is having
  (mostly trivial) conflicts with the things already merged into
  mainline and with some I want more testing.

  This one passes LTP and xfstests without regressions, in addition to
  usual beating.  BTW, readahead02 in ltp syscalls testsuite has started
  giving failures since "mm/readahead.c: fix readahead failure for
  memoryless NUMA nodes and limit readahead pages" - might be a false
  positive, might be a real regression..."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (63 commits)
  missing bits of "splice: fix racy pipe->buffers uses"
  cifs: fix the race in cifs_writev()
  ceph_sync_{,direct_}write: fix an oops on ceph_osdc_new_request() failure
  kill generic_file_buffered_write()
  ocfs2_file_aio_write(): switch to generic_perform_write()
  ceph_aio_write(): switch to generic_perform_write()
  xfs_file_buffered_aio_write(): switch to generic_perform_write()
  export generic_perform_write(), start getting rid of generic_file_buffer_write()
  generic_file_direct_write(): get rid of ppos argument
  btrfs_file_aio_write(): get rid of ppos
  kill the 5th argument of generic_file_buffered_write()
  kill the 4th argument of __generic_file_aio_write()
  lustre: don't open-code kernel_recvmsg()
  ocfs2: don't open-code kernel_recvmsg()
  drbd: don't open-code kernel_recvmsg()
  constify blk_rq_map_user_iov() and friends
  lustre: switch to kernel_sendmsg()
  ocfs2: don't open-code kernel_sendmsg()
  take iov_iter stuff to mm/iov_iter.c
  process_vm_access: tidy up a bit
  ...

76 files changed:
Documentation/filesystems/Locking
Documentation/filesystems/vfs.txt
arch/mn10300/include/asm/highmem.h
arch/powerpc/configs/ppc6xx_defconfig
arch/powerpc/configs/ps3_defconfig
arch/s390/configs/default_defconfig
arch/sh/configs/rsk7203_defconfig
arch/xtensa/configs/iss_defconfig
arch/xtensa/configs/s6105_defconfig
block/blk-map.c
drivers/block/drbd/drbd_receiver.c
drivers/block/nbd.c
drivers/char/virtio_console.c
drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c
drivers/staging/lustre/lustre/libcfs/linux/linux-tcpip.c
drivers/staging/lustre/lustre/llite/symlink.c
drivers/staging/usbip/stub_dev.c
drivers/staging/usbip/usbip_common.c
drivers/staging/usbip/usbip_common.h
drivers/staging/usbip/vhci_hcd.c
drivers/staging/usbip/vhci_sysfs.c
drivers/vhost/net.c
fs/bio.c
fs/block_dev.c
fs/btrfs/file.c
fs/buffer.c
fs/cachefiles/bind.c
fs/cachefiles/namei.c
fs/ceph/file.c
fs/cifs/cifsfs.c
fs/cifs/file.c
fs/exec.c
fs/ext4/file.c
fs/file.c
fs/file_table.c
fs/fuse/dev.c
fs/fuse/file.c
fs/mount.h
fs/namei.c
fs/namespace.c
fs/ncpfs/inode.c
fs/ncpfs/ncp_fs_sb.h
fs/ntfs/inode.c
fs/ocfs2/cluster/tcp.c
fs/ocfs2/file.c
fs/open.c
fs/pipe.c
fs/pnode.c
fs/pnode.h
fs/proc/namespaces.c
fs/proc/self.c
fs/proc_namespace.c
fs/splice.c
fs/udf/file.c
fs/xfs/xfs_file.c
fs/xfs/xfs_ioctl.c
include/linux/bio.h
include/linux/blkdev.h
include/linux/buffer_head.h
include/linux/fdtable.h
include/linux/fs.h
include/linux/mount.h
include/linux/nbd.h
include/linux/pipe_fs_i.h
include/linux/uio.h
kernel/relay.c
kernel/trace/trace.c
lib/Kconfig.debug
mm/Makefile
mm/filemap.c
mm/iov_iter.c [new file with mode: 0644]
mm/process_vm_access.c
mm/shmem.c
security/integrity/evm/evm_crypto.c
security/integrity/evm/evm_main.c
security/tomoyo/realpath.c

index efca5c1bbb1028245a6d75e846e96a6ecb4150bd..eba7901342531d2dc089c9a39d990aa924b86526 100644 (file)
@@ -202,7 +202,7 @@ prototypes:
                                unsigned long *);
        int (*migratepage)(struct address_space *, struct page *, struct page *);
        int (*launder_page)(struct page *);
-       int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long);
+       int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
        int (*error_remove_page)(struct address_space *, struct page *);
        int (*swap_activate)(struct file *);
        int (*swap_deactivate)(struct file *);
index 94eb86287bcb08f3ebc0fa826438fed1af8ded1a..617f6d70c0778ce37716d25fde6f0c158f492707 100644 (file)
@@ -596,7 +596,7 @@ struct address_space_operations {
        /* migrate the contents of a page to the specified target */
        int (*migratepage) (struct page *, struct page *);
        int (*launder_page) (struct page *);
-       int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
+       int (*is_partially_uptodate) (struct page *, unsigned long,
                                        unsigned long);
        void (*is_dirty_writeback) (struct page *, bool *, bool *);
        int (*error_remove_page) (struct mapping *mapping, struct page *page);
index 7c137cd8aa37490e07e2e9b7e917785e63236f5b..2fbbe4d920aa2efb353ed5fd52babaf309a386db 100644 (file)
@@ -70,7 +70,7 @@ static inline void kunmap(struct page *page)
  * be used in IRQ contexts, so in some (very limited) cases we need
  * it.
  */
-static inline unsigned long kmap_atomic(struct page *page)
+static inline void *kmap_atomic(struct page *page)
 {
        unsigned long vaddr;
        int idx, type;
@@ -89,7 +89,7 @@ static inline unsigned long kmap_atomic(struct page *page)
        set_pte(kmap_pte - idx, mk_pte(page, kmap_prot));
        local_flush_tlb_one(vaddr);
 
-       return vaddr;
+       return (void *)vaddr;
 }
 
 static inline void __kunmap_atomic(unsigned long vaddr)
index c2353bf059fd49ac95525a90aedd432c8a61701b..175a8b99c196e3942d2c14f1f082f3796243cc82 100644 (file)
@@ -1244,7 +1244,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y
 CONFIG_DEBUG_HIGHMEM=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_WRITECOUNT=y
 CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
index 139a8308070c8fee09665b81d168ef092e584d3e..fdee37fab81c5aa1f70263e7c4979fc0df7e9956 100644 (file)
@@ -174,7 +174,6 @@ CONFIG_DETECT_HUNG_TASK=y
 CONFIG_PROVE_LOCKING=y
 CONFIG_DEBUG_LOCKDEP=y
 CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_WRITECOUNT=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_DEBUG_LIST=y
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
index ddaae2f5c9137d0155ef5d5e943b40d097471881..8df022c43af7e7db7afb8d803b19fe4a162c18ca 100644 (file)
@@ -581,7 +581,6 @@ CONFIG_LOCK_STAT=y
 CONFIG_DEBUG_LOCKDEP=y
 CONFIG_DEBUG_ATOMIC_SLEEP=y
 CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
-CONFIG_DEBUG_WRITECOUNT=y
 CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 CONFIG_DEBUG_NOTIFIERS=y
index 4e5229b0c5bbbc5adc116a5dbe18c02cf83f4cee..47236573db83bb3624f8542b4d905b156bcae5ed 100644 (file)
@@ -128,7 +128,6 @@ CONFIG_DEBUG_MUTEXES=y
 CONFIG_DEBUG_SPINLOCK_SLEEP=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_WRITECOUNT=y
 CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 CONFIG_FRAME_POINTER=y
index d57d917ff2406c308b6b12b4790bf6654f4d900a..1493c68352d11454a50a7fdfb5f11f49ab54de88 100644 (file)
@@ -627,7 +627,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_VM is not set
-# CONFIG_DEBUG_WRITECOUNT is not set
 # CONFIG_DEBUG_MEMORY_INIT is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
index 583c2b0974cab79dfb08e7381836702688c2ed52..12a492ab6d17f9fbf74dedd1a9723a269ee7cbef 100644 (file)
@@ -569,7 +569,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_VM is not set
 CONFIG_DEBUG_NOMMU_REGIONS=y
-# CONFIG_DEBUG_WRITECOUNT is not set
 # CONFIG_DEBUG_MEMORY_INIT is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
index cca6356d216d13977665e17a846aef31ab1e4a87..f7b22bc215180d4b7f467135faeaf52975a77013 100644 (file)
@@ -188,7 +188,7 @@ EXPORT_SYMBOL(blk_rq_map_user);
  *    unmapping.
  */
 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
-                       struct rq_map_data *map_data, struct sg_iovec *iov,
+                       struct rq_map_data *map_data, const struct sg_iovec *iov,
                        int iov_count, unsigned int len, gfp_t gfp_mask)
 {
        struct bio *bio;
index 18c76e84d54085c0819d46f3af1f1ef3aa6fcb3b..68e3992e88381cd4974ebfa2da3400708ab4afa0 100644 (file)
@@ -469,24 +469,14 @@ static void drbd_wait_ee_list_empty(struct drbd_device *device,
 
 static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
 {
-       mm_segment_t oldfs;
        struct kvec iov = {
                .iov_base = buf,
                .iov_len = size,
        };
        struct msghdr msg = {
-               .msg_iovlen = 1,
-               .msg_iov = (struct iovec *)&iov,
                .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
        };
-       int rv;
-
-       oldfs = get_fs();
-       set_fs(KERNEL_DS);
-       rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
-       set_fs(oldfs);
-
-       return rv;
+       return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
 }
 
 static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
index 55298db36b2d61a113f25c22905fffb0f22ddd32..3a70ea2f7cd69b2641302e6c44560f32245a078c 100644 (file)
@@ -630,37 +630,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
        }
  
        case NBD_CLEAR_SOCK: {
-               struct file *file;
-
+               struct socket *sock = nbd->sock;
                nbd->sock = NULL;
-               file = nbd->file;
-               nbd->file = NULL;
                nbd_clear_que(nbd);
                BUG_ON(!list_empty(&nbd->queue_head));
                BUG_ON(!list_empty(&nbd->waiting_queue));
                kill_bdev(bdev);
-               if (file)
-                       fput(file);
+               if (sock)
+                       sockfd_put(sock);
                return 0;
        }
 
        case NBD_SET_SOCK: {
-               struct file *file;
-               if (nbd->file)
+               struct socket *sock;
+               int err;
+               if (nbd->sock)
                        return -EBUSY;
-               file = fget(arg);
-               if (file) {
-                       struct inode *inode = file_inode(file);
-                       if (S_ISSOCK(inode->i_mode)) {
-                               nbd->file = file;
-                               nbd->sock = SOCKET_I(inode);
-                               if (max_part > 0)
-                                       bdev->bd_invalidated = 1;
-                               nbd->disconnect = 0; /* we're connected now */
-                               return 0;
-                       } else {
-                               fput(file);
-                       }
+               sock = sockfd_lookup(arg, &err);
+               if (sock) {
+                       nbd->sock = sock;
+                       if (max_part > 0)
+                               bdev->bd_invalidated = 1;
+                       nbd->disconnect = 0; /* we're connected now */
+                       return 0;
                }
                return -EINVAL;
        }
@@ -697,12 +689,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 
        case NBD_DO_IT: {
                struct task_struct *thread;
-               struct file *file;
+               struct socket *sock;
                int error;
 
                if (nbd->pid)
                        return -EBUSY;
-               if (!nbd->file)
+               if (!nbd->sock)
                        return -EINVAL;
 
                mutex_unlock(&nbd->tx_lock);
@@ -731,15 +723,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
                if (error)
                        return error;
                sock_shutdown(nbd, 0);
-               file = nbd->file;
-               nbd->file = NULL;
+               sock = nbd->sock;
+               nbd->sock = NULL;
                nbd_clear_que(nbd);
                dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
                kill_bdev(bdev);
                queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
                set_device_ro(bdev, false);
-               if (file)
-                       fput(file);
+               if (sock)
+                       sockfd_put(sock);
                nbd->flags = 0;
                nbd->bytesize = 0;
                bdev->bd_inode->i_size = 0;
@@ -875,9 +867,7 @@ static int __init nbd_init(void)
 
        for (i = 0; i < nbds_max; i++) {
                struct gendisk *disk = nbd_dev[i].disk;
-               nbd_dev[i].file = NULL;
                nbd_dev[i].magic = NBD_MAGIC;
-               nbd_dev[i].flags = 0;
                INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
                spin_lock_init(&nbd_dev[i].queue_lock);
                INIT_LIST_HEAD(&nbd_dev[i].queue_head);
index 6928d094451d607795b4f2a07d7599e01712f824..60aafb8a1f2e24d38993aa57c114f913d5524735 100644 (file)
@@ -901,9 +901,9 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                if (len + offset > PAGE_SIZE)
                        len = PAGE_SIZE - offset;
 
-               src = buf->ops->map(pipe, buf, 1);
+               src = kmap_atomic(buf->page);
                memcpy(page_address(page) + offset, src + buf->offset, len);
-               buf->ops->unmap(pipe, buf, src);
+               kunmap_atomic(src);
 
                sg_set_page(&(sgl->sg[sgl->n]), page, len, offset);
        }
index a54b506ba7ca030230e99d91092638f0bcfc6cb8..b87b246111c0c9727709838c1860df6d0421fc00 100644 (file)
@@ -99,16 +99,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
                struct iovec   *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
                unsigned int    niov = tx->tx_niov;
 #endif
-               struct msghdr msg = {
-                       .msg_name       = NULL,
-                       .msg_namelen    = 0,
-                       .msg_iov        = scratchiov,
-                       .msg_iovlen     = niov,
-                       .msg_control    = NULL,
-                       .msg_controllen = 0,
-                       .msg_flags      = MSG_DONTWAIT
-               };
-               mm_segment_t oldmm = get_fs();
+               struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
                int  i;
 
                for (nob = i = 0; i < niov; i++) {
@@ -120,9 +111,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
                    nob < tx->tx_resid)
                        msg.msg_flags |= MSG_MORE;
 
-               set_fs (KERNEL_DS);
-               rc = sock_sendmsg(sock, &msg, nob);
-               set_fs (oldmm);
+               rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob);
        }
        return rc;
 }
@@ -174,16 +163,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
                struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
                unsigned int  niov = tx->tx_nkiov;
 #endif
-               struct msghdr msg = {
-                       .msg_name       = NULL,
-                       .msg_namelen    = 0,
-                       .msg_iov        = scratchiov,
-                       .msg_iovlen     = niov,
-                       .msg_control    = NULL,
-                       .msg_controllen = 0,
-                       .msg_flags      = MSG_DONTWAIT
-               };
-               mm_segment_t  oldmm = get_fs();
+               struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
                int        i;
 
                for (nob = i = 0; i < niov; i++) {
@@ -196,9 +176,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
                    nob < tx->tx_resid)
                        msg.msg_flags |= MSG_MORE;
 
-               set_fs (KERNEL_DS);
-               rc = sock_sendmsg(sock, &msg, nob);
-               set_fs (oldmm);
+               rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob);
 
                for (i = 0; i < niov; i++)
                        kunmap(kiov[i].kiov_page);
@@ -237,15 +215,8 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn)
 #endif
        struct iovec *iov = conn->ksnc_rx_iov;
        struct msghdr msg = {
-               .msg_name       = NULL,
-               .msg_namelen    = 0,
-               .msg_iov        = scratchiov,
-               .msg_iovlen     = niov,
-               .msg_control    = NULL,
-               .msg_controllen = 0,
                .msg_flags      = 0
        };
-       mm_segment_t oldmm = get_fs();
        int       nob;
        int       i;
        int       rc;
@@ -263,10 +234,8 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn)
        }
        LASSERT (nob <= conn->ksnc_rx_nob_wanted);
 
-       set_fs (KERNEL_DS);
-       rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
-       /* NB this is just a boolean..........................^ */
-       set_fs (oldmm);
+       rc = kernel_recvmsg(conn->ksnc_sock, &msg,
+               (struct kvec *)scratchiov, niov, nob, MSG_DONTWAIT);
 
        saved_csum = 0;
        if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
@@ -355,14 +324,8 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
 #endif
        lnet_kiov_t   *kiov = conn->ksnc_rx_kiov;
        struct msghdr msg = {
-               .msg_name       = NULL,
-               .msg_namelen    = 0,
-               .msg_iov        = scratchiov,
-               .msg_control    = NULL,
-               .msg_controllen = 0,
                .msg_flags      = 0
        };
-       mm_segment_t oldmm = get_fs();
        int       nob;
        int       i;
        int       rc;
@@ -370,13 +333,14 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
        void    *addr;
        int       sum;
        int       fragnob;
+       int n;
 
        /* NB we can't trust socket ops to either consume our iovs
         * or leave them alone. */
        addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages);
        if (addr != NULL) {
                nob = scratchiov[0].iov_len;
-               msg.msg_iovlen = 1;
+               n = 1;
 
        } else {
                for (nob = i = 0; i < niov; i++) {
@@ -384,15 +348,13 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
                        scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
                                                 kiov[i].kiov_offset;
                }
-               msg.msg_iovlen = niov;
+               n = niov;
        }
 
        LASSERT (nob <= conn->ksnc_rx_nob_wanted);
 
-       set_fs (KERNEL_DS);
-       rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
-       /* NB this is just a boolean.......................^ */
-       set_fs (oldmm);
+       rc = kernel_recvmsg(conn->ksnc_sock, &msg,
+                       (struct kvec *)scratchiov, n, nob, MSG_DONTWAIT);
 
        if (conn->ksnc_msg.ksm_csum != 0) {
                for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
index e6069d78af6ba2e6c7ca02d1fe890a60e2bbad8a..7539fe16d76f1d9889745d0fc00f8124cbd01e92 100644 (file)
@@ -265,17 +265,11 @@ libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout)
         * empty enough to take the whole message immediately */
 
        for (;;) {
-               struct iovec  iov = {
+               struct kvec  iov = {
                        .iov_base = buffer,
                        .iov_len  = nob
                };
                struct msghdr msg = {
-                       .msg_name       = NULL,
-                       .msg_namelen    = 0,
-                       .msg_iov        = &iov,
-                       .msg_iovlen     = 1,
-                       .msg_control    = NULL,
-                       .msg_controllen = 0,
                        .msg_flags      = (timeout == 0) ? MSG_DONTWAIT : 0
                };
 
@@ -297,11 +291,9 @@ libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout)
                        }
                }
 
-               set_fs (KERNEL_DS);
                then = jiffies;
-               rc = sock_sendmsg (sock, &msg, iov.iov_len);
+               rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
                ticks -= jiffies - then;
-               set_fs (oldmm);
 
                if (rc == nob)
                        return 0;
@@ -338,17 +330,11 @@ libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
        LASSERT (ticks > 0);
 
        for (;;) {
-               struct iovec  iov = {
+               struct kvec  iov = {
                        .iov_base = buffer,
                        .iov_len  = nob
                };
                struct msghdr msg = {
-                       .msg_name       = NULL,
-                       .msg_namelen    = 0,
-                       .msg_iov        = &iov,
-                       .msg_iovlen     = 1,
-                       .msg_control    = NULL,
-                       .msg_controllen = 0,
                        .msg_flags      = 0
                };
 
@@ -367,11 +353,9 @@ libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
                        return rc;
                }
 
-               set_fs(KERNEL_DS);
                then = jiffies;
-               rc = sock_recvmsg(sock, &msg, iov.iov_len, 0);
+               rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
                ticks -= jiffies - then;
-               set_fs(oldmm);
 
                if (rc < 0)
                        return rc;
index ab06891f7fc7cf68c41cb82119a393d11a1797da..80d48b5ae24706655c0290ad01f59bb1591f3912 100644 (file)
@@ -115,27 +115,6 @@ failed:
        return rc;
 }
 
-static int ll_readlink(struct dentry *dentry, char *buffer, int buflen)
-{
-       struct inode *inode = dentry->d_inode;
-       struct ptlrpc_request *request;
-       char *symname;
-       int rc;
-
-       CDEBUG(D_VFSTRACE, "VFS Op\n");
-
-       ll_inode_size_lock(inode);
-       rc = ll_readlink_internal(inode, &request, &symname);
-       if (rc)
-               GOTO(out, rc);
-
-       rc = vfs_readlink(dentry, buffer, buflen, symname);
- out:
-       ptlrpc_req_finished(request);
-       ll_inode_size_unlock(inode);
-       return rc;
-}
-
 static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        struct inode *inode = dentry->d_inode;
@@ -175,7 +154,7 @@ static void ll_put_link(struct dentry *dentry, struct nameidata *nd, void *cooki
 }
 
 struct inode_operations ll_fast_symlink_inode_operations = {
-       .readlink       = ll_readlink,
+       .readlink       = generic_readlink,
        .setattr        = ll_setattr,
        .follow_link    = ll_follow_link,
        .put_link       = ll_put_link,
index 773d8ca07a004c6ec7972ae62f9be9d266c366a4..de692d7011a5cc4937298e5d3d697f519f0c9d04 100644 (file)
@@ -86,7 +86,6 @@ static ssize_t store_sockfd(struct device *dev, struct device_attribute *attr,
        struct stub_device *sdev = dev_get_drvdata(dev);
        int sockfd = 0;
        struct socket *socket;
-       ssize_t err = -EINVAL;
        int rv;
 
        if (!sdev) {
@@ -99,6 +98,7 @@ static ssize_t store_sockfd(struct device *dev, struct device_attribute *attr,
                return -EINVAL;
 
        if (sockfd != -1) {
+               int err;
                dev_info(dev, "stub up\n");
 
                spin_lock_irq(&sdev->ud.lock);
@@ -108,7 +108,7 @@ static ssize_t store_sockfd(struct device *dev, struct device_attribute *attr,
                        goto err;
                }
 
-               socket = sockfd_to_socket(sockfd);
+               socket = sockfd_lookup(sockfd, &err);
                if (!socket)
                        goto err;
 
@@ -141,7 +141,7 @@ static ssize_t store_sockfd(struct device *dev, struct device_attribute *attr,
 
 err:
        spin_unlock_irq(&sdev->ud.lock);
-       return err;
+       return -EINVAL;
 }
 static DEVICE_ATTR(usbip_sockfd, S_IWUSR, NULL, store_sockfd);
 
@@ -211,7 +211,7 @@ static void stub_shutdown_connection(struct usbip_device *ud)
         * not touch NULL socket.
         */
        if (ud->tcp_socket) {
-               fput(ud->tcp_socket->file);
+               sockfd_put(ud->tcp_socket);
                ud->tcp_socket = NULL;
        }
 
index 184fa70365db3e32a37f55efc819858b431578d6..facaaf003f19931b2f15603568bb565f3de40607 100644 (file)
@@ -382,31 +382,6 @@ err:
 }
 EXPORT_SYMBOL_GPL(usbip_recv);
 
-struct socket *sockfd_to_socket(unsigned int sockfd)
-{
-       struct socket *socket;
-       struct file *file;
-       struct inode *inode;
-
-       file = fget(sockfd);
-       if (!file) {
-               pr_err("invalid sockfd\n");
-               return NULL;
-       }
-
-       inode = file_inode(file);
-
-       if (!inode || !S_ISSOCK(inode->i_mode)) {
-               fput(file);
-               return NULL;
-       }
-
-       socket = SOCKET_I(inode);
-
-       return socket;
-}
-EXPORT_SYMBOL_GPL(sockfd_to_socket);
-
 /* there may be more cases to tweak the flags. */
 static unsigned int tweak_transfer_flags(unsigned int flags)
 {
index 732fb636a1e5b26c19b816448f7faa08391f5a35..f555d834f134a8a72e52751de62d3305bde27ecc 100644 (file)
@@ -299,7 +299,6 @@ void usbip_dump_urb(struct urb *purb);
 void usbip_dump_header(struct usbip_header *pdu);
 
 int usbip_recv(struct socket *sock, void *buf, int size);
-struct socket *sockfd_to_socket(unsigned int sockfd);
 
 void usbip_pack_pdu(struct usbip_header *pdu, struct urb *urb, int cmd,
                    int pack);
index 1e84577230ef4120f4847b6ca67af1e028a102cb..70e17551943dc45bb49edfd4f3ba9e38a87df8fe 100644 (file)
@@ -788,7 +788,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
 
        /* active connection is closed */
        if (vdev->ud.tcp_socket) {
-               fput(vdev->ud.tcp_socket->file);
+               sockfd_put(vdev->ud.tcp_socket);
                vdev->ud.tcp_socket = NULL;
        }
        pr_info("release socket\n");
@@ -835,7 +835,7 @@ static void vhci_device_reset(struct usbip_device *ud)
        vdev->udev = NULL;
 
        if (ud->tcp_socket) {
-               fput(ud->tcp_socket->file);
+               sockfd_put(ud->tcp_socket);
                ud->tcp_socket = NULL;
        }
        ud->status = VDEV_ST_NULL;
index e0980324fb0366e06a39b84ca26c109b36381e35..47bddcdde0a621330a063d297b132f8da8396b72 100644 (file)
@@ -176,6 +176,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
        struct socket *socket;
        int sockfd = 0;
        __u32 rhport = 0, devid = 0, speed = 0;
+       int err;
 
        /*
         * @rhport: port number of vhci_hcd
@@ -194,8 +195,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
                return -EINVAL;
 
        /* Extract socket from fd. */
-       /* The correct way to clean this up is to fput(socket->file). */
-       socket = sockfd_to_socket(sockfd);
+       socket = sockfd_lookup(sockfd, &err);
        if (!socket)
                return -EINVAL;
 
@@ -211,7 +211,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
                spin_unlock(&vdev->ud.lock);
                spin_unlock(&the_controller->lock);
 
-               fput(socket->file);
+               sockfd_put(socket);
 
                dev_err(dev, "port %d already used\n", rhport);
                return -EINVAL;
index e1e22e0f01e881fe2961dbf1c43cb56f4e22cade..be414d2b2b22d8005723b0c1c45674c3c33aa41b 100644 (file)
@@ -818,9 +818,9 @@ static int vhost_net_release(struct inode *inode, struct file *f)
        vhost_dev_cleanup(&n->dev, false);
        vhost_net_vq_reset(n);
        if (tx_sock)
-               fput(tx_sock->file);
+               sockfd_put(tx_sock);
        if (rx_sock)
-               fput(rx_sock->file);
+               sockfd_put(rx_sock);
        /* Make sure no callbacks are outstanding */
        synchronize_rcu_bh();
        /* We do an extra flush before freeing memory,
@@ -860,7 +860,7 @@ static struct socket *get_raw_socket(int fd)
        }
        return sock;
 err:
-       fput(sock->file);
+       sockfd_put(sock);
        return ERR_PTR(r);
 }
 
@@ -966,7 +966,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 
        if (oldsock) {
                vhost_net_flush_vq(n, index);
-               fput(oldsock->file);
+               sockfd_put(oldsock);
        }
 
        mutex_unlock(&n->dev.mutex);
@@ -978,7 +978,7 @@ err_used:
        if (ubufs)
                vhost_net_ubuf_put_wait_and_free(ubufs);
 err_ubufs:
-       fput(sock->file);
+       sockfd_put(sock);
 err_vq:
        mutex_unlock(&vq->mutex);
 err:
@@ -1009,9 +1009,9 @@ static long vhost_net_reset_owner(struct vhost_net *n)
 done:
        mutex_unlock(&n->dev.mutex);
        if (tx_sock)
-               fput(tx_sock->file);
+               sockfd_put(tx_sock);
        if (rx_sock)
-               fput(rx_sock->file);
+               sockfd_put(rx_sock);
        return err;
 }
 
index b1bc722b89aa6b99a6e2c8dcf64aa4dbbd0d82ff..6f0362b77806c61909aa37433a9e77eb77476cff 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1002,7 +1002,7 @@ struct bio_map_data {
 };
 
 static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
-                            struct sg_iovec *iov, int iov_count,
+                            const struct sg_iovec *iov, int iov_count,
                             int is_our_pages)
 {
        memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
@@ -1022,7 +1022,7 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs,
                       sizeof(struct sg_iovec) * iov_count, gfp_mask);
 }
 
-static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
+static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
                          int to_user, int from_user, int do_free_page)
 {
        int ret = 0, i;
@@ -1120,7 +1120,7 @@ EXPORT_SYMBOL(bio_uncopy_user);
  */
 struct bio *bio_copy_user_iov(struct request_queue *q,
                              struct rq_map_data *map_data,
-                             struct sg_iovec *iov, int iov_count,
+                             const struct sg_iovec *iov, int iov_count,
                              int write_to_vm, gfp_t gfp_mask)
 {
        struct bio_map_data *bmd;
@@ -1259,7 +1259,7 @@ EXPORT_SYMBOL(bio_copy_user);
 
 static struct bio *__bio_map_user_iov(struct request_queue *q,
                                      struct block_device *bdev,
-                                     struct sg_iovec *iov, int iov_count,
+                                     const struct sg_iovec *iov, int iov_count,
                                      int write_to_vm, gfp_t gfp_mask)
 {
        int i, j;
@@ -1407,7 +1407,7 @@ EXPORT_SYMBOL(bio_map_user);
  *     device. Returns an error pointer in case of error.
  */
 struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
-                            struct sg_iovec *iov, int iov_count,
+                            const struct sg_iovec *iov, int iov_count,
                             int write_to_vm, gfp_t gfp_mask)
 {
        struct bio *bio;
index ba0d2b05bb787a28e59629a05586441e8b9386af..552a8d13bc321f4d1cf64fb9b3171893e28e73e9 100644 (file)
@@ -1518,7 +1518,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
        BUG_ON(iocb->ki_pos != pos);
 
        blk_start_plug(&plug);
-       ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+       ret = __generic_file_aio_write(iocb, iov, nr_segs);
        if (ret > 0) {
                ssize_t err;
 
index c5998477fe60ef3f53dafd8e741e1f750be6daee..eb742c07e7a41aacdb595b0252a12b3584bbee83 100644 (file)
@@ -425,13 +425,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
                struct page *page = prepared_pages[pg];
                /*
                 * Copy data from userspace to the current page
-                *
-                * Disable pagefault to avoid recursive lock since
-                * the pages are already locked
                 */
-               pagefault_disable();
                copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
-               pagefault_enable();
 
                /* Flush processor's dcache for this page */
                flush_dcache_page(page);
@@ -1665,7 +1660,7 @@ again:
 static ssize_t __btrfs_direct_write(struct kiocb *iocb,
                                    const struct iovec *iov,
                                    unsigned long nr_segs, loff_t pos,
-                                   loff_t *ppos, size_t count, size_t ocount)
+                                   size_t count, size_t ocount)
 {
        struct file *file = iocb->ki_filp;
        struct iov_iter i;
@@ -1674,7 +1669,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
        loff_t endbyte;
        int err;
 
-       written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
+       written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
                                            count, ocount);
 
        if (written < 0 || written == count)
@@ -1693,7 +1688,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
        if (err)
                goto out;
        written += written_buffered;
-       *ppos = pos + written_buffered;
+       iocb->ki_pos = pos + written_buffered;
        invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
                                 endbyte >> PAGE_CACHE_SHIFT);
 out:
@@ -1725,7 +1720,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       loff_t *ppos = &iocb->ki_pos;
        u64 start_pos;
        u64 end_pos;
        ssize_t num_written = 0;
@@ -1796,7 +1790,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
        if (unlikely(file->f_flags & O_DIRECT)) {
                num_written = __btrfs_direct_write(iocb, iov, nr_segs,
-                                                  pos, ppos, count, ocount);
+                                                  pos, count, ocount);
        } else {
                struct iov_iter i;
 
@@ -1804,7 +1798,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
                num_written = __btrfs_buffered_write(file, &i, pos);
                if (num_written > 0)
-                       *ppos = pos + num_written;
+                       iocb->ki_pos = pos + num_written;
        }
 
        mutex_unlock(&inode->i_mutex);
index 8c53a2b15ecbaffcc19ab5f45b53c19174998c91..9ddb9fc7d923fa31299a8aba228f61973d3b429f 100644 (file)
@@ -2114,8 +2114,8 @@ EXPORT_SYMBOL(generic_write_end);
  * Returns true if all buffers which correspond to a file portion
  * we want to read are uptodate.
  */
-int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
-                                       unsigned long from)
+int block_is_partially_uptodate(struct page *page, unsigned long from,
+                                       unsigned long count)
 {
        unsigned block_start, block_end, blocksize;
        unsigned to;
@@ -2127,7 +2127,7 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 
        head = page_buffers(page);
        blocksize = head->b_size;
-       to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
+       to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
        to = from + to;
        if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
                return 0;
index 622f4696e48435f47759ab35869ce692cc697188..5b99bafc31d13011801bb1ab3002b59f6c41cad0 100644 (file)
@@ -124,7 +124,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
        /* check parameters */
        ret = -EOPNOTSUPP;
        if (!root->d_inode ||
-           !root->d_inode->i_op ||
            !root->d_inode->i_op->lookup ||
            !root->d_inode->i_op->mkdir ||
            !root->d_inode->i_op->setxattr ||
index 6494d9f673aa51490a59694d600bd8a7101559bf..c0a681705104fc7a8aae428169e3f342bb4cb8a4 100644 (file)
@@ -779,8 +779,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
        }
 
        ret = -EPERM;
-       if (!subdir->d_inode->i_op ||
-           !subdir->d_inode->i_op->setxattr ||
+       if (!subdir->d_inode->i_op->setxattr ||
            !subdir->d_inode->i_op->getxattr ||
            !subdir->d_inode->i_op->lookup ||
            !subdir->d_inode->i_op->mkdir ||
index 66075a4ad97900edbfaf98775d484c31c7496200..39da1c2efa5030216d18bc6bb3020a78afb4c5f6 100644 (file)
@@ -601,7 +601,7 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
                                            false);
                if (IS_ERR(req)) {
                        ret = PTR_ERR(req);
-                       goto out;
+                       break;
                }
 
                num_pages = calc_pages_for(page_align, len);
@@ -719,7 +719,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
                                            false);
                if (IS_ERR(req)) {
                        ret = PTR_ERR(req);
-                       goto out;
+                       break;
                }
 
                /*
@@ -972,6 +972,7 @@ retry_snap:
                }
        } else {
                loff_t old_size = inode->i_size;
+               struct iov_iter from;
                /*
                 * No need to acquire the i_truncate_mutex. Because
                 * the MDS revokes Fwb caps before sending truncate
@@ -979,9 +980,10 @@ retry_snap:
                 * are pending vmtruncate. So write and vmtruncate
                 * can not run at the same time
                 */
-               written = generic_file_buffered_write(iocb, iov, nr_segs,
-                                                     pos, &iocb->ki_pos,
-                                                     count, 0);
+               iov_iter_init(&from, iov, nr_segs, count, 0);
+               written = generic_perform_write(file, &from, pos);
+               if (likely(written >= 0))
+                       iocb->ki_pos = pos + written;
                if (inode->i_size > old_size)
                        ceph_fscache_update_objectsize(inode);
                mutex_unlock(&inode->i_mutex);
index 2c70cbe35d39c3b7df79dd0b7e2b3fde6401f8fe..df9c9141c0998383522b181664b9d57256bc99fa 100644 (file)
@@ -850,7 +850,6 @@ const struct inode_operations cifs_file_inode_ops = {
 /*     revalidate:cifs_revalidate, */
        .setattr = cifs_setattr,
        .getattr = cifs_getattr, /* do we need this anymore? */
-       .rename = cifs_rename,
        .permission = cifs_permission,
 #ifdef CONFIG_CIFS_XATTR
        .setxattr = cifs_setxattr,
index 216d7e99f9219317bd0f2567c898925df77ec68d..8807442c94dd3323cbb7f9f8283c4f1c1a2d1480 100644 (file)
@@ -2579,19 +2579,32 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        ssize_t rc = -EACCES;
-       loff_t lock_pos = pos;
+       loff_t lock_pos = iocb->ki_pos;
 
-       if (file->f_flags & O_APPEND)
-               lock_pos = i_size_read(inode);
        /*
         * We need to hold the sem to be sure nobody modifies lock list
         * with a brlock that prevents writing.
         */
        down_read(&cinode->lock_sem);
+       mutex_lock(&inode->i_mutex);
+       if (file->f_flags & O_APPEND)
+               lock_pos = i_size_read(inode);
        if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs),
                                     server->vals->exclusive_lock_type, NULL,
-                                    CIFS_WRITE_OP))
-               rc = generic_file_aio_write(iocb, iov, nr_segs, pos);
+                                    CIFS_WRITE_OP)) {
+               rc = __generic_file_aio_write(iocb, iov, nr_segs);
+               mutex_unlock(&inode->i_mutex);
+
+               if (rc > 0) {
+                       ssize_t err;
+
+                       err = generic_write_sync(file, iocb->ki_pos - rc, rc);
+                       if (err < 0)    /* a failed sync replaces the byte count */
+                               rc = err;
+               }
+       } else {
+               mutex_unlock(&inode->i_mutex);
+       }
        up_read(&cinode->lock_sem);
        return rc;
 }
@@ -2727,56 +2740,27 @@ cifs_retry_async_readv(struct cifs_readdata *rdata)
 /**
  * cifs_readdata_to_iov - copy data from pages in response to an iovec
  * @rdata:     the readdata response with list of pages holding data
- * @iov:       vector in which we should copy the data
- * @nr_segs:   number of segments in vector
- * @offset:    offset into file of the first iovec
- * @copied:    used to return the amount of data copied to the iov
+ * @iter:      destination for our data
  *
  * This function copies data from a list of pages in a readdata response into
  * an array of iovecs. It will first calculate where the data should go
  * based on the info in the readdata and then copy the data into that spot.
  */
-static ssize_t
-cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
-                       unsigned long nr_segs, loff_t offset, ssize_t *copied)
+static int
+cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
 {
-       int rc = 0;
-       struct iov_iter ii;
-       size_t pos = rdata->offset - offset;
-       ssize_t remaining = rdata->bytes;
-       unsigned char *pdata;
+       size_t remaining = rdata->bytes;
        unsigned int i;
 
-       /* set up iov_iter and advance to the correct offset */
-       iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
-       iov_iter_advance(&ii, pos);
-
-       *copied = 0;
        for (i = 0; i < rdata->nr_pages; i++) {
-               ssize_t copy;
                struct page *page = rdata->pages[i];
-
-               /* copy a whole page or whatever's left */
-               copy = min_t(ssize_t, remaining, PAGE_SIZE);
-
-               /* ...but limit it to whatever space is left in the iov */
-               copy = min_t(ssize_t, copy, iov_iter_count(&ii));
-
-               /* go while there's data to be copied and no errors */
-               if (copy && !rc) {
-                       pdata = kmap(page);
-                       rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
-                                               (int)copy);
-                       kunmap(page);
-                       if (!rc) {
-                               *copied += copy;
-                               remaining -= copy;
-                               iov_iter_advance(&ii, copy);
-                       }
-               }
+               size_t copy = min(remaining, PAGE_SIZE);
+               size_t written = copy_page_to_iter(page, 0, copy, iter);
+               remaining -= written;
+               if (written < copy && iov_iter_count(iter) > 0)
+                       break;
        }
-
-       return rc;
+       return remaining ? -EFAULT : 0;
 }
 
 static void
@@ -2837,20 +2821,21 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
        return total_read > 0 ? total_read : result;
 }
 
-static ssize_t
-cifs_iovec_read(struct file *file, const struct iovec *iov,
-                unsigned long nr_segs, loff_t *poffset)
+ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
+                              unsigned long nr_segs, loff_t pos)
 {
+       struct file *file = iocb->ki_filp;
        ssize_t rc;
        size_t len, cur_len;
        ssize_t total_read = 0;
-       loff_t offset = *poffset;
+       loff_t offset = pos;
        unsigned int npages;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct cifsFileInfo *open_file;
        struct cifs_readdata *rdata, *tmp;
        struct list_head rdata_list;
+       struct iov_iter to;
        pid_t pid;
 
        if (!nr_segs)
@@ -2860,6 +2845,8 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
        if (!len)
                return 0;
 
+       iov_iter_init(&to, iov, nr_segs, len, 0);
+
        INIT_LIST_HEAD(&rdata_list);
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        open_file = file->private_data;
@@ -2917,55 +2904,44 @@ error:
        if (!list_empty(&rdata_list))
                rc = 0;
 
+       len = iov_iter_count(&to);
        /* the loop below should proceed in the order of increasing offsets */
-restart_loop:
        list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
+       again:
                if (!rc) {
-                       ssize_t copied;
-
                        /* FIXME: freezable sleep too? */
                        rc = wait_for_completion_killable(&rdata->done);
                        if (rc)
                                rc = -EINTR;
-                       else if (rdata->result)
+                       else if (rdata->result) {
                                rc = rdata->result;
-                       else {
-                               rc = cifs_readdata_to_iov(rdata, iov,
-                                                       nr_segs, *poffset,
-                                                       &copied);
-                               total_read += copied;
+                               /* resend call if it's a retryable error */
+                               if (rc == -EAGAIN) {
+                                       rc = cifs_retry_async_readv(rdata);
+                                       goto again;
+                               }
+                       } else {
+                               rc = cifs_readdata_to_iov(rdata, &to);
                        }
 
-                       /* resend call if it's a retryable error */
-                       if (rc == -EAGAIN) {
-                               rc = cifs_retry_async_readv(rdata);
-                               goto restart_loop;
-                       }
                }
                list_del_init(&rdata->list);
                kref_put(&rdata->refcount, cifs_uncached_readdata_release);
        }
 
+       total_read = len - iov_iter_count(&to);
+
        cifs_stats_bytes_read(tcon, total_read);
-       *poffset += total_read;
 
        /* mask nodata case */
        if (rc == -ENODATA)
                rc = 0;
 
-       return total_read ? total_read : rc;
-}
-
-ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
-                              unsigned long nr_segs, loff_t pos)
-{
-       ssize_t read;
-
-       read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
-       if (read > 0)
-               iocb->ki_pos = pos;
-
-       return read;
+       if (total_read) {
+               iocb->ki_pos = pos + total_read;
+               return total_read;
+       }
+       return rc;
 }
 
 ssize_t
index 9e81c630dfa76469cdd452e179882403a93b03c8..476f3ebf437ef40ddd7432200080825b7e9e992c 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -813,7 +813,7 @@ EXPORT_SYMBOL(kernel_read);
 
 ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
 {
-       ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos);
+       ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
        if (res > 0)
                flush_icache_range(addr, addr + len);
        return res;
index 4e508fc83dcf1b0b9b2934e4cf69506e063b0f6e..ca7502d89fdee07b96585c768854375b207daaf6 100644 (file)
@@ -146,7 +146,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
                        overwrite = 1;
        }
 
-       ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+       ret = __generic_file_aio_write(iocb, iov, nr_segs);
        mutex_unlock(&inode->i_mutex);
 
        if (ret > 0) {
index b61293badfb1a9c98742a5bcc790751979251741..8f294cfac69749024c2c2e19d4b156755130e9ed 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
 
 int sysctl_nr_open __read_mostly = 1024*1024;
 int sysctl_nr_open_min = BITS_PER_LONG;
-int sysctl_nr_open_max = 1024 * 1024; /* raised later */
+/* our min() is unusable in constant expressions ;-/ */
+#define __const_min(x, y) ((x) < (y) ? (x) : (y))
+int sysctl_nr_open_max = __const_min(INT_MAX, ~(size_t)0/sizeof(void *)) &
+                        -BITS_PER_LONG;
 
 static void *alloc_fdmem(size_t size)
 {
@@ -429,12 +432,6 @@ void exit_files(struct task_struct *tsk)
        }
 }
 
-void __init files_defer_init(void)
-{
-       sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
-                            -BITS_PER_LONG;
-}
-
 struct files_struct init_files = {
        .count          = ATOMIC_INIT(1),
        .fdt            = &init_files.fdtab,
index 01071c4d752e1e41099c8082a31a55305c5c647d..a374f5033e97bab814977f8375f5c8cb1c899228 100644 (file)
@@ -52,7 +52,6 @@ static void file_free_rcu(struct rcu_head *head)
 static inline void file_free(struct file *f)
 {
        percpu_counter_dec(&nr_files);
-       file_check_state(f);
        call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
 }
 
@@ -178,47 +177,12 @@ struct file *alloc_file(struct path *path, fmode_t mode,
        file->f_mapping = path->dentry->d_inode->i_mapping;
        file->f_mode = mode;
        file->f_op = fop;
-
-       /*
-        * These mounts don't really matter in practice
-        * for r/o bind mounts.  They aren't userspace-
-        * visible.  We do this for consistency, and so
-        * that we can do debugging checks at __fput()
-        */
-       if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
-               file_take_write(file);
-               WARN_ON(mnt_clone_write(path->mnt));
-       }
        if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
                i_readcount_inc(path->dentry->d_inode);
        return file;
 }
 EXPORT_SYMBOL(alloc_file);
 
-/**
- * drop_file_write_access - give up ability to write to a file
- * @file: the file to which we will stop writing
- *
- * This is a central place which will give up the ability
- * to write to @file, along with access to write through
- * its vfsmount.
- */
-static void drop_file_write_access(struct file *file)
-{
-       struct vfsmount *mnt = file->f_path.mnt;
-       struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
-
-       put_write_access(inode);
-
-       if (special_file(inode->i_mode))
-               return;
-       if (file_check_writeable(file) != 0)
-               return;
-       __mnt_drop_write(mnt);
-       file_release_write(file);
-}
-
 /* the real guts of fput() - releasing the last reference to file
  */
 static void __fput(struct file *file)
@@ -253,8 +217,10 @@ static void __fput(struct file *file)
        put_pid(file->f_owner.pid);
        if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
                i_readcount_dec(inode);
-       if (file->f_mode & FMODE_WRITE)
-               drop_file_write_access(file);
+       if (file->f_mode & FMODE_WRITER) {
+               put_write_access(inode);
+               __mnt_drop_write(mnt);
+       }
        file->f_path.dentry = NULL;
        file->f_path.mnt = NULL;
        file->f_inode = NULL;
@@ -359,6 +325,5 @@ void __init files_init(unsigned long mempages)
 
        n = (mempages * (PAGE_SIZE / 1024)) / 10;
        files_stat.max_files = max_t(unsigned long, n, NR_FILE);
-       files_defer_init();
        percpu_counter_init(&nr_files, 0);
 } 
index 0a648bb455ae70b5a84e55ad9209c5894822d1be..aac71ce373e4494fa879c6cc268964a7f7641a37 100644 (file)
@@ -667,15 +667,15 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
                struct pipe_buffer *buf = cs->currbuf;
 
                if (!cs->write) {
-                       buf->ops->unmap(cs->pipe, buf, cs->mapaddr);
+                       kunmap_atomic(cs->mapaddr);
                } else {
-                       kunmap(buf->page);
+                       kunmap_atomic(cs->mapaddr);
                        buf->len = PAGE_SIZE - cs->len;
                }
                cs->currbuf = NULL;
                cs->mapaddr = NULL;
        } else if (cs->mapaddr) {
-               kunmap(cs->pg);
+               kunmap_atomic(cs->mapaddr);
                if (cs->write) {
                        flush_dcache_page(cs->pg);
                        set_page_dirty_lock(cs->pg);
@@ -706,7 +706,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
 
                        BUG_ON(!cs->nr_segs);
                        cs->currbuf = buf;
-                       cs->mapaddr = buf->ops->map(cs->pipe, buf, 0);
+                       cs->mapaddr = kmap_atomic(buf->page);
                        cs->len = buf->len;
                        cs->buf = cs->mapaddr + buf->offset;
                        cs->pipebufs++;
@@ -726,7 +726,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
                        buf->len = 0;
 
                        cs->currbuf = buf;
-                       cs->mapaddr = kmap(page);
+                       cs->mapaddr = kmap_atomic(page);
                        cs->buf = cs->mapaddr;
                        cs->len = PAGE_SIZE;
                        cs->pipebufs++;
@@ -745,7 +745,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
                        return err;
                BUG_ON(err != 1);
                offset = cs->addr % PAGE_SIZE;
-               cs->mapaddr = kmap(cs->pg);
+               cs->mapaddr = kmap_atomic(cs->pg);
                cs->buf = cs->mapaddr + offset;
                cs->len = min(PAGE_SIZE - offset, cs->seglen);
                cs->seglen -= cs->len;
@@ -874,7 +874,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 out_fallback_unlock:
        unlock_page(newpage);
 out_fallback:
-       cs->mapaddr = buf->ops->map(cs->pipe, buf, 1);
+       cs->mapaddr = kmap_atomic(buf->page);
        cs->buf = cs->mapaddr + buf->offset;
 
        err = lock_request(cs->fc, cs->req);
index 48992cac714b413f644dbdd7c97ba87355831677..13f8bdec5110d1a7db12b2a262bb5e2ecb0e4f82 100644 (file)
@@ -1086,9 +1086,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
                if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);
 
-               pagefault_disable();
                tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
-               pagefault_enable();
                flush_dcache_page(page);
 
                mark_page_accessed(page);
@@ -1237,8 +1235,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                goto out;
 
        if (file->f_flags & O_DIRECT) {
-               written = generic_file_direct_write(iocb, iov, &nr_segs,
-                                                   pos, &iocb->ki_pos,
+               written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 
                                                    count, ocount);
                if (written < 0 || written == count)
                        goto out;
index b29e42f05f3442b887e58ca1972a9333ec474bd7..d55297f2fa058c18512d0136b8a5437b29bc481c 100644 (file)
@@ -10,7 +10,7 @@ struct mnt_namespace {
        struct user_namespace   *user_ns;
        u64                     seq;    /* Sequence number to prevent loops */
        wait_queue_head_t poll;
-       int event;
+       u64 event;
 };
 
 struct mnt_pcp {
@@ -104,6 +104,9 @@ struct proc_mounts {
        struct mnt_namespace *ns;
        struct path root;
        int (*show)(struct seq_file *, struct vfsmount *);
+       void *cached_mount;
+       u64 cached_event;
+       loff_t cached_index;
 };
 
 #define proc_mounts(p) (container_of((p), struct proc_mounts, m))
index 88339f59efb5d9b3691f5ebbad7e5ef6eb59c4db..c6157c894fce234c333d5a2d787f81ee3e5e7ba9 100644 (file)
@@ -358,6 +358,7 @@ int generic_permission(struct inode *inode, int mask)
 
        return -EACCES;
 }
+EXPORT_SYMBOL(generic_permission);
 
 /*
  * We _really_ want to just do "generic_permission()" without
@@ -455,6 +456,7 @@ int inode_permission(struct inode *inode, int mask)
                return retval;
        return __inode_permission(inode, mask);
 }
+EXPORT_SYMBOL(inode_permission);
 
 /**
  * path_get - get a reference to a path
@@ -924,6 +926,7 @@ int follow_up(struct path *path)
        path->mnt = &parent->mnt;
        return 1;
 }
+EXPORT_SYMBOL(follow_up);
 
 /*
  * Perform an automount
@@ -1085,6 +1088,7 @@ int follow_down_one(struct path *path)
        }
        return 0;
 }
+EXPORT_SYMBOL(follow_down_one);
 
 static inline bool managed_dentry_might_block(struct dentry *dentry)
 {
@@ -1223,6 +1227,7 @@ int follow_down(struct path *path)
        }
        return 0;
 }
+EXPORT_SYMBOL(follow_down);
 
 /*
  * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
@@ -2025,6 +2030,7 @@ int kern_path(const char *name, unsigned int flags, struct path *path)
                *path = nd.path;
        return res;
 }
+EXPORT_SYMBOL(kern_path);
 
 /**
  * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
@@ -2049,6 +2055,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
                *path = nd.path;
        return err;
 }
+EXPORT_SYMBOL(vfs_path_lookup);
 
 /*
  * Restricted form of lookup. Doesn't follow links, single-component only,
@@ -2111,6 +2118,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 
        return __lookup_hash(&this, base, 0);
 }
+EXPORT_SYMBOL(lookup_one_len);
 
 int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
                 struct path *path, int *empty)
@@ -2135,6 +2143,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
 {
        return user_path_at_empty(dfd, name, flags, path, NULL);
 }
+EXPORT_SYMBOL(user_path_at);
 
 /*
  * NB: most callers don't do anything directly with the reference to the
@@ -2477,6 +2486,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
        mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
        return NULL;
 }
+EXPORT_SYMBOL(lock_rename);
 
 void unlock_rename(struct dentry *p1, struct dentry *p2)
 {
@@ -2486,6 +2496,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
                mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
        }
 }
+EXPORT_SYMBOL(unlock_rename);
 
 int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                bool want_excl)
@@ -2506,6 +2517,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
                fsnotify_create(dir, dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_create);
 
 static int may_open(struct path *path, int acc_mode, int flag)
 {
@@ -3375,6 +3387,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
                fsnotify_create(dir, dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_mknod);
 
 static int may_mknod(umode_t mode)
 {
@@ -3464,6 +3477,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
                fsnotify_mkdir(dir, dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_mkdir);
 
 SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
 {
@@ -3518,6 +3532,7 @@ void dentry_unhash(struct dentry *dentry)
                __d_drop(dentry);
        spin_unlock(&dentry->d_lock);
 }
+EXPORT_SYMBOL(dentry_unhash);
 
 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
@@ -3555,6 +3570,7 @@ out:
                d_delete(dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_rmdir);
 
 static long do_rmdir(int dfd, const char __user *pathname)
 {
@@ -3672,6 +3688,7 @@ out:
 
        return error;
 }
+EXPORT_SYMBOL(vfs_unlink);
 
 /*
  * Make sure that the actual truncation of the file will occur outside its
@@ -3785,6 +3802,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
                fsnotify_create(dir, dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_symlink);
 
 SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
                int, newdfd, const char __user *, newname)
@@ -3893,6 +3911,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
                fsnotify_link(dir, inode, new_dentry);
        return error;
 }
+EXPORT_SYMBOL(vfs_link);
 
 /*
  * Hardlinks are often used in delicate situations.  We avoid
@@ -4152,6 +4171,7 @@ out:
 
        return error;
 }
+EXPORT_SYMBOL(vfs_rename);
 
 SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
                int, newdfd, const char __user *, newname, unsigned int, flags)
@@ -4304,11 +4324,9 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
        return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
 }
 
-int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
+int readlink_copy(char __user *buffer, int buflen, const char *link)
 {
-       int len;
-
-       len = PTR_ERR(link);
+       int len = PTR_ERR(link);
        if (IS_ERR(link))
                goto out;
 
@@ -4320,6 +4338,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const c
 out:
        return len;
 }
+EXPORT_SYMBOL(readlink_copy);
 
 /*
  * A helper for ->readlink().  This should be used *ONLY* for symlinks that
@@ -4337,11 +4356,12 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
        if (IS_ERR(cookie))
                return PTR_ERR(cookie);
 
-       res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
+       res = readlink_copy(buffer, buflen, nd_get_link(&nd));
        if (dentry->d_inode->i_op->put_link)
                dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
        return res;
 }
+EXPORT_SYMBOL(generic_readlink);
 
 /* get the link contents into pagecache */
 static char *page_getlink(struct dentry * dentry, struct page **ppage)
@@ -4361,14 +4381,14 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage)
 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 {
        struct page *page = NULL;
-       char *s = page_getlink(dentry, &page);
-       int res = vfs_readlink(dentry,buffer,buflen,s);
+       int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page));
        if (page) {
                kunmap(page);
                page_cache_release(page);
        }
        return res;
 }
+EXPORT_SYMBOL(page_readlink);
 
 void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
 {
@@ -4376,6 +4396,7 @@ void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
        nd_set_link(nd, page_getlink(dentry, &page));
        return page;
 }
+EXPORT_SYMBOL(page_follow_link_light);
 
 void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
 {
@@ -4386,6 +4407,7 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
                page_cache_release(page);
        }
 }
+EXPORT_SYMBOL(page_put_link);
 
 /*
  * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
@@ -4423,45 +4445,18 @@ retry:
 fail:
        return err;
 }
+EXPORT_SYMBOL(__page_symlink);
 
 int page_symlink(struct inode *inode, const char *symname, int len)
 {
        return __page_symlink(inode, symname, len,
                        !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
 }
+EXPORT_SYMBOL(page_symlink);
 
 const struct inode_operations page_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
 };
-
-EXPORT_SYMBOL(user_path_at);
-EXPORT_SYMBOL(follow_down_one);
-EXPORT_SYMBOL(follow_down);
-EXPORT_SYMBOL(follow_up);
-EXPORT_SYMBOL(get_write_access); /* nfsd */
-EXPORT_SYMBOL(lock_rename);
-EXPORT_SYMBOL(lookup_one_len);
-EXPORT_SYMBOL(page_follow_link_light);
-EXPORT_SYMBOL(page_put_link);
-EXPORT_SYMBOL(page_readlink);
-EXPORT_SYMBOL(__page_symlink);
-EXPORT_SYMBOL(page_symlink);
 EXPORT_SYMBOL(page_symlink_inode_operations);
-EXPORT_SYMBOL(kern_path);
-EXPORT_SYMBOL(vfs_path_lookup);
-EXPORT_SYMBOL(inode_permission);
-EXPORT_SYMBOL(unlock_rename);
-EXPORT_SYMBOL(vfs_create);
-EXPORT_SYMBOL(vfs_link);
-EXPORT_SYMBOL(vfs_mkdir);
-EXPORT_SYMBOL(vfs_mknod);
-EXPORT_SYMBOL(generic_permission);
-EXPORT_SYMBOL(vfs_readlink);
-EXPORT_SYMBOL(vfs_rename);
-EXPORT_SYMBOL(vfs_rmdir);
-EXPORT_SYMBOL(vfs_symlink);
-EXPORT_SYMBOL(vfs_unlink);
-EXPORT_SYMBOL(dentry_unhash);
-EXPORT_SYMBOL(generic_readlink);
index 2ffc5a2905d463e828c387fbfb92a31f219f98bc..182bc41cd88711d593c4d997171c6ad483a87577 100644 (file)
@@ -52,7 +52,7 @@ static int __init set_mphash_entries(char *str)
 }
 __setup("mphash_entries=", set_mphash_entries);
 
-static int event;
+static u64 event;
 static DEFINE_IDA(mnt_id_ida);
 static DEFINE_IDA(mnt_group_ida);
 static DEFINE_SPINLOCK(mnt_id_lock);
@@ -414,9 +414,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
  */
 int __mnt_want_write_file(struct file *file)
 {
-       struct inode *inode = file_inode(file);
-
-       if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
+       if (!(file->f_mode & FMODE_WRITER))
                return __mnt_want_write(file->f_path.mnt);
        else
                return mnt_clone_write(file->f_path.mnt);
@@ -570,13 +568,17 @@ int sb_prepare_remount_readonly(struct super_block *sb)
 static void free_vfsmnt(struct mount *mnt)
 {
        kfree(mnt->mnt_devname);
-       mnt_free_id(mnt);
 #ifdef CONFIG_SMP
        free_percpu(mnt->mnt_pcp);
 #endif
        kmem_cache_free(mnt_cache, mnt);
 }
 
+static void delayed_free_vfsmnt(struct rcu_head *head)
+{
+       free_vfsmnt(container_of(head, struct mount, mnt_rcu));
+}
+
 /* call under rcu_read_lock */
 bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
 {
@@ -848,6 +850,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 
        root = mount_fs(type, flags, name, data);
        if (IS_ERR(root)) {
+               mnt_free_id(mnt);
                free_vfsmnt(mnt);
                return ERR_CAST(root);
        }
@@ -885,7 +888,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
                        goto out_free;
        }
 
-       mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
+       mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
        /* Don't allow unprivileged users to change mount flags */
        if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
                mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
@@ -928,20 +931,11 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
        return mnt;
 
  out_free:
+       mnt_free_id(mnt);
        free_vfsmnt(mnt);
        return ERR_PTR(err);
 }
 
-static void delayed_free(struct rcu_head *head)
-{
-       struct mount *mnt = container_of(head, struct mount, mnt_rcu);
-       kfree(mnt->mnt_devname);
-#ifdef CONFIG_SMP
-       free_percpu(mnt->mnt_pcp);
-#endif
-       kmem_cache_free(mnt_cache, mnt);
-}
-
 static void mntput_no_expire(struct mount *mnt)
 {
 put_again:
@@ -991,7 +985,7 @@ put_again:
        dput(mnt->mnt.mnt_root);
        deactivate_super(mnt->mnt.mnt_sb);
        mnt_free_id(mnt);
-       call_rcu(&mnt->mnt_rcu, delayed_free);
+       call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
 }
 
 void mntput(struct vfsmount *mnt)
@@ -1100,14 +1094,29 @@ static void *m_start(struct seq_file *m, loff_t *pos)
        struct proc_mounts *p = proc_mounts(m);
 
        down_read(&namespace_sem);
-       return seq_list_start(&p->ns->list, *pos);
+       if (p->cached_event == p->ns->event) {
+               void *v = p->cached_mount;
+               if (*pos == p->cached_index)
+                       return v;
+               if (*pos == p->cached_index + 1) {
+                       v = seq_list_next(v, &p->ns->list, &p->cached_index);
+                       return p->cached_mount = v;
+               }
+       }
+
+       p->cached_event = p->ns->event;
+       p->cached_mount = seq_list_start(&p->ns->list, *pos);
+       p->cached_index = *pos;
+       return p->cached_mount;
 }
 
 static void *m_next(struct seq_file *m, void *v, loff_t *pos)
 {
        struct proc_mounts *p = proc_mounts(m);
 
-       return seq_list_next(v, &p->ns->list, pos);
+       p->cached_mount = seq_list_next(v, &p->ns->list, pos);
+       p->cached_index = *pos;
+       return p->cached_mount;
 }
 
 static void m_stop(struct seq_file *m, void *v)
@@ -1661,9 +1670,9 @@ static int attach_recursive_mnt(struct mount *source_mnt,
                if (err)
                        goto out;
                err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
+               lock_mount_hash();
                if (err)
                        goto out_cleanup_ids;
-               lock_mount_hash();
                for (p = source_mnt; p; p = next_mnt(p, source_mnt))
                        set_mnt_shared(p);
        } else {
@@ -1690,6 +1699,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
        return 0;
 
  out_cleanup_ids:
+       while (!hlist_empty(&tree_list)) {
+               child = hlist_entry(tree_list.first, struct mount, mnt_hash);
+               umount_tree(child, 0);
+       }
+       unlock_mount_hash();
        cleanup_group_ids(source_mnt, NULL);
  out:
        return err;
@@ -2044,7 +2058,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
        struct mount *parent;
        int err;
 
-       mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
+       mnt_flags &= ~MNT_INTERNAL_FLAGS;
 
        mp = lock_mount(path);
        if (IS_ERR(mp))
index 81b4f643ecefda84444cca57481958bfca11ea06..e31e589369a49a930f45be2124114c7252c83723 100644 (file)
@@ -470,9 +470,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
 {
        struct ncp_mount_data_kernel data;
        struct ncp_server *server;
-       struct file *ncp_filp;
        struct inode *root_inode;
-       struct inode *sock_inode;
        struct socket *sock;
        int error;
        int default_bufsize;
@@ -541,18 +539,10 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
        if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) ||
            !gid_valid(data.gid))
                goto out;
-       error = -EBADF;
-       ncp_filp = fget(data.ncp_fd);
-       if (!ncp_filp)
-               goto out;
-       error = -ENOTSOCK;
-       sock_inode = file_inode(ncp_filp);
-       if (!S_ISSOCK(sock_inode->i_mode))
-               goto out_fput;
-       sock = SOCKET_I(sock_inode);
+       sock = sockfd_lookup(data.ncp_fd, &error);
        if (!sock)
-               goto out_fput;
-               
+               goto out;
+
        if (sock->type == SOCK_STREAM)
                default_bufsize = 0xF000;
        else
@@ -574,27 +564,16 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
        if (error)
                goto out_fput;
 
-       server->ncp_filp = ncp_filp;
        server->ncp_sock = sock;
        
        if (data.info_fd != -1) {
-               struct socket *info_sock;
-
-               error = -EBADF;
-               server->info_filp = fget(data.info_fd);
-               if (!server->info_filp)
-                       goto out_bdi;
-               error = -ENOTSOCK;
-               sock_inode = file_inode(server->info_filp);
-               if (!S_ISSOCK(sock_inode->i_mode))
-                       goto out_fput2;
-               info_sock = SOCKET_I(sock_inode);
+               struct socket *info_sock = sockfd_lookup(data.info_fd, &error);
                if (!info_sock)
-                       goto out_fput2;
+                       goto out_bdi;
+               server->info_sock = info_sock;
                error = -EBADFD;
                if (info_sock->type != SOCK_STREAM)
                        goto out_fput2;
-               server->info_sock = info_sock;
        }
 
 /*     server->lock = 0;       */
@@ -766,17 +745,12 @@ out_nls:
        mutex_destroy(&server->root_setup_lock);
        mutex_destroy(&server->mutex);
 out_fput2:
-       if (server->info_filp)
-               fput(server->info_filp);
+       if (server->info_sock)
+               sockfd_put(server->info_sock);
 out_bdi:
        bdi_destroy(&server->bdi);
 out_fput:
-       /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
-        * 
-        * The previously used put_filp(ncp_filp); was bogus, since
-        * it doesn't perform proper unlocking.
-        */
-       fput(ncp_filp);
+       sockfd_put(sock);
 out:
        put_pid(data.wdog_pid);
        sb->s_fs_info = NULL;
@@ -809,9 +783,9 @@ static void ncp_put_super(struct super_block *sb)
        mutex_destroy(&server->root_setup_lock);
        mutex_destroy(&server->mutex);
 
-       if (server->info_filp)
-               fput(server->info_filp);
-       fput(server->ncp_filp);
+       if (server->info_sock)
+               sockfd_put(server->info_sock);
+       sockfd_put(server->ncp_sock);
        kill_pid(server->m.wdog_pid, SIGTERM, 1);
        put_pid(server->m.wdog_pid);
 
index b81e97adc5a9b9862e19ce0b25ab83a652909565..7fa17e4593669379d420fafa11d355276a6eba72 100644 (file)
@@ -45,9 +45,7 @@ struct ncp_server {
 
        __u8 name_space[NCP_NUMBER_OF_VOLUMES + 2];
 
-       struct file *ncp_filp;  /* File pointer to ncp socket */
        struct socket *ncp_sock;/* ncp socket */
-       struct file *info_filp;
        struct socket *info_sock;
 
        u8 sequence;
index 9d8153ebacfb1c2784543eb319b4de4cdf5f16e7..f47af5e6e23037ed0c82b17274b098d00a617517 100644 (file)
@@ -1704,8 +1704,6 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
        iput(bvi);
 skip_large_index_stuff:
        /* Setup the operations for this index inode. */
-       vi->i_op = NULL;
-       vi->i_fop = NULL;
        vi->i_mapping->a_ops = &ntfs_mst_aops;
        vi->i_blocks = ni->allocated_size >> 9;
        /*
index eb649d23a4de24e4cde6860389528e7b2ef919dc..dfda2ffdb16c15aa8338b1de6f58ccc42ff5b9ba 100644 (file)
@@ -916,57 +916,30 @@ static struct o2net_msg_handler *o2net_handler_get(u32 msg_type, u32 key)
 
 static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
 {
-       int ret;
-       mm_segment_t oldfs;
-       struct kvec vec = {
-               .iov_len = len,
-               .iov_base = data,
-       };
-       struct msghdr msg = {
-               .msg_iovlen = 1,
-               .msg_iov = (struct iovec *)&vec,
-                       .msg_flags = MSG_DONTWAIT,
-       };
-
-       oldfs = get_fs();
-       set_fs(get_ds());
-       ret = sock_recvmsg(sock, &msg, len, msg.msg_flags);
-       set_fs(oldfs);
-
-       return ret;
+       struct kvec vec = { .iov_len = len, .iov_base = data, };
+       struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
+       return kernel_recvmsg(sock, &msg, &vec, 1, len, msg.msg_flags);
 }
 
 static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec,
                              size_t veclen, size_t total)
 {
        int ret;
-       mm_segment_t oldfs;
-       struct msghdr msg = {
-               .msg_iov = (struct iovec *)vec,
-               .msg_iovlen = veclen,
-       };
+       struct msghdr msg;
 
        if (sock == NULL) {
                ret = -EINVAL;
                goto out;
        }
 
-       oldfs = get_fs();
-       set_fs(get_ds());
-       ret = sock_sendmsg(sock, &msg, total);
-       set_fs(oldfs);
-       if (ret != total) {
-               mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret,
-                    total);
-               if (ret >= 0)
-                       ret = -EPIPE; /* should be smarter, I bet */
-               goto out;
-       }
-
-       ret = 0;
+       ret = kernel_sendmsg(sock, &msg, vec, veclen, total);
+       if (likely(ret == total))
+               return 0;
+       mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, total);
+       if (ret >= 0)
+               ret = -EPIPE; /* should be smarter, I bet */
 out:
-       if (ret < 0)
-               mlog(0, "returning error: %d\n", ret);
+       mlog(0, "returning error: %d\n", ret);
        return ret;
 }
 
index ff33c5ef87f2b826532af886436be9adcc8f7046..8970dcf74de53e71539e8fad7042a55f66a2c713 100644 (file)
@@ -2367,15 +2367,18 @@ relock:
 
        if (direct_io) {
                written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
-                                                   ppos, count, ocount);
+                                                   count, ocount);
                if (written < 0) {
                        ret = written;
                        goto out_dio;
                }
        } else {
+               struct iov_iter from;
+               iov_iter_init(&from, iov, nr_segs, count, 0);
                current->backing_dev_info = file->f_mapping->backing_dev_info;
-               written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
-                                                     ppos, count, 0);
+               written = generic_perform_write(file, &from, *ppos);
+               if (likely(written >= 0))
+                       iocb->ki_pos = *ppos + written;
                current->backing_dev_info = NULL;
        }
 
index 631aea815def32946433b8aebed9a312d0fc872c..3d30eb1fc95e383e50e91605d3526161bcfdebde 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -655,35 +655,6 @@ out:
        return error;
 }
 
-/*
- * You have to be very careful that these write
- * counts get cleaned up in error cases and
- * upon __fput().  This should probably never
- * be called outside of __dentry_open().
- */
-static inline int __get_file_write_access(struct inode *inode,
-                                         struct vfsmount *mnt)
-{
-       int error;
-       error = get_write_access(inode);
-       if (error)
-               return error;
-       /*
-        * Do not take mount writer counts on
-        * special files since no writes to
-        * the mount itself will occur.
-        */
-       if (!special_file(inode->i_mode)) {
-               /*
-                * Balanced in __fput()
-                */
-               error = __mnt_want_write(mnt);
-               if (error)
-                       put_write_access(inode);
-       }
-       return error;
-}
-
 int open_check_o_direct(struct file *f)
 {
        /* NB: we're sure to have correct a_ops only after f_op->open */
@@ -708,26 +679,28 @@ static int do_dentry_open(struct file *f,
        f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
                                FMODE_PREAD | FMODE_PWRITE;
 
-       if (unlikely(f->f_flags & O_PATH))
-               f->f_mode = FMODE_PATH;
-
        path_get(&f->f_path);
        inode = f->f_inode = f->f_path.dentry->d_inode;
-       if (f->f_mode & FMODE_WRITE) {
-               error = __get_file_write_access(inode, f->f_path.mnt);
-               if (error)
-                       goto cleanup_file;
-               if (!special_file(inode->i_mode))
-                       file_take_write(f);
-       }
-
        f->f_mapping = inode->i_mapping;
 
-       if (unlikely(f->f_mode & FMODE_PATH)) {
+       if (unlikely(f->f_flags & O_PATH)) {
+               f->f_mode = FMODE_PATH;
                f->f_op = &empty_fops;
                return 0;
        }
 
+       if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
+               error = get_write_access(inode);
+               if (unlikely(error))
+                       goto cleanup_file;
+               error = __mnt_want_write(f->f_path.mnt);
+               if (unlikely(error)) {
+                       put_write_access(inode);
+                       goto cleanup_file;
+               }
+               f->f_mode |= FMODE_WRITER;
+       }
+
        /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
        if (S_ISREG(inode->i_mode))
                f->f_mode |= FMODE_ATOMIC_POS;
@@ -764,18 +737,9 @@ static int do_dentry_open(struct file *f,
 
 cleanup_all:
        fops_put(f->f_op);
-       if (f->f_mode & FMODE_WRITE) {
+       if (f->f_mode & FMODE_WRITER) {
                put_write_access(inode);
-               if (!special_file(inode->i_mode)) {
-                       /*
-                        * We don't consider this a real
-                        * mnt_want/drop_write() pair
-                        * because it all happenend right
-                        * here, so just reset the state.
-                        */
-                       file_reset_write(f);
-                       __mnt_drop_write(f->f_path.mnt);
-               }
+               __mnt_drop_write(f->f_path.mnt);
        }
 cleanup_file:
        path_put(&f->f_path);
index 78fd0d0788dbc465c3d7a6ba674bb64a6ebea25d..034bffac3f9724c6121f4635ba9740d61e106d06 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -142,55 +142,6 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
        return 0;
 }
 
-static int
-pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
-                     int atomic)
-{
-       unsigned long copy;
-
-       while (len > 0) {
-               while (!iov->iov_len)
-                       iov++;
-               copy = min_t(unsigned long, len, iov->iov_len);
-
-               if (atomic) {
-                       if (__copy_to_user_inatomic(iov->iov_base, from, copy))
-                               return -EFAULT;
-               } else {
-                       if (copy_to_user(iov->iov_base, from, copy))
-                               return -EFAULT;
-               }
-               from += copy;
-               len -= copy;
-               iov->iov_base += copy;
-               iov->iov_len -= copy;
-       }
-       return 0;
-}
-
-/*
- * Attempt to pre-fault in the user memory, so we can use atomic copies.
- * Returns the number of bytes not faulted in.
- */
-static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
-{
-       while (!iov->iov_len)
-               iov++;
-
-       while (len > 0) {
-               unsigned long this_len;
-
-               this_len = min_t(unsigned long, len, iov->iov_len);
-               if (fault_in_pages_writeable(iov->iov_base, this_len))
-                       break;
-
-               len -= this_len;
-               iov++;
-       }
-
-       return len;
-}
-
 /*
  * Pre-fault in the user memory, so we can use atomic copies.
  */
@@ -225,52 +176,6 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
                page_cache_release(page);
 }
 
-/**
- * generic_pipe_buf_map - virtually map a pipe buffer
- * @pipe:      the pipe that the buffer belongs to
- * @buf:       the buffer that should be mapped
- * @atomic:    whether to use an atomic map
- *
- * Description:
- *     This function returns a kernel virtual address mapping for the
- *     pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
- *     and the caller has to be careful not to fault before calling
- *     the unmap function.
- *
- *     Note that this function calls kmap_atomic() if @atomic != 0.
- */
-void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
-                          struct pipe_buffer *buf, int atomic)
-{
-       if (atomic) {
-               buf->flags |= PIPE_BUF_FLAG_ATOMIC;
-               return kmap_atomic(buf->page);
-       }
-
-       return kmap(buf->page);
-}
-EXPORT_SYMBOL(generic_pipe_buf_map);
-
-/**
- * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
- * @pipe:      the pipe that the buffer belongs to
- * @buf:       the buffer that should be unmapped
- * @map_data:  the data that the mapping function returned
- *
- * Description:
- *     This function undoes the mapping that ->map() provided.
- */
-void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
-                           struct pipe_buffer *buf, void *map_data)
-{
-       if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
-               buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
-               kunmap_atomic(map_data);
-       } else
-               kunmap(buf->page);
-}
-EXPORT_SYMBOL(generic_pipe_buf_unmap);
-
 /**
  * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
  * @pipe:      the pipe that the buffer belongs to
@@ -351,8 +256,6 @@ EXPORT_SYMBOL(generic_pipe_buf_release);
 
 static const struct pipe_buf_operations anon_pipe_buf_ops = {
        .can_merge = 1,
-       .map = generic_pipe_buf_map,
-       .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = anon_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
@@ -361,8 +264,6 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
 
 static const struct pipe_buf_operations packet_pipe_buf_ops = {
        .can_merge = 0,
-       .map = generic_pipe_buf_map,
-       .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = anon_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
@@ -379,12 +280,15 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
        ssize_t ret;
        struct iovec *iov = (struct iovec *)_iov;
        size_t total_len;
+       struct iov_iter iter;
 
        total_len = iov_length(iov, nr_segs);
        /* Null read succeeds. */
        if (unlikely(total_len == 0))
                return 0;
 
+       iov_iter_init(&iter, iov, nr_segs, total_len, 0);
+
        do_wakeup = 0;
        ret = 0;
        __pipe_lock(pipe);
@@ -394,9 +298,9 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
                        int curbuf = pipe->curbuf;
                        struct pipe_buffer *buf = pipe->bufs + curbuf;
                        const struct pipe_buf_operations *ops = buf->ops;
-                       void *addr;
                        size_t chars = buf->len;
-                       int error, atomic;
+                       size_t written;
+                       int error;
 
                        if (chars > total_len)
                                chars = total_len;
@@ -408,21 +312,10 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
                                break;
                        }
 
-                       atomic = !iov_fault_in_pages_write(iov, chars);
-redo:
-                       addr = ops->map(pipe, buf, atomic);
-                       error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
-                       ops->unmap(pipe, buf, addr);
-                       if (unlikely(error)) {
-                               /*
-                                * Just retry with the slow path if we failed.
-                                */
-                               if (atomic) {
-                                       atomic = 0;
-                                       goto redo;
-                               }
+                       written = copy_page_to_iter(buf->page, buf->offset, chars, &iter);
+                       if (unlikely(written < chars)) {
                                if (!ret)
-                                       ret = error;
+                                       ret = -EFAULT;
                                break;
                        }
                        ret += chars;
@@ -538,10 +431,16 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 
                        iov_fault_in_pages_read(iov, chars);
 redo1:
-                       addr = ops->map(pipe, buf, atomic);
+                       if (atomic)
+                               addr = kmap_atomic(buf->page);
+                       else
+                               addr = kmap(buf->page);
                        error = pipe_iov_copy_from_user(offset + addr, iov,
                                                        chars, atomic);
-                       ops->unmap(pipe, buf, addr);
+                       if (atomic)
+                               kunmap_atomic(addr);
+                       else
+                               kunmap(buf->page);
                        ret = error;
                        do_wakeup = 1;
                        if (error) {
index 88396df725b4bbe84dc7d57eaf7a259877e5d87c..302bf22c4a30762013dbbfd64d0353250101eb62 100644 (file)
@@ -164,46 +164,94 @@ static struct mount *propagation_next(struct mount *m,
        }
 }
 
-/*
- * return the source mount to be used for cloning
- *
- * @dest       the current destination mount
- * @last_dest          the last seen destination mount
- * @last_src   the last seen source mount
- * @type       return CL_SLAVE if the new mount has to be
- *             cloned as a slave.
- */
-static struct mount *get_source(struct mount *dest,
-                               struct mount *last_dest,
-                               struct mount *last_src,
-                               int *type)
+static struct mount *next_group(struct mount *m, struct mount *origin)
 {
-       struct mount *p_last_src = NULL;
-       struct mount *p_last_dest = NULL;
-
-       while (last_dest != dest->mnt_master) {
-               p_last_dest = last_dest;
-               p_last_src = last_src;
-               last_dest = last_dest->mnt_master;
-               last_src = last_src->mnt_master;
+       while (1) {
+               while (1) {
+                       struct mount *next;
+                       if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
+                               return first_slave(m);
+                       next = next_peer(m);
+                       if (m->mnt_group_id == origin->mnt_group_id) {
+                               if (next == origin)
+                                       return NULL;
+                       } else if (m->mnt_slave.next != &next->mnt_slave)
+                               break;
+                       m = next;
+               }
+               /* m is the last peer */
+               while (1) {
+                       struct mount *master = m->mnt_master;
+                       if (m->mnt_slave.next != &master->mnt_slave_list)
+                               return next_slave(m);
+                       m = next_peer(master);
+                       if (master->mnt_group_id == origin->mnt_group_id)
+                               break;
+                       if (master->mnt_slave.next == &m->mnt_slave)
+                               break;
+                       m = master;
+               }
+               if (m == origin)
+                       return NULL;
        }
+}
 
-       if (p_last_dest) {
-               do {
-                       p_last_dest = next_peer(p_last_dest);
-               } while (IS_MNT_NEW(p_last_dest));
-               /* is that a peer of the earlier? */
-               if (dest == p_last_dest) {
-                       *type = CL_MAKE_SHARED;
-                       return p_last_src;
+/* all accesses are serialized by namespace_sem */
+static struct user_namespace *user_ns;
+static struct mount *last_dest, *last_source, *dest_master;
+static struct mountpoint *mp;
+static struct hlist_head *list;
+
+static int propagate_one(struct mount *m)
+{
+       struct mount *child;
+       int type;
+       /* skip ones added by this propagate_mnt() */
+       if (IS_MNT_NEW(m))
+               return 0;
+       /* skip if mountpoint isn't covered by it */
+       if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
+               return 0;
+       if (m->mnt_group_id == last_dest->mnt_group_id) {
+               type = CL_MAKE_SHARED;
+       } else {
+               struct mount *n, *p;
+               for (n = m; ; n = p) {
+                       p = n->mnt_master;
+                       if (p == dest_master || IS_MNT_MARKED(p)) {
+                               while (last_dest->mnt_master != p) {
+                                       last_source = last_source->mnt_master;
+                                       last_dest = last_source->mnt_parent;
+                               }
+                               if (n->mnt_group_id != last_dest->mnt_group_id) {
+                                       last_source = last_source->mnt_master;
+                                       last_dest = last_source->mnt_parent;
+                               }
+                               break;
+                       }
                }
+               type = CL_SLAVE;
+               /* beginning of peer group among the slaves? */
+               if (IS_MNT_SHARED(m))
+                       type |= CL_MAKE_SHARED;
        }
-       /* slave of the earlier, then */
-       *type = CL_SLAVE;
-       /* beginning of peer group among the slaves? */
-       if (IS_MNT_SHARED(dest))
-               *type |= CL_MAKE_SHARED;
-       return last_src;
+               
+       /* Notice when we are propagating across user namespaces */
+       if (m->mnt_ns->user_ns != user_ns)
+               type |= CL_UNPRIVILEGED;
+       child = copy_tree(last_source, last_source->mnt.mnt_root, type);
+       if (IS_ERR(child))
+               return PTR_ERR(child);
+       mnt_set_mountpoint(m, mp, child);
+       last_dest = m;
+       last_source = child;
+       if (m->mnt_master != dest_master) {
+               read_seqlock_excl(&mount_lock);
+               SET_MNT_MARK(m->mnt_master);
+               read_sequnlock_excl(&mount_lock);
+       }
+       hlist_add_head(&child->mnt_hash, list);
+       return 0;
 }
 
 /*
@@ -222,56 +270,48 @@ static struct mount *get_source(struct mount *dest,
 int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
                    struct mount *source_mnt, struct hlist_head *tree_list)
 {
-       struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
-       struct mount *m, *child;
+       struct mount *m, *n;
        int ret = 0;
-       struct mount *prev_dest_mnt = dest_mnt;
-       struct mount *prev_src_mnt  = source_mnt;
-       HLIST_HEAD(tmp_list);
-
-       for (m = propagation_next(dest_mnt, dest_mnt); m;
-                       m = propagation_next(m, dest_mnt)) {
-               int type;
-               struct mount *source;
-
-               if (IS_MNT_NEW(m))
-                       continue;
-
-               source =  get_source(m, prev_dest_mnt, prev_src_mnt, &type);
-
-               /* Notice when we are propagating across user namespaces */
-               if (m->mnt_ns->user_ns != user_ns)
-                       type |= CL_UNPRIVILEGED;
-
-               child = copy_tree(source, source->mnt.mnt_root, type);
-               if (IS_ERR(child)) {
-                       ret = PTR_ERR(child);
-                       tmp_list = *tree_list;
-                       tmp_list.first->pprev = &tmp_list.first;
-                       INIT_HLIST_HEAD(tree_list);
+
+       /*
+        * we don't want to bother passing tons of arguments to
+        * propagate_one(); everything is serialized by namespace_sem,
+        * so globals will do just fine.
+        */
+       user_ns = current->nsproxy->mnt_ns->user_ns;
+       last_dest = dest_mnt;
+       last_source = source_mnt;
+       mp = dest_mp;
+       list = tree_list;
+       dest_master = dest_mnt->mnt_master;
+
+       /* all peers of dest_mnt, except dest_mnt itself */
+       for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
+               ret = propagate_one(n);
+               if (ret)
                        goto out;
-               }
+       }
 
-               if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
-                       mnt_set_mountpoint(m, dest_mp, child);
-                       hlist_add_head(&child->mnt_hash, tree_list);
-               } else {
-                       /*
-                        * This can happen if the parent mount was bind mounted
-                        * on some subdirectory of a shared/slave mount.
-                        */
-                       hlist_add_head(&child->mnt_hash, &tmp_list);
-               }
-               prev_dest_mnt = m;
-               prev_src_mnt  = child;
+       /* all slave groups */
+       for (m = next_group(dest_mnt, dest_mnt); m;
+                       m = next_group(m, dest_mnt)) {
+               /* everything in that slave group */
+               n = m;
+               do {
+                       ret = propagate_one(n);
+                       if (ret)
+                               goto out;
+                       n = next_peer(n);
+               } while (n != m);
        }
 out:
-       lock_mount_hash();
-       while (!hlist_empty(&tmp_list)) {
-               child = hlist_entry(tmp_list.first, struct mount, mnt_hash);
-               umount_tree(child, 0);
+       read_seqlock_excl(&mount_lock);
+       hlist_for_each_entry(n, tree_list, mnt_hash) {
+               m = n->mnt_parent;
+               if (m->mnt_master != dest_mnt->mnt_master)
+                       CLEAR_MNT_MARK(m->mnt_master);
        }
-       unlock_mount_hash();
+       read_sequnlock_excl(&mount_lock);
        return ret;
 }
 
index fc28a27fa89233d24b90bdd748477994b8713ecd..4a246358b03183994461d9eae5d8efe09db2e6dc 100644 (file)
@@ -16,6 +16,9 @@
 #define IS_MNT_NEW(m)  (!(m)->mnt_ns)
 #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
 #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
+#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
+#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
+#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
 
 #define CL_EXPIRE              0x01
 #define CL_SLAVE               0x02
index 9ae46b87470dd9fe9fe6962c689abb4a7500e697..89026095f2b51a081ab37098c4b8f3f1da762519 100644 (file)
@@ -146,7 +146,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
        struct task_struct *task;
        void *ns;
        char name[50];
-       int len = -EACCES;
+       int res = -EACCES;
 
        task = get_proc_task(inode);
        if (!task)
@@ -155,24 +155,18 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
        if (!ptrace_may_access(task, PTRACE_MODE_READ))
                goto out_put_task;
 
-       len = -ENOENT;
+       res = -ENOENT;
        ns = ns_ops->get(task);
        if (!ns)
                goto out_put_task;
 
        snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
-       len = strlen(name);
-
-       if (len > buflen)
-               len = buflen;
-       if (copy_to_user(buffer, name, len))
-               len = -EFAULT;
-
+       res = readlink_copy(buffer, buflen, name);
        ns_ops->put(ns);
 out_put_task:
        put_task_struct(task);
 out:
-       return len;
+       return res;
 }
 
 static const struct inode_operations proc_ns_link_inode_operations = {
index ffeb202ec942d3f3f83594d517e329bafa89ce98..4348bb8907c20a6abc211b9fa944eca005d6f47c 100644 (file)
@@ -16,7 +16,7 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
        if (!tgid)
                return -ENOENT;
        sprintf(tmp, "%d", tgid);
-       return vfs_readlink(dentry,buffer,buflen,tmp);
+       return readlink_copy(buffer, buflen, tmp);
 }
 
 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
index 7be26f03a3f5813ed501bea520e79041af4466f7..1a81373947f33ec4e56b5e34e24814b4c267800a 100644 (file)
@@ -267,6 +267,7 @@ static int mounts_open_common(struct inode *inode, struct file *file,
        p->root = root;
        p->m.poll_event = ns->event;
        p->show = show;
+       p->cached_event = ~0ULL;
 
        return 0;
 
index 12028fa41def9c007b0217b6c4bc481868d93792..9bc07d2b53cf3a0e66605912386f9d2367601585 100644 (file)
@@ -136,8 +136,6 @@ error:
 
 const struct pipe_buf_operations page_cache_pipe_buf_ops = {
        .can_merge = 0,
-       .map = generic_pipe_buf_map,
-       .unmap = generic_pipe_buf_unmap,
        .confirm = page_cache_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = page_cache_pipe_buf_steal,
@@ -156,8 +154,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 
 static const struct pipe_buf_operations user_page_pipe_buf_ops = {
        .can_merge = 0,
-       .map = generic_pipe_buf_map,
-       .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = user_page_pipe_buf_steal,
@@ -547,8 +543,6 @@ EXPORT_SYMBOL(generic_file_splice_read);
 
 static const struct pipe_buf_operations default_pipe_buf_ops = {
        .can_merge = 0,
-       .map = generic_pipe_buf_map,
-       .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = generic_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
@@ -564,8 +558,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
 /* Pipe buffer operations for a socket and similar. */
 const struct pipe_buf_operations nosteal_pipe_buf_ops = {
        .can_merge = 0,
-       .map = generic_pipe_buf_map,
-       .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = generic_pipe_buf_release,
        .steal = generic_pipe_buf_nosteal,
@@ -767,13 +759,13 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                goto out;
 
        if (buf->page != page) {
-               char *src = buf->ops->map(pipe, buf, 1);
+               char *src = kmap_atomic(buf->page);
                char *dst = kmap_atomic(page);
 
                memcpy(dst + offset, src + buf->offset, this_len);
                flush_dcache_page(page);
                kunmap_atomic(dst);
-               buf->ops->unmap(pipe, buf, src);
+               kunmap_atomic(src);
        }
        ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
                                page, fsdata);
@@ -1067,9 +1059,9 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        void *data;
        loff_t tmp = sd->pos;
 
-       data = buf->ops->map(pipe, buf, 0);
+       data = kmap(buf->page);
        ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
-       buf->ops->unmap(pipe, buf, data);
+       kunmap(buf->page);
 
        return ret;
 }
@@ -1528,116 +1520,52 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                        struct splice_desc *sd)
 {
-       char *src;
-       int ret;
-
-       /*
-        * See if we can use the atomic maps, by prefaulting in the
-        * pages and doing an atomic copy
-        */
-       if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
-               src = buf->ops->map(pipe, buf, 1);
-               ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
-                                                       sd->len);
-               buf->ops->unmap(pipe, buf, src);
-               if (!ret) {
-                       ret = sd->len;
-                       goto out;
-               }
-       }
-
-       /*
-        * No dice, use slow non-atomic map and copy
-        */
-       src = buf->ops->map(pipe, buf, 0);
-
-       ret = sd->len;
-       if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
-               ret = -EFAULT;
-
-       buf->ops->unmap(pipe, buf, src);
-out:
-       if (ret > 0)
-               sd->u.userptr += ret;
-       return ret;
+       int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data);
+       return n == sd->len ? n : -EFAULT;
 }
 
 /*
  * For lack of a better implementation, implement vmsplice() to userspace
  * as a simple copy of the pipes pages to the user iov.
  */
-static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
+static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
                             unsigned long nr_segs, unsigned int flags)
 {
        struct pipe_inode_info *pipe;
        struct splice_desc sd;
-       ssize_t size;
-       int error;
        long ret;
+       struct iovec iovstack[UIO_FASTIOV];
+       struct iovec *iov = iovstack;
+       struct iov_iter iter;
+       ssize_t count = 0;
 
        pipe = get_pipe_info(file);
        if (!pipe)
                return -EBADF;
 
-       pipe_lock(pipe);
-
-       error = ret = 0;
-       while (nr_segs) {
-               void __user *base;
-               size_t len;
-
-               /*
-                * Get user address base and length for this iovec.
-                */
-               error = get_user(base, &iov->iov_base);
-               if (unlikely(error))
-                       break;
-               error = get_user(len, &iov->iov_len);
-               if (unlikely(error))
-                       break;
-
-               /*
-                * Sanity check this iovec. 0 read succeeds.
-                */
-               if (unlikely(!len))
-                       break;
-               if (unlikely(!base)) {
-                       error = -EFAULT;
-                       break;
-               }
-
-               if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
-                       error = -EFAULT;
-                       break;
-               }
-
-               sd.len = 0;
-               sd.total_len = len;
-               sd.flags = flags;
-               sd.u.userptr = base;
-               sd.pos = 0;
-
-               size = __splice_from_pipe(pipe, &sd, pipe_to_user);
-               if (size < 0) {
-                       if (!ret)
-                               ret = size;
-
-                       break;
-               }
-
-               ret += size;
+       ret = rw_copy_check_uvector(READ, uiov, nr_segs,
+                                   ARRAY_SIZE(iovstack), iovstack, &iov);
+       /* iov may be heap-allocated even on failure; always free via out: */
+       if (ret <= 0)
+               goto out;
 
-               if (size < len)
-                       break;
+       /* on success ret is the total byte length of the user iovec */
+       count = ret;
+       iov_iter_init(&iter, iov, nr_segs, count, 0);
 
-               nr_segs--;
-               iov++;
-       }
+       sd.len = 0;
+       sd.total_len = count;
+       sd.flags = flags;
+       sd.u.data = &iter;
+       sd.pos = 0;
 
+       pipe_lock(pipe);
+       ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
        pipe_unlock(pipe);
 
-       if (!ret)
-               ret = error;
+out:
+       if (iov != iovstack)
+               kfree(iov);
 
        return ret;
 }
index 1037637957c7670e1a66e6bf1a8e51c80fbcc49d..d2c170f8b035a4b21ef6eac2274e74b137346d56 100644 (file)
@@ -171,7 +171,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        } else
                up_write(&iinfo->i_data_sem);
 
-       retval = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+       retval = __generic_file_aio_write(iocb, iov, nr_segs);
        mutex_unlock(&inode->i_mutex);
 
        if (retval > 0) {
index 003c0051b62fa34ce086154413095b4fbdb4317d..79e96ce987331cad3aab2d0a7513ce19cad2fcb4 100644 (file)
@@ -699,7 +699,7 @@ xfs_file_dio_aio_write(
 
        trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
        ret = generic_file_direct_write(iocb, iovp,
-                       &nr_segs, pos, &iocb->ki_pos, count, ocount);
+                       &nr_segs, pos, count, ocount);
 
 out:
        xfs_rw_iunlock(ip, iolock);
@@ -715,7 +715,7 @@ xfs_file_buffered_aio_write(
        const struct iovec      *iovp,
        unsigned long           nr_segs,
        loff_t                  pos,
-       size_t                  ocount)
+       size_t                  count)
 {
        struct file             *file = iocb->ki_filp;
        struct address_space    *mapping = file->f_mapping;
@@ -724,7 +724,7 @@ xfs_file_buffered_aio_write(
        ssize_t                 ret;
        int                     enospc = 0;
        int                     iolock = XFS_IOLOCK_EXCL;
-       size_t                  count = ocount;
+       struct iov_iter         from;
 
        xfs_rw_ilock(ip, iolock);
 
@@ -732,14 +732,15 @@ xfs_file_buffered_aio_write(
        if (ret)
                goto out;
 
+       iov_iter_init(&from, iovp, nr_segs, count, 0);
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
 
 write_retry:
        trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_buffered_write(iocb, iovp, nr_segs,
-                       pos, &iocb->ki_pos, count, 0);
-
+       ret = generic_perform_write(file, &from, pos);
+       if (likely(ret >= 0))
+               iocb->ki_pos = pos + ret;
        /*
         * If we just got an ENOSPC, try to write back all dirty inodes to
         * convert delalloc space to free up some of the excess reserved
index bcfe61202115510b22509ad49aadc16bcbbb4368..0b18776b075e44fb7f7d818183fa03dcfc7b3f4d 100644 (file)
@@ -271,32 +271,6 @@ xfs_open_by_handle(
        return error;
 }
 
-/*
- * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
- * unused first argument.
- */
-STATIC int
-do_readlink(
-       char __user             *buffer,
-       int                     buflen,
-       const char              *link)
-{
-        int len;
-
-       len = PTR_ERR(link);
-       if (IS_ERR(link))
-               goto out;
-
-       len = strlen(link);
-       if (len > (unsigned) buflen)
-               len = buflen;
-       if (copy_to_user(buffer, link, len))
-               len = -EFAULT;
- out:
-       return len;
-}
-
-
 int
 xfs_readlink_by_handle(
        struct file             *parfilp,
@@ -334,7 +308,7 @@ xfs_readlink_by_handle(
        error = -xfs_readlink(XFS_I(dentry->d_inode), link);
        if (error)
                goto out_kfree;
-       error = do_readlink(hreq->ohandle, olen, link);
+       error = readlink_copy(hreq->ohandle, olen, link);
        if (error)
                goto out_kfree;
 
index 5aa372a7380c6f26ccb77e523a5fb28367a6beed..bba5508269219a0726ede9f97cc3d22afa97f213 100644 (file)
@@ -388,7 +388,7 @@ struct sg_iovec;
 struct rq_map_data;
 extern struct bio *bio_map_user_iov(struct request_queue *,
                                    struct block_device *,
-                                   struct sg_iovec *, int, int, gfp_t);
+                                   const struct sg_iovec *, int, int, gfp_t);
 extern void bio_unmap_user(struct bio *);
 extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
                                gfp_t);
@@ -414,7 +414,8 @@ extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
 extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
                                 unsigned long, unsigned int, int, gfp_t);
 extern struct bio *bio_copy_user_iov(struct request_queue *,
-                                    struct rq_map_data *, struct sg_iovec *,
+                                    struct rq_map_data *,
+                                    const struct sg_iovec *,
                                     int, int, gfp_t);
 extern int bio_uncopy_user(struct bio *);
 void zero_fill_bio(struct bio *bio);
index 99617cf7dd1a5bd29866e33e0ced51ae28279b3b..0d84981ee03fc1c9d7bd5b656611b8b87af696e0 100644 (file)
@@ -835,8 +835,8 @@ extern int blk_rq_map_user(struct request_queue *, struct request *,
 extern int blk_rq_unmap_user(struct bio *);
 extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
 extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
-                              struct rq_map_data *, struct sg_iovec *, int,
-                              unsigned int, gfp_t);
+                              struct rq_map_data *, const struct sg_iovec *,
+                              int, unsigned int, gfp_t);
 extern int blk_execute_rq(struct request_queue *, struct gendisk *,
                          struct request *, int);
 extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
index d77797a52b7bf01ab367c312e457b1e0771107a8..c40302f909ce00910260e2818dd712d658257a77 100644 (file)
@@ -210,8 +210,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
 int block_write_full_page_endio(struct page *page, get_block_t *get_block,
                        struct writeback_control *wbc, bh_end_io_t *handler);
 int block_read_full_page(struct page*, get_block_t*);
-int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
-                               unsigned long from);
+int block_is_partially_uptodate(struct page *page, unsigned long from,
+                               unsigned long count);
 int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
                unsigned flags, struct page **pagep, get_block_t *get_block);
 int __block_write_begin(struct page *page, loff_t pos, unsigned len,
index 70e8e21c0a303a3db8b7a59ff3b6f511cf225c46..230f87bdf5ad02008ff622e65bc761e41d4b22e0 100644 (file)
@@ -63,8 +63,6 @@ struct file_operations;
 struct vfsmount;
 struct dentry;
 
-extern void __init files_defer_init(void);
-
 #define rcu_dereference_check_fdtable(files, fdtfd) \
        rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock))
 
index 81048f9bc7837e3ce32fb12dddf158a09fbaf302..7a9c5bca2b7694f5496dbcf793eea2920fd37af9 100644 (file)
@@ -48,6 +48,7 @@ struct cred;
 struct swap_info_struct;
 struct seq_file;
 struct workqueue_struct;
+struct iov_iter;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -125,6 +126,8 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 
 /* File needs atomic accesses to f_pos */
 #define FMODE_ATOMIC_POS       ((__force fmode_t)0x8000)
+/* Write access to underlying fs */
+#define FMODE_WRITER           ((__force fmode_t)0x10000)
 
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY         ((__force fmode_t)0x1000000)
@@ -293,38 +296,6 @@ struct page;
 struct address_space;
 struct writeback_control;
 
-struct iov_iter {
-       const struct iovec *iov;
-       unsigned long nr_segs;
-       size_t iov_offset;
-       size_t count;
-};
-
-size_t iov_iter_copy_from_user_atomic(struct page *page,
-               struct iov_iter *i, unsigned long offset, size_t bytes);
-size_t iov_iter_copy_from_user(struct page *page,
-               struct iov_iter *i, unsigned long offset, size_t bytes);
-void iov_iter_advance(struct iov_iter *i, size_t bytes);
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
-size_t iov_iter_single_seg_count(const struct iov_iter *i);
-
-static inline void iov_iter_init(struct iov_iter *i,
-                       const struct iovec *iov, unsigned long nr_segs,
-                       size_t count, size_t written)
-{
-       i->iov = iov;
-       i->nr_segs = nr_segs;
-       i->iov_offset = 0;
-       i->count = count + written;
-
-       iov_iter_advance(i, written);
-}
-
-static inline size_t iov_iter_count(struct iov_iter *i)
-{
-       return i->count;
-}
-
 /*
  * "descriptor" for what we're up to with a read.
  * This allows us to use the same read code yet
@@ -383,7 +354,7 @@ struct address_space_operations {
        int (*migratepage) (struct address_space *,
                        struct page *, struct page *, enum migrate_mode);
        int (*launder_page) (struct page *);
-       int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
+       int (*is_partially_uptodate) (struct page *, unsigned long,
                                        unsigned long);
        void (*is_dirty_writeback) (struct page *, bool *, bool *);
        int (*error_remove_page)(struct address_space *, struct page *);
@@ -770,9 +741,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
                index <  ra->start + ra->size);
 }
 
-#define FILE_MNT_WRITE_TAKEN   1
-#define FILE_MNT_WRITE_RELEASED        2
-
 struct file {
        union {
                struct llist_node       fu_llist;
@@ -810,9 +778,6 @@ struct file {
        struct list_head        f_tfile_llink;
 #endif /* #ifdef CONFIG_EPOLL */
        struct address_space    *f_mapping;
-#ifdef CONFIG_DEBUG_WRITECOUNT
-       unsigned long f_mnt_write_state;
-#endif
 } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
 
 struct file_handle {
@@ -830,49 +795,6 @@ static inline struct file *get_file(struct file *f)
 #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
 #define file_count(x)  atomic_long_read(&(x)->f_count)
 
-#ifdef CONFIG_DEBUG_WRITECOUNT
-static inline void file_take_write(struct file *f)
-{
-       WARN_ON(f->f_mnt_write_state != 0);
-       f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
-}
-static inline void file_release_write(struct file *f)
-{
-       f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED;
-}
-static inline void file_reset_write(struct file *f)
-{
-       f->f_mnt_write_state = 0;
-}
-static inline void file_check_state(struct file *f)
-{
-       /*
-        * At this point, either both or neither of these bits
-        * should be set.
-        */
-       WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN);
-       WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED);
-}
-static inline int file_check_writeable(struct file *f)
-{
-       if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN)
-               return 0;
-       printk(KERN_WARNING "writeable file with no "
-                           "mnt_want_write()\n");
-       WARN_ON(1);
-       return -EINVAL;
-}
-#else /* !CONFIG_DEBUG_WRITECOUNT */
-static inline void file_take_write(struct file *filp) {}
-static inline void file_release_write(struct file *filp) {}
-static inline void file_reset_write(struct file *filp) {}
-static inline void file_check_state(struct file *filp) {}
-static inline int file_check_writeable(struct file *filp)
-{
-       return 0;
-}
-#endif /* CONFIG_DEBUG_WRITECOUNT */
-
 #define        MAX_NON_LFS     ((1UL<<31) - 1)
 
 /* Page cache limit. The filesystems should put that into their s_maxbytes 
@@ -2481,16 +2403,13 @@ extern int generic_file_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
                unsigned long size, pgoff_t pgoff);
-extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long,
-               loff_t *);
+extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
-               unsigned long *, loff_t, loff_t *, size_t, size_t);
-extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *,
-               unsigned long, loff_t, loff_t *, size_t, ssize_t);
+               unsigned long *, loff_t, size_t, size_t);
+extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
 extern int generic_segment_checks(const struct iovec *iov,
@@ -2582,7 +2501,7 @@ extern const struct file_operations generic_ro_fops;
 
 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
 
-extern int vfs_readlink(struct dentry *, char __user *, int, const char *);
+extern int readlink_copy(char __user *, int, const char *);
 extern int page_readlink(struct dentry *, char __user *, int);
 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
 extern void page_put_link(struct dentry *, struct nameidata *, void *);
index 371d346fa270dbfe7d8d3ac4a7849ab9cf6b5f77..839bac2709048da4d3f2d5099da1db618adcc729 100644 (file)
@@ -44,6 +44,8 @@ struct mnt_namespace;
 #define MNT_SHARED_MASK        (MNT_UNBINDABLE)
 #define MNT_PROPAGATION_MASK   (MNT_SHARED | MNT_UNBINDABLE)
 
+#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
+                           MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
 
 #define MNT_INTERNAL   0x4000
 
@@ -51,6 +53,7 @@ struct mnt_namespace;
 #define MNT_LOCKED             0x800000
 #define MNT_DOOMED             0x1000000
 #define MNT_SYNC_UMOUNT                0x2000000
+#define MNT_MARKED             0x4000000
 
 struct vfsmount {
        struct dentry *mnt_root;        /* root of the mounted tree */
index ae4981ebd18eaacacafb0cf34b040f67c2c3b729..f62f78aef4ac32e315fb3fe705dd4364e957158a 100644 (file)
@@ -24,8 +24,7 @@ struct request;
 struct nbd_device {
        int flags;
        int harderror;          /* Code of hard error                   */
-       struct socket * sock;
-       struct file * file;     /* If == NULL, device is not ready, yet */
+       struct socket * sock;   /* If == NULL, device is not ready, yet */
        int magic;
 
        spinlock_t queue_lock;
index 4d9389c79e61b4abe20666aa9f9c89d676a8c6b1..eb8b8ac6df3c844e2bd84903e0a50ff07f1575fe 100644 (file)
@@ -82,23 +82,6 @@ struct pipe_buf_operations {
         */
        int can_merge;
 
-       /*
-        * ->map() returns a virtual address mapping of the pipe buffer.
-        * The last integer flag reflects whether this should be an atomic
-        * mapping or not. The atomic map is faster, however you can't take
-        * page faults before calling ->unmap() again. So if you need to eg
-        * access user data through copy_to/from_user(), then you must get
-        * a non-atomic map. ->map() uses the kmap_atomic slot for
-        * atomic maps, you have to be careful if mapping another page as
-        * source or destination for a copy.
-        */
-       void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int);
-
-       /*
-        * Undoes ->map(), finishes the virtual mapping of the pipe buffer.
-        */
-       void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *);
-
        /*
         * ->confirm() verifies that the data in the pipe buffer is there
         * and that the contents are good. If the pages in the pipe belong
@@ -150,8 +133,6 @@ struct pipe_inode_info *alloc_pipe_info(void);
 void free_pipe_info(struct pipe_inode_info *);
 
 /* Generic pipe buffer ops functions */
-void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
-void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
 void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
index c55ce243cc0985c450786e4cb63f8ed3c8e5c53b..199bcc34241ba0155a367f11d05edf5d9c138a02 100644 (file)
@@ -9,14 +9,23 @@
 #ifndef __LINUX_UIO_H
 #define __LINUX_UIO_H
 
+#include <linux/kernel.h>
 #include <uapi/linux/uio.h>
 
+struct page;
 
 struct kvec {
        void *iov_base; /* and that should *never* hold a userland pointer */
        size_t iov_len;
 };
 
+struct iov_iter {
+       const struct iovec *iov;
+       unsigned long nr_segs;
+       size_t iov_offset;
+       size_t count;
+};
+
 /*
  * Total number of bytes covered by an iovec.
  *
@@ -34,8 +43,51 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
        return ret;
 }
 
+static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
+{
+       return (struct iovec) {
+               .iov_base = iter->iov->iov_base + iter->iov_offset,
+               .iov_len = min(iter->count,
+                              iter->iov->iov_len - iter->iov_offset),
+       };
+}
+
+#define iov_for_each(iov, iter, start)                         \
+       for (iter = (start);                                    \
+            (iter).count &&                                    \
+            ((iov = iov_iter_iovec(&(iter))), 1);              \
+            iov_iter_advance(&(iter), (iov).iov_len))
+
 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
 
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+               struct iov_iter *i, unsigned long offset, size_t bytes);
+size_t iov_iter_copy_from_user(struct page *page,
+               struct iov_iter *i, unsigned long offset, size_t bytes);
+void iov_iter_advance(struct iov_iter *i, size_t bytes);
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
+size_t iov_iter_single_seg_count(const struct iov_iter *i);
+size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i);
+
+static inline void iov_iter_init(struct iov_iter *i,
+                       const struct iovec *iov, unsigned long nr_segs,
+                       size_t count, size_t written)
+{
+       i->iov = iov;
+       i->nr_segs = nr_segs;
+       i->iov_offset = 0;
+       i->count = count + written;
+
+       iov_iter_advance(i, written);
+}
+
+static inline size_t iov_iter_count(struct iov_iter *i)
+{
+       return i->count;
+}
+
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
+
 #endif
index 52d6a6f56261d26dabc458684a3529d15710e30b..5a56d3c8dc03a799e5f53169c85c7f33f05727eb 100644 (file)
@@ -1195,8 +1195,6 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
 
 static const struct pipe_buf_operations relay_pipe_buf_ops = {
        .can_merge = 0,
-       .map = generic_pipe_buf_map,
-       .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = relay_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
@@ -1253,7 +1251,7 @@ static ssize_t subbuf_splice_actor(struct file *in,
        subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
        pidx = (read_start / PAGE_SIZE) % subbuf_pages;
        poff = read_start & ~PAGE_MASK;
-       nr_pages = min_t(unsigned int, subbuf_pages, pipe->buffers);
+       nr_pages = min_t(unsigned int, subbuf_pages, spd.nr_pages_max);
 
        for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) {
                unsigned int this_len, this_end, private;
index e3e665685ee5286ff47d3f570ef7418a9d56072b..737b0efa1a624aae606a0c50406de5f652e51862 100644 (file)
@@ -4392,8 +4392,6 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
 
 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
        .can_merge              = 0,
-       .map                    = generic_pipe_buf_map,
-       .unmap                  = generic_pipe_buf_unmap,
        .confirm                = generic_pipe_buf_confirm,
        .release                = generic_pipe_buf_release,
        .steal                  = generic_pipe_buf_steal,
@@ -4488,7 +4486,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
        trace_access_lock(iter->cpu_file);
 
        /* Fill as many pages as possible. */
-       for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
+       for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
                spd.pages[i] = alloc_page(GFP_KERNEL);
                if (!spd.pages[i])
                        break;
@@ -5281,8 +5279,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
 /* Pipe buffer operations for a buffer. */
 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
        .can_merge              = 0,
-       .map                    = generic_pipe_buf_map,
-       .unmap                  = generic_pipe_buf_unmap,
        .confirm                = generic_pipe_buf_confirm,
        .release                = buffer_pipe_buf_release,
        .steal                  = generic_pipe_buf_steal,
@@ -5358,7 +5354,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
        trace_access_lock(iter->cpu_file);
        entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
 
-       for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
+       for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
                struct page *page;
                int r;
 
index dd7f8858188a6ac92ac19bb7ae032d62786ee612..140b66a874c1d5e9a069eade2fa6aebc6d65013d 100644 (file)
@@ -1045,16 +1045,6 @@ config DEBUG_BUGVERBOSE
          of the BUG call as well as the EIP and oops trace.  This aids
          debugging but costs about 70-100K of memory.
 
-config DEBUG_WRITECOUNT
-       bool "Debug filesystem writers count"
-       depends on DEBUG_KERNEL
-       help
-         Enable this to catch wrong use of the writers count in struct
-         vfsmount.  This will increase the size of each file struct by
-         32 bits.
-
-         If unsure, say N.
-
 config DEBUG_LIST
        bool "Debug linked list manipulation"
        depends on DEBUG_KERNEL
index 9e5aaf92197d3fcc7038e86d9769dc0def5bf1c3..b484452dac57ea5e531918837d658c9d5d159ea6 100644 (file)
@@ -17,7 +17,8 @@ obj-y                 := filemap.o mempool.o oom_kill.o fadvise.o \
                           util.o mmzone.o vmstat.o backing-dev.o \
                           mm_init.o mmu_context.o percpu.o slab_common.o \
                           compaction.o balloon_compaction.o vmacache.o \
-                          interval_tree.o list_lru.o workingset.o $(mmu-y)
+                          interval_tree.o list_lru.o workingset.o \
+                          iov_iter.o $(mmu-y)
 
 obj-y += init-mm.o
 
index 27ebc0c9571bb8831ceb38c4e71e1bf57227f098..a82fbe4c9e8e1c1d5a3eed5e2649ec87a7bfd16d 100644 (file)
@@ -77,7 +77,7 @@
  *  ->mmap_sem
  *    ->lock_page              (access_process_vm)
  *
- *  ->i_mutex                  (generic_file_buffered_write)
+ *  ->i_mutex                  (generic_perform_write)
  *    ->mmap_sem               (fault_in_pages_readable->do_page_fault)
  *
  *  bdi->wb.list_lock
@@ -1428,7 +1428,8 @@ static void shrink_readahead_size_eio(struct file *filp,
  * do_generic_file_read - generic file read routine
  * @filp:      the file to read
  * @ppos:      current file position
- * @desc:      read_descriptor
+ * @iter:      data destination
+ * @written:   already copied
  *
  * This is a generic file read routine, and uses the
  * mapping->a_ops->readpage() function for the actual low-level stuff.
@@ -1436,8 +1437,8 @@ static void shrink_readahead_size_eio(struct file *filp,
  * This is really ugly. But the goto's actually try to clarify some
  * of the logic when it comes to error handling etc.
  */
-static void do_generic_file_read(struct file *filp, loff_t *ppos,
-               read_descriptor_t *desc)
+static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
+               struct iov_iter *iter, ssize_t written)
 {
        struct address_space *mapping = filp->f_mapping;
        struct inode *inode = mapping->host;
@@ -1447,12 +1448,12 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos,
        pgoff_t prev_index;
        unsigned long offset;      /* offset into pagecache page */
        unsigned int prev_offset;
-       int error;
+       int error = 0;
 
        index = *ppos >> PAGE_CACHE_SHIFT;
        prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
        prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
-       last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
+       last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
        offset = *ppos & ~PAGE_CACHE_MASK;
 
        for (;;) {
@@ -1487,7 +1488,7 @@ find_page:
                        if (!page->mapping)
                                goto page_not_up_to_date_locked;
                        if (!mapping->a_ops->is_partially_uptodate(page,
-                                                               desc, offset))
+                                                       offset, iter->count))
                                goto page_not_up_to_date_locked;
                        unlock_page(page);
                }
@@ -1537,24 +1538,23 @@ page_ok:
                /*
                 * Ok, we have the page, and it's up-to-date, so
                 * now we can copy it to user space...
-                *
-                * The file_read_actor routine returns how many bytes were
-                * actually used..
-                * NOTE! This may not be the same as how much of a user buffer
-                * we filled up (we may be padding etc), so we can only update
-                * "pos" here (the actor routine has to update the user buffer
-                * pointers and the remaining count).
                 */
-               ret = file_read_actor(desc, page, offset, nr);
+
+               ret = copy_page_to_iter(page, offset, nr, iter);
                offset += ret;
                index += offset >> PAGE_CACHE_SHIFT;
                offset &= ~PAGE_CACHE_MASK;
                prev_offset = offset;
 
                page_cache_release(page);
-               if (ret == nr && desc->count)
-                       continue;
-               goto out;
+               written += ret;
+               if (!iov_iter_count(iter))
+                       goto out;
+               if (ret < nr) {
+                       error = -EFAULT;
+                       goto out;
+               }
+               continue;
 
 page_not_up_to_date:
                /* Get exclusive access to the page ... */
@@ -1589,6 +1589,7 @@ readpage:
                if (unlikely(error)) {
                        if (error == AOP_TRUNCATED_PAGE) {
                                page_cache_release(page);
+                               error = 0;
                                goto find_page;
                        }
                        goto readpage_error;
@@ -1619,7 +1620,6 @@ readpage:
 
 readpage_error:
                /* UHHUH! A synchronous read error occurred. Report it */
-               desc->error = error;
                page_cache_release(page);
                goto out;
 
@@ -1630,16 +1630,17 @@ no_cached_page:
                 */
                page = page_cache_alloc_cold(mapping);
                if (!page) {
-                       desc->error = -ENOMEM;
+                       error = -ENOMEM;
                        goto out;
                }
                error = add_to_page_cache_lru(page, mapping,
                                                index, GFP_KERNEL);
                if (error) {
                        page_cache_release(page);
-                       if (error == -EEXIST)
+                       if (error == -EEXIST) {
+                               error = 0;
                                goto find_page;
-                       desc->error = error;
+                       }
                        goto out;
                }
                goto readpage;
@@ -1652,44 +1653,7 @@ out:
 
        *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
        file_accessed(filp);
-}
-
-int file_read_actor(read_descriptor_t *desc, struct page *page,
-                       unsigned long offset, unsigned long size)
-{
-       char *kaddr;
-       unsigned long left, count = desc->count;
-
-       if (size > count)
-               size = count;
-
-       /*
-        * Faults on the destination of a read are common, so do it before
-        * taking the kmap.
-        */
-       if (!fault_in_pages_writeable(desc->arg.buf, size)) {
-               kaddr = kmap_atomic(page);
-               left = __copy_to_user_inatomic(desc->arg.buf,
-                                               kaddr + offset, size);
-               kunmap_atomic(kaddr);
-               if (left == 0)
-                       goto success;
-       }
-
-       /* Do it the slow way */
-       kaddr = kmap(page);
-       left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
-       kunmap(page);
-
-       if (left) {
-               size -= left;
-               desc->error = -EFAULT;
-       }
-success:
-       desc->count = count - size;
-       desc->written += size;
-       desc->arg.buf += size;
-       return size;
+       return written ? written : error;
 }
 
 /*
@@ -1747,14 +1711,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 {
        struct file *filp = iocb->ki_filp;
        ssize_t retval;
-       unsigned long seg = 0;
        size_t count;
        loff_t *ppos = &iocb->ki_pos;
+       struct iov_iter i;
 
        count = 0;
        retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
        if (retval)
                return retval;
+       iov_iter_init(&i, iov, nr_segs, count, 0);
 
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
        if (filp->f_flags & O_DIRECT) {
@@ -1776,6 +1741,11 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                if (retval > 0) {
                        *ppos = pos + retval;
                        count -= retval;
+                       /*
+                        * If we did a short DIO read we need to skip the
+                        * section of the iov that we've already read data into.
+                        */
+                       iov_iter_advance(&i, retval);
                }
 
                /*
@@ -1792,39 +1762,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
                }
        }
 
-       count = retval;
-       for (seg = 0; seg < nr_segs; seg++) {
-               read_descriptor_t desc;
-               loff_t offset = 0;
-
-               /*
-                * If we did a short DIO read we need to skip the section of the
-                * iov that we've already read data into.
-                */
-               if (count) {
-                       if (count > iov[seg].iov_len) {
-                               count -= iov[seg].iov_len;
-                               continue;
-                       }
-                       offset = count;
-                       count = 0;
-               }
-
-               desc.written = 0;
-               desc.arg.buf = iov[seg].iov_base + offset;
-               desc.count = iov[seg].iov_len - offset;
-               if (desc.count == 0)
-                       continue;
-               desc.error = 0;
-               do_generic_file_read(filp, ppos, &desc);
-               retval += desc.written;
-               if (desc.error) {
-                       retval = retval ?: desc.error;
-                       break;
-               }
-               if (desc.count > 0)
-                       break;
-       }
+       retval = do_generic_file_read(filp, ppos, &i, retval);
 out:
        return retval;
 }
@@ -2335,150 +2273,6 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
 }
 EXPORT_SYMBOL(read_cache_page_gfp);
 
-static size_t __iovec_copy_from_user_inatomic(char *vaddr,
-                       const struct iovec *iov, size_t base, size_t bytes)
-{
-       size_t copied = 0, left = 0;
-
-       while (bytes) {
-               char __user *buf = iov->iov_base + base;
-               int copy = min(bytes, iov->iov_len - base);
-
-               base = 0;
-               left = __copy_from_user_inatomic(vaddr, buf, copy);
-               copied += copy;
-               bytes -= copy;
-               vaddr += copy;
-               iov++;
-
-               if (unlikely(left))
-                       break;
-       }
-       return copied - left;
-}
-
-/*
- * Copy as much as we can into the page and return the number of bytes which
- * were successfully copied.  If a fault is encountered then return the number of
- * bytes which were copied.
- */
-size_t iov_iter_copy_from_user_atomic(struct page *page,
-               struct iov_iter *i, unsigned long offset, size_t bytes)
-{
-       char *kaddr;
-       size_t copied;
-
-       BUG_ON(!in_atomic());
-       kaddr = kmap_atomic(page);
-       if (likely(i->nr_segs == 1)) {
-               int left;
-               char __user *buf = i->iov->iov_base + i->iov_offset;
-               left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
-               copied = bytes - left;
-       } else {
-               copied = __iovec_copy_from_user_inatomic(kaddr + offset,
-                                               i->iov, i->iov_offset, bytes);
-       }
-       kunmap_atomic(kaddr);
-
-       return copied;
-}
-EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
-
-/*
- * This has the same sideeffects and return value as
- * iov_iter_copy_from_user_atomic().
- * The difference is that it attempts to resolve faults.
- * Page must not be locked.
- */
-size_t iov_iter_copy_from_user(struct page *page,
-               struct iov_iter *i, unsigned long offset, size_t bytes)
-{
-       char *kaddr;
-       size_t copied;
-
-       kaddr = kmap(page);
-       if (likely(i->nr_segs == 1)) {
-               int left;
-               char __user *buf = i->iov->iov_base + i->iov_offset;
-               left = __copy_from_user(kaddr + offset, buf, bytes);
-               copied = bytes - left;
-       } else {
-               copied = __iovec_copy_from_user_inatomic(kaddr + offset,
-                                               i->iov, i->iov_offset, bytes);
-       }
-       kunmap(page);
-       return copied;
-}
-EXPORT_SYMBOL(iov_iter_copy_from_user);
-
-void iov_iter_advance(struct iov_iter *i, size_t bytes)
-{
-       BUG_ON(i->count < bytes);
-
-       if (likely(i->nr_segs == 1)) {
-               i->iov_offset += bytes;
-               i->count -= bytes;
-       } else {
-               const struct iovec *iov = i->iov;
-               size_t base = i->iov_offset;
-               unsigned long nr_segs = i->nr_segs;
-
-               /*
-                * The !iov->iov_len check ensures we skip over unlikely
-                * zero-length segments (without overruning the iovec).
-                */
-               while (bytes || unlikely(i->count && !iov->iov_len)) {
-                       int copy;
-
-                       copy = min(bytes, iov->iov_len - base);
-                       BUG_ON(!i->count || i->count < copy);
-                       i->count -= copy;
-                       bytes -= copy;
-                       base += copy;
-                       if (iov->iov_len == base) {
-                               iov++;
-                               nr_segs--;
-                               base = 0;
-                       }
-               }
-               i->iov = iov;
-               i->iov_offset = base;
-               i->nr_segs = nr_segs;
-       }
-}
-EXPORT_SYMBOL(iov_iter_advance);
-
-/*
- * Fault in the first iovec of the given iov_iter, to a maximum length
- * of bytes. Returns 0 on success, or non-zero if the memory could not be
- * accessed (ie. because it is an invalid address).
- *
- * writev-intensive code may want this to prefault several iovecs -- that
- * would be possible (callers must not rely on the fact that _only_ the
- * first iovec will be faulted with the current implementation).
- */
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
-{
-       char __user *buf = i->iov->iov_base + i->iov_offset;
-       bytes = min(bytes, i->iov->iov_len - i->iov_offset);
-       return fault_in_pages_readable(buf, bytes);
-}
-EXPORT_SYMBOL(iov_iter_fault_in_readable);
-
-/*
- * Return the count of just the current iov_iter segment.
- */
-size_t iov_iter_single_seg_count(const struct iov_iter *i)
-{
-       const struct iovec *iov = i->iov;
-       if (i->nr_segs == 1)
-               return i->count;
-       else
-               return min(i->count, iov->iov_len - i->iov_offset);
-}
-EXPORT_SYMBOL(iov_iter_single_seg_count);
-
 /*
  * Performs necessary checks before doing a write
  *
@@ -2585,7 +2379,7 @@ EXPORT_SYMBOL(pagecache_write_end);
 
 ssize_t
 generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long *nr_segs, loff_t pos, loff_t *ppos,
+               unsigned long *nr_segs, loff_t pos,
                size_t count, size_t ocount)
 {
        struct file     *file = iocb->ki_filp;
@@ -2646,7 +2440,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
                        i_size_write(inode, pos);
                        mark_inode_dirty(inode);
                }
-               *ppos = pos;
+               iocb->ki_pos = pos;
        }
 out:
        return written;
@@ -2692,7 +2486,7 @@ found:
 }
 EXPORT_SYMBOL(grab_cache_page_write_begin);
 
-static ssize_t generic_perform_write(struct file *file,
+ssize_t generic_perform_write(struct file *file,
                                struct iov_iter *i, loff_t pos)
 {
        struct address_space *mapping = file->f_mapping;
@@ -2742,9 +2536,7 @@ again:
                if (mapping_writably_mapped(mapping))
                        flush_dcache_page(page);
 
-               pagefault_disable();
                copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
-               pagefault_enable();
                flush_dcache_page(page);
 
                mark_page_accessed(page);
@@ -2782,27 +2574,7 @@ again:
 
        return written ? written : status;
 }
-
-ssize_t
-generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t pos, loff_t *ppos,
-               size_t count, ssize_t written)
-{
-       struct file *file = iocb->ki_filp;
-       ssize_t status;
-       struct iov_iter i;
-
-       iov_iter_init(&i, iov, nr_segs, count, written);
-       status = generic_perform_write(file, &i, pos);
-
-       if (likely(status >= 0)) {
-               written += status;
-               *ppos = pos + status;
-       }
-       
-       return written ? written : status;
-}
-EXPORT_SYMBOL(generic_file_buffered_write);
+EXPORT_SYMBOL(generic_perform_write);
 
 /**
  * __generic_file_aio_write - write data to a file
@@ -2824,16 +2596,18 @@ EXPORT_SYMBOL(generic_file_buffered_write);
  * avoid syncing under i_mutex.
  */
 ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-                                unsigned long nr_segs, loff_t *ppos)
+                                unsigned long nr_segs)
 {
        struct file *file = iocb->ki_filp;
        struct address_space * mapping = file->f_mapping;
        size_t ocount;          /* original count */
        size_t count;           /* after file limit checks */
        struct inode    *inode = mapping->host;
-       loff_t          pos;
-       ssize_t         written;
+       loff_t          pos = iocb->ki_pos;
+       ssize_t         written = 0;
        ssize_t         err;
+       ssize_t         status;
+       struct iov_iter from;
 
        ocount = 0;
        err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
@@ -2841,12 +2615,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                return err;
 
        count = ocount;
-       pos = *ppos;
 
        /* We can write back this queue in page reclaim */
        current->backing_dev_info = mapping->backing_dev_info;
-       written = 0;
-
        err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
        if (err)
                goto out;
@@ -2862,45 +2633,47 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        if (err)
                goto out;
 
+       iov_iter_init(&from, iov, nr_segs, count, 0);
+
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
        if (unlikely(file->f_flags & O_DIRECT)) {
                loff_t endbyte;
-               ssize_t written_buffered;
 
-               written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
-                                                       ppos, count, ocount);
+               written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
+                                                       count, ocount);
                if (written < 0 || written == count)
                        goto out;
+               iov_iter_advance(&from, written);
+
                /*
                 * direct-io write to a hole: fall through to buffered I/O
                 * for completing the rest of the request.
                 */
                pos += written;
                count -= written;
-               written_buffered = generic_file_buffered_write(iocb, iov,
-                                               nr_segs, pos, ppos, count,
-                                               written);
+
+               status = generic_perform_write(file, &from, pos);
                /*
-                * If generic_file_buffered_write() retuned a synchronous error
+                * If generic_perform_write() returned a synchronous error
                 * then we want to return the number of bytes which were
                 * direct-written, or the error code if that was zero.  Note
                 * that this differs from normal direct-io semantics, which
                 * will return -EFOO even if some bytes were written.
                 */
-               if (written_buffered < 0) {
-                       err = written_buffered;
+               if (unlikely(status < 0) && !written) {
+                       err = status;
                        goto out;
                }
-
+               iocb->ki_pos = pos + status;
                /*
                 * We need to ensure that the page cache pages are written to
                 * disk and invalidated to preserve the expected O_DIRECT
                 * semantics.
                 */
-               endbyte = pos + written_buffered - written - 1;
+               endbyte = pos + status - 1;
                err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
                if (err == 0) {
-                       written = written_buffered;
+                       written += status;
                        invalidate_mapping_pages(mapping,
                                                 pos >> PAGE_CACHE_SHIFT,
                                                 endbyte >> PAGE_CACHE_SHIFT);
@@ -2911,8 +2684,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                         */
                }
        } else {
-               written = generic_file_buffered_write(iocb, iov, nr_segs,
-                               pos, ppos, count, written);
+               written = generic_perform_write(file, &from, pos);
+               if (likely(written >= 0))
+                       iocb->ki_pos = pos + written;
        }
 out:
        current->backing_dev_info = NULL;
@@ -2941,7 +2715,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        BUG_ON(iocb->ki_pos != pos);
 
        mutex_lock(&inode->i_mutex);
-       ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+       ret = __generic_file_aio_write(iocb, iov, nr_segs);
        mutex_unlock(&inode->i_mutex);
 
        if (ret > 0) {
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
new file mode 100644 (file)
index 0000000..10e46cd
--- /dev/null
@@ -0,0 +1,224 @@
+#include <linux/export.h>
+#include <linux/uio.h>
+#include <linux/pagemap.h>
+
+/*
+ * copy_page_to_iter - copy data from a page into the user buffers of @i
+ * @page:   source page
+ * @offset: byte offset inside @page to start copying from
+ * @bytes:  number of bytes requested; clamped to i->count
+ * @i:      destination iterator, moved forward by the amount copied
+ *
+ * The copy is first attempted under kmap_atomic() after pre-faulting the
+ * first destination chunk.  If that leaves bytes uncopied, or the
+ * pre-fault fails, the remainder is retried under a regular kmap().
+ * Copying stops at the first chunk that cannot be written in full.
+ *
+ * Returns the number of bytes actually copied, which may be less than
+ * requested.
+ */
+size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+                        struct iov_iter *i)
+{
+       size_t skip, copy, left, wanted;
+       const struct iovec *iov;
+       char __user *buf;
+       void *kaddr, *from;
+
+       if (unlikely(bytes > i->count))
+               bytes = i->count;
+
+       if (unlikely(!bytes))
+               return 0;
+
+       /* bytes counts down from here on; wanted keeps the clamped request */
+       wanted = bytes;
+       iov = i->iov;
+       skip = i->iov_offset;
+       buf = iov->iov_base + skip;
+       copy = min(bytes, iov->iov_len - skip);
+
+       if (!fault_in_pages_writeable(buf, copy)) {
+               kaddr = kmap_atomic(page);
+               from = kaddr + offset;
+
+               /* first chunk, usually the only one */
+               left = __copy_to_user_inatomic(buf, from, copy);
+               copy -= left;
+               skip += copy;
+               from += copy;
+               bytes -= copy;
+
+               /* later segments start at their base, so skip restarts at copy */
+               while (unlikely(!left && bytes)) {
+                       iov++;
+                       buf = iov->iov_base;
+                       copy = min(bytes, iov->iov_len);
+                       left = __copy_to_user_inatomic(buf, from, copy);
+                       copy -= left;
+                       skip = copy;
+                       from += copy;
+                       bytes -= copy;
+               }
+               if (likely(!bytes)) {
+                       kunmap_atomic(kaddr);
+                       goto done;
+               }
+               /*
+                * Short atomic copy: record how far we got in the page and
+                * in the current segment so the kmap() path resumes there.
+                */
+               offset = from - kaddr;
+               buf += copy;
+               kunmap_atomic(kaddr);
+               copy = min(bytes, iov->iov_len - skip);
+       }
+       /* Too bad - revert to non-atomic kmap */
+       kaddr = kmap(page);
+       from = kaddr + offset;
+       left = __copy_to_user(buf, from, copy);
+       copy -= left;
+       skip += copy;
+       from += copy;
+       bytes -= copy;
+       while (unlikely(!left && bytes)) {
+               iov++;
+               buf = iov->iov_base;
+               copy = min(bytes, iov->iov_len);
+               left = __copy_to_user(buf, from, copy);
+               copy -= left;
+               skip = copy;
+               from += copy;
+               bytes -= copy;
+       }
+       kunmap(page);
+done:
+       /*
+        * wanted - bytes is what was copied; commit the new position.
+        * When a segment was filled exactly, iov_offset is left equal to
+        * that segment's length; it is not normalised here.
+        */
+       i->count -= wanted - bytes;
+       i->nr_segs -= iov - i->iov;
+       i->iov = iov;
+       i->iov_offset = skip;
+       return wanted - bytes;
+}
+EXPORT_SYMBOL(copy_page_to_iter);
+
+/*
+ * Copy @bytes bytes into @vaddr from the user segments starting at @iov,
+ * beginning @base bytes into the first segment and moving on to the
+ * following segments as each one is used up.  The walk stops after the
+ * first segment whose copy comes up short.
+ *
+ * Returns the number of bytes copied.  The segment array is not bounded
+ * here; the caller has to make sure @bytes does not exceed what the
+ * segments hold.
+ */
+static size_t __iovec_copy_from_user_inatomic(char *vaddr,
+                       const struct iovec *iov, size_t base, size_t bytes)
+{
+       size_t copied = 0, left = 0;
+
+       while (bytes) {
+               char __user *buf = iov->iov_base + base;
+               /* size_t, not int: both min() operands are size_t */
+               size_t copy = min(bytes, iov->iov_len - base);
+
+               /* only the first segment is entered at an offset */
+               base = 0;
+               left = __copy_from_user_inatomic(vaddr, buf, copy);
+               copied += copy;
+               bytes -= copy;
+               vaddr += copy;
+               iov++;
+
+               if (unlikely(left))
+                       break;
+       }
+       /* the last chunk was counted in full; take off its uncopied tail */
+       return copied - left;
+}
+
+/*
+ * Copy up to @bytes bytes of user data described by @i into @page at
+ * @offset, with the page mapped through kmap_atomic().  Returns how many
+ * bytes were copied, which falls short of @bytes if a fault was hit.
+ * The iterator itself is not advanced.
+ */
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+               struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+       char *kaddr = kmap_atomic(page);
+       char *dst = kaddr + offset;
+       size_t copied;
+
+       if (unlikely(i->nr_segs != 1)) {
+               /* several segments: let the helper walk them */
+               copied = __iovec_copy_from_user_inatomic(dst, i->iov,
+                                               i->iov_offset, bytes);
+       } else {
+               /* single segment: one straight copy */
+               char __user *src = i->iov->iov_base + i->iov_offset;
+               int left = __copy_from_user_inatomic(dst, src, bytes);
+
+               copied = bytes - left;
+       }
+       kunmap_atomic(kaddr);
+
+       return copied;
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
+
+/*
+ * Same return value and side effects as
+ * iov_iter_copy_from_user_atomic(), but the page is mapped with kmap()
+ * so that faults on the user buffer can be resolved.
+ * Page must not be locked.
+ */
+size_t iov_iter_copy_from_user(struct page *page,
+               struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+       char *kaddr = kmap(page);
+       char *dst = kaddr + offset;
+       size_t copied;
+
+       if (unlikely(i->nr_segs != 1)) {
+               /* several segments: let the helper walk them */
+               copied = __iovec_copy_from_user_inatomic(dst, i->iov,
+                                               i->iov_offset, bytes);
+       } else {
+               /* single segment: one straight copy */
+               char __user *src = i->iov->iov_base + i->iov_offset;
+               int left = __copy_from_user(dst, src, bytes);
+
+               copied = bytes - left;
+       }
+       kunmap(page);
+       return copied;
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user);
+
+/*
+ * Move @i forward by @bytes bytes, updating its segment pointer, segment
+ * count, in-segment offset and remaining byte count.  @bytes must not
+ * exceed i->count (BUG otherwise).
+ */
+void iov_iter_advance(struct iov_iter *i, size_t bytes)
+{
+       BUG_ON(i->count < bytes);
+
+       if (likely(i->nr_segs == 1)) {
+               /* single segment: only the offset and the count move */
+               i->iov_offset += bytes;
+               i->count -= bytes;
+       } else {
+               const struct iovec *iov = i->iov;
+               size_t base = i->iov_offset;
+               unsigned long nr_segs = i->nr_segs;
+
+               /*
+                * The !iov->iov_len check ensures we skip over unlikely
+                * zero-length segments (without overrunning the iovec).
+                */
+               while (bytes || unlikely(i->count && !iov->iov_len)) {
+                       /* size_t to match min()'s operands and i->count */
+                       size_t copy;
+
+                       copy = min(bytes, iov->iov_len - base);
+                       BUG_ON(!i->count || i->count < copy);
+                       i->count -= copy;
+                       bytes -= copy;
+                       base += copy;
+                       /* segment used up: step to the start of the next */
+                       if (iov->iov_len == base) {
+                               iov++;
+                               nr_segs--;
+                               base = 0;
+                       }
+               }
+               i->iov = iov;
+               i->iov_offset = base;
+               i->nr_segs = nr_segs;
+       }
+}
+EXPORT_SYMBOL(iov_iter_advance);
+
+/*
+ * Fault in the first iovec of the given iov_iter, to a maximum length
+ * of bytes. Returns 0 on success, or non-zero if the memory could not be
+ * accessed (i.e. because it is an invalid address).
+ *
+ * writev-intensive code may want this to prefault several iovecs -- that
+ * would be possible (callers must not rely on the fact that _only_ the
+ * first iovec will be faulted with the current implementation).
+ */
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
+{
+       char __user *buf = i->iov->iov_base + i->iov_offset;
+       bytes = min(bytes, i->iov->iov_len - i->iov_offset);
+       return fault_in_pages_readable(buf, bytes);
+}
+EXPORT_SYMBOL(iov_iter_fault_in_readable);
+
+/*
+ * Return the count of just the current iov_iter segment.
+ */
+size_t iov_iter_single_seg_count(const struct iov_iter *i)
+{
+       const struct iovec *iov = i->iov;
+       if (i->nr_segs == 1)
+               return i->count;
+       else
+               return min(i->count, iov->iov_len - i->iov_offset);
+}
+EXPORT_SYMBOL(iov_iter_single_seg_count);
index cb79065c19e55f054888430555b44199425dd72b..8505c9262b35853e22580c6c9b74c4d12bc86acc 100644 (file)
 
 /**
  * process_vm_rw_pages - read/write pages from task specified
- * @task: task to read/write from
- * @mm: mm for task
- * @process_pages: struct pages area that can store at least
- *  nr_pages_to_copy struct page pointers
- * @pa: address of page in task to start copying from/to
+ * @pages: array of pointers to pages we want to copy
  * @start_offset: offset in page to start copying from/to
  * @len: number of bytes to copy
- * @lvec: iovec array specifying where to copy to/from
- * @lvec_cnt: number of elements in iovec array
- * @lvec_current: index in iovec array we are up to
- * @lvec_offset: offset in bytes from current iovec iov_base we are up to
+ * @iter: where to copy to/from locally
  * @vm_write: 0 means copy from, 1 means copy to
- * @nr_pages_to_copy: number of pages to copy
- * @bytes_copied: returns number of bytes successfully copied
  * Returns 0 on success, error code otherwise
  */
-static int process_vm_rw_pages(struct task_struct *task,
-                              struct mm_struct *mm,
-                              struct page **process_pages,
-                              unsigned long pa,
-                              unsigned long start_offset,
-                              unsigned long len,
-                              const struct iovec *lvec,
-                              unsigned long lvec_cnt,
-                              unsigned long *lvec_current,
-                              size_t *lvec_offset,
-                              int vm_write,
-                              unsigned int nr_pages_to_copy,
-                              ssize_t *bytes_copied)
+static int process_vm_rw_pages(struct page **pages,
+                              unsigned offset,
+                              size_t len,
+                              struct iov_iter *iter,
+                              int vm_write)
 {
-       int pages_pinned;
-       void *target_kaddr;
-       int pgs_copied = 0;
-       int j;
-       int ret;
-       ssize_t bytes_to_copy;
-       ssize_t rc = 0;
-
-       *bytes_copied = 0;
-
-       /* Get the pages we're interested in */
-       down_read(&mm->mmap_sem);
-       pages_pinned = get_user_pages(task, mm, pa,
-                                     nr_pages_to_copy,
-                                     vm_write, 0, process_pages, NULL);
-       up_read(&mm->mmap_sem);
-
-       if (pages_pinned != nr_pages_to_copy) {
-               rc = -EFAULT;
-               goto end;
-       }
-
        /* Do the copy for each page */
-       for (pgs_copied = 0;
-            (pgs_copied < nr_pages_to_copy) && (*lvec_current < lvec_cnt);
-            pgs_copied++) {
-               /* Make sure we have a non zero length iovec */
-               while (*lvec_current < lvec_cnt
-                      && lvec[*lvec_current].iov_len == 0)
-                       (*lvec_current)++;
-               if (*lvec_current == lvec_cnt)
-                       break;
-
-               /*
-                * Will copy smallest of:
-                * - bytes remaining in page
-                * - bytes remaining in destination iovec
-                */
-               bytes_to_copy = min_t(ssize_t, PAGE_SIZE - start_offset,
-                                     len - *bytes_copied);
-               bytes_to_copy = min_t(ssize_t, bytes_to_copy,
-                                     lvec[*lvec_current].iov_len
-                                     - *lvec_offset);
-
-               target_kaddr = kmap(process_pages[pgs_copied]) + start_offset;
-
-               if (vm_write)
-                       ret = copy_from_user(target_kaddr,
-                                            lvec[*lvec_current].iov_base
-                                            + *lvec_offset,
-                                            bytes_to_copy);
-               else
-                       ret = copy_to_user(lvec[*lvec_current].iov_base
-                                          + *lvec_offset,
-                                          target_kaddr, bytes_to_copy);
-               kunmap(process_pages[pgs_copied]);
-               if (ret) {
-                       *bytes_copied += bytes_to_copy - ret;
-                       pgs_copied++;
-                       rc = -EFAULT;
-                       goto end;
-               }
-               *bytes_copied += bytes_to_copy;
-               *lvec_offset += bytes_to_copy;
-               if (*lvec_offset == lvec[*lvec_current].iov_len) {
-                       /*
-                        * Need to copy remaining part of page into the
-                        * next iovec if there are any bytes left in page
-                        */
-                       (*lvec_current)++;
-                       *lvec_offset = 0;
-                       start_offset = (start_offset + bytes_to_copy)
-                               % PAGE_SIZE;
-                       if (start_offset)
-                               pgs_copied--;
+       while (len && iov_iter_count(iter)) {
+               struct page *page = *pages++;
+               size_t copy = PAGE_SIZE - offset;
+               size_t copied;
+
+               if (copy > len)
+                       copy = len;
+
+               if (vm_write) {
+                       if (copy > iov_iter_count(iter))
+                               copy = iov_iter_count(iter);
+                       copied = iov_iter_copy_from_user(page, iter,
+                                       offset, copy);
+                       iov_iter_advance(iter, copied);
+                       set_page_dirty_lock(page);
                } else {
-                       start_offset = 0;
-               }
-       }
-
-end:
-       if (vm_write) {
-               for (j = 0; j < pages_pinned; j++) {
-                       if (j < pgs_copied)
-                               set_page_dirty_lock(process_pages[j]);
-                       put_page(process_pages[j]);
+                       copied = copy_page_to_iter(page, offset, copy, iter);
                }
-       } else {
-               for (j = 0; j < pages_pinned; j++)
-                       put_page(process_pages[j]);
+               len -= copied;
+               if (copied < copy && iov_iter_count(iter))
+                       return -EFAULT;
+               offset = 0;
        }
-
-       return rc;
+       return 0;
 }
 
 /* Maximum number of pages kmalloc'd to hold struct page's during copy */
@@ -155,67 +70,60 @@ end:
  * process_vm_rw_single_vec - read/write pages from task specified
  * @addr: start memory address of target process
  * @len: size of area to copy to/from
- * @lvec: iovec array specifying where to copy to/from locally
- * @lvec_cnt: number of elements in iovec array
- * @lvec_current: index in iovec array we are up to
- * @lvec_offset: offset in bytes from current iovec iov_base we are up to
+ * @iter: where to copy to/from locally
  * @process_pages: struct pages area that can store at least
  *  nr_pages_to_copy struct page pointers
  * @mm: mm for task
  * @task: task to read/write from
  * @vm_write: 0 means copy from, 1 means copy to
- * @bytes_copied: returns number of bytes successfully copied
  * Returns 0 on success or on failure error code
  */
 static int process_vm_rw_single_vec(unsigned long addr,
                                    unsigned long len,
-                                   const struct iovec *lvec,
-                                   unsigned long lvec_cnt,
-                                   unsigned long *lvec_current,
-                                   size_t *lvec_offset,
+                                   struct iov_iter *iter,
                                    struct page **process_pages,
                                    struct mm_struct *mm,
                                    struct task_struct *task,
-                                   int vm_write,
-                                   ssize_t *bytes_copied)
+                                   int vm_write)
 {
        unsigned long pa = addr & PAGE_MASK;
        unsigned long start_offset = addr - pa;
        unsigned long nr_pages;
-       ssize_t bytes_copied_loop;
        ssize_t rc = 0;
-       unsigned long nr_pages_copied = 0;
-       unsigned long nr_pages_to_copy;
        unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
                / sizeof(struct pages *);
 
-       *bytes_copied = 0;
-
        /* Work out address and page range required */
        if (len == 0)
                return 0;
        nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;
 
-       while ((nr_pages_copied < nr_pages) && (*lvec_current < lvec_cnt)) {
-               nr_pages_to_copy = min(nr_pages - nr_pages_copied,
-                                      max_pages_per_loop);
+       while (!rc && nr_pages && iov_iter_count(iter)) {
+               int pages = min(nr_pages, max_pages_per_loop);
+               size_t bytes;
 
-               rc = process_vm_rw_pages(task, mm, process_pages, pa,
-                                        start_offset, len,
-                                        lvec, lvec_cnt,
-                                        lvec_current, lvec_offset,
-                                        vm_write, nr_pages_to_copy,
-                                        &bytes_copied_loop);
-               start_offset = 0;
-               *bytes_copied += bytes_copied_loop;
+               /* Get the pages we're interested in */
+               down_read(&mm->mmap_sem);
+               pages = get_user_pages(task, mm, pa, pages,
+                                     vm_write, 0, process_pages, NULL);
+               up_read(&mm->mmap_sem);
 
-               if (rc < 0) {
-                       return rc;
-               } else {
-                       len -= bytes_copied_loop;
-                       nr_pages_copied += nr_pages_to_copy;
-                       pa += nr_pages_to_copy * PAGE_SIZE;
-               }
+               if (pages <= 0)
+                       return -EFAULT;
+
+               bytes = pages * PAGE_SIZE - start_offset;
+               if (bytes > len)
+                       bytes = len;
+
+               rc = process_vm_rw_pages(process_pages,
+                                        start_offset, bytes, iter,
+                                        vm_write);
+               len -= bytes;
+               start_offset = 0;
+               nr_pages -= pages;
+               pa += pages * PAGE_SIZE;
+               while (pages)
+                       put_page(process_pages[--pages]);
        }
 
        return rc;
@@ -228,8 +136,7 @@ static int process_vm_rw_single_vec(unsigned long addr,
 /**
  * process_vm_rw_core - core of reading/writing pages from task specified
  * @pid: PID of process to read/write from/to
- * @lvec: iovec array specifying where to copy to/from locally
- * @liovcnt: size of lvec array
+ * @iter: where to copy to/from locally
  * @rvec: iovec array specifying where to copy to/from in the other process
  * @riovcnt: size of rvec array
  * @flags: currently unused
@@ -238,8 +145,7 @@ static int process_vm_rw_single_vec(unsigned long addr,
  *  return less bytes than expected if an error occurs during the copying
  *  process.
  */
-static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
-                                 unsigned long liovcnt,
+static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter,
                                  const struct iovec *rvec,
                                  unsigned long riovcnt,
                                  unsigned long flags, int vm_write)
@@ -250,13 +156,10 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
        struct mm_struct *mm;
        unsigned long i;
        ssize_t rc = 0;
-       ssize_t bytes_copied_loop;
-       ssize_t bytes_copied = 0;
        unsigned long nr_pages = 0;
        unsigned long nr_pages_iov;
-       unsigned long iov_l_curr_idx = 0;
-       size_t iov_l_curr_offset = 0;
        ssize_t iov_len;
+       size_t total_len = iov_iter_count(iter);
 
        /*
         * Work out how many pages of struct pages we're going to need
@@ -310,24 +213,20 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
                goto put_task_struct;
        }
 
-       for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) {
+       for (i = 0; i < riovcnt && iov_iter_count(iter) && !rc; i++)
                rc = process_vm_rw_single_vec(
                        (unsigned long)rvec[i].iov_base, rvec[i].iov_len,
-                       lvec, liovcnt, &iov_l_curr_idx, &iov_l_curr_offset,
-                       process_pages, mm, task, vm_write, &bytes_copied_loop);
-               bytes_copied += bytes_copied_loop;
-               if (rc != 0) {
-                       /* If we have managed to copy any data at all then
-                          we return the number of bytes copied. Otherwise
-                          we return the error code */
-                       if (bytes_copied)
-                               rc = bytes_copied;
-                       goto put_mm;
-               }
-       }
+                       iter, process_pages, mm, task, vm_write);
+
+       /* copied = space before - space after */
+       total_len -= iov_iter_count(iter);
+
+       /* If we have managed to copy any data at all then
+          we return the number of bytes copied. Otherwise
+          we return the error code */
+       if (total_len)
+               rc = total_len;
 
-       rc = bytes_copied;
-put_mm:
        mmput(mm);
 
 put_task_struct:
@@ -363,6 +262,7 @@ static ssize_t process_vm_rw(pid_t pid,
        struct iovec iovstack_r[UIO_FASTIOV];
        struct iovec *iov_l = iovstack_l;
        struct iovec *iov_r = iovstack_r;
+       struct iov_iter iter;
        ssize_t rc;
 
        if (flags != 0)
@@ -378,13 +278,14 @@ static ssize_t process_vm_rw(pid_t pid,
        if (rc <= 0)
                goto free_iovecs;
 
+       iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
+
        rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
                                   iovstack_r, &iov_r);
        if (rc <= 0)
                goto free_iovecs;
 
-       rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags,
-                               vm_write);
+       rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
 
 free_iovecs:
        if (iov_r != iovstack_r)
@@ -424,6 +325,7 @@ compat_process_vm_rw(compat_pid_t pid,
        struct iovec iovstack_r[UIO_FASTIOV];
        struct iovec *iov_l = iovstack_l;
        struct iovec *iov_r = iovstack_r;
+       struct iov_iter iter;
        ssize_t rc = -EFAULT;
 
        if (flags != 0)
@@ -439,14 +341,14 @@ compat_process_vm_rw(compat_pid_t pid,
                                                  &iov_l);
        if (rc <= 0)
                goto free_iovecs;
+       iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
        rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
                                          UIO_FASTIOV, iovstack_r,
                                          &iov_r);
        if (rc <= 0)
                goto free_iovecs;
 
-       rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags,
-                          vm_write);
+       rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
 
 free_iovecs:
        if (iov_r != iovstack_r)
index 70273f8df5867a33aeea7267e4268a949a35aaaa..8f1a95406bae6a61b20be247b50603cdedc30398 100644 (file)
@@ -1402,13 +1402,25 @@ shmem_write_end(struct file *file, struct address_space *mapping,
        return copied;
 }
 
-static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor)
+static ssize_t shmem_file_aio_read(struct kiocb *iocb,
+               const struct iovec *iov, unsigned long nr_segs, loff_t pos)
 {
-       struct inode *inode = file_inode(filp);
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
        struct address_space *mapping = inode->i_mapping;
        pgoff_t index;
        unsigned long offset;
        enum sgp_type sgp = SGP_READ;
+       int error;
+       ssize_t retval;
+       size_t count;
+       loff_t *ppos = &iocb->ki_pos;
+       struct iov_iter iter;
+
+       retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
+       if (retval)
+               return retval;
+       iov_iter_init(&iter, iov, nr_segs, count, 0);
 
        /*
         * Might this read be for a stacking filesystem?  Then when reading
@@ -1436,10 +1448,10 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
                                break;
                }
 
-               desc->error = shmem_getpage(inode, index, &page, sgp, NULL);
-               if (desc->error) {
-                       if (desc->error == -EINVAL)
-                               desc->error = 0;
+               error = shmem_getpage(inode, index, &page, sgp, NULL);
+               if (error) {
+                       if (error == -EINVAL)
+                               error = 0;
                        break;
                }
                if (page)
@@ -1483,61 +1495,26 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
                /*
                 * Ok, we have the page, and it's up-to-date, so
                 * now we can copy it to user space...
-                *
-                * The actor routine returns how many bytes were actually used..
-                * NOTE! This may not be the same as how much of a user buffer
-                * we filled up (we may be padding etc), so we can only update
-                * "pos" here (the actor routine has to update the user buffer
-                * pointers and the remaining count).
                 */
-               ret = actor(desc, page, offset, nr);
+               ret = copy_page_to_iter(page, offset, nr, &iter);
+               retval += ret;
                offset += ret;
                index += offset >> PAGE_CACHE_SHIFT;
                offset &= ~PAGE_CACHE_MASK;
 
                page_cache_release(page);
-               if (ret != nr || !desc->count)
+               if (!iov_iter_count(&iter))
                        break;
-
+               if (ret < nr) {
+                       error = -EFAULT;
+                       break;
+               }
                cond_resched();
        }
 
        *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
-       file_accessed(filp);
-}
-
-static ssize_t shmem_file_aio_read(struct kiocb *iocb,
-               const struct iovec *iov, unsigned long nr_segs, loff_t pos)
-{
-       struct file *filp = iocb->ki_filp;
-       ssize_t retval;
-       unsigned long seg;
-       size_t count;
-       loff_t *ppos = &iocb->ki_pos;
-
-       retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-       if (retval)
-               return retval;
-
-       for (seg = 0; seg < nr_segs; seg++) {
-               read_descriptor_t desc;
-
-               desc.written = 0;
-               desc.arg.buf = iov[seg].iov_base;
-               desc.count = iov[seg].iov_len;
-               if (desc.count == 0)
-                       continue;
-               desc.error = 0;
-               do_shmem_file_read(filp, ppos, &desc, file_read_actor);
-               retval += desc.written;
-               if (desc.error) {
-                       retval = retval ?: desc.error;
-                       break;
-               }
-               if (desc.count > 0)
-                       break;
-       }
-       return retval;
+       file_accessed(file);
+       return retval ? retval : error;
 }
 
 static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
@@ -1576,7 +1553,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
        index = *ppos >> PAGE_CACHE_SHIFT;
        loff = *ppos & ~PAGE_CACHE_MASK;
        req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-       nr_pages = min(req_pages, pipe->buffers);
+       nr_pages = min(req_pages, spd.nr_pages_max);
 
        spd.nr_pages = find_get_pages_contig(mapping, index,
                                                nr_pages, spd.pages);
index babd8626bf968f584153518d7ecb60e9a4616d39..6b540f1822e0b43c175466615ff78db50b0df0f5 100644 (file)
@@ -139,7 +139,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
        int error;
        int size;
 
-       if (!inode->i_op || !inode->i_op->getxattr)
+       if (!inode->i_op->getxattr)
                return -EOPNOTSUPP;
        desc = init_desc(type);
        if (IS_ERR(desc))
index 996092f21b64a4b71ee1a46acc49a94802e7618e..6e0bd933b6a9a8a815f5d57c147f1d18dfbfec36 100644 (file)
@@ -64,7 +64,7 @@ static int evm_find_protected_xattrs(struct dentry *dentry)
        int error;
        int count = 0;
 
-       if (!inode->i_op || !inode->i_op->getxattr)
+       if (!inode->i_op->getxattr)
                return -EOPNOTSUPP;
 
        for (xattr = evm_config_xattrnames; *xattr != NULL; xattr++) {
index 80a09c37cac8eba4cc26b121da37d2e03929cb55..a3386d119425eb8367194063470efe017eef622b 100644 (file)
@@ -173,7 +173,7 @@ static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer,
                 * Use filesystem name if filesystem does not support rename()
                 * operation.
                 */
-               if (inode->i_op && !inode->i_op->rename)
+               if (!inode->i_op->rename)
                        goto prepend_filesystem_name;
        }
        /* Prepend device name. */
@@ -282,7 +282,7 @@ char *tomoyo_realpath_from_path(struct path *path)
                 * Get local name for filesystems without rename() operation
                 * or dentry without vfsmount.
                 */
-               if (!path->mnt || (inode->i_op && !inode->i_op->rename))
+               if (!path->mnt || !inode->i_op->rename)
                        pos = tomoyo_get_local_path(path->dentry, buf,
                                                    buf_len - 1);
                /* Get absolute name for the rest. */