Merge branch 'work.copy_file_range' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 13 Jan 2016 00:30:34 +0000 (16:30 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 13 Jan 2016 00:30:34 +0000 (16:30 -0800)
Pull vfs copy_file_range updates from Al Viro:
 "Several series around copy_file_range/CLONE"

* 'work.copy_file_range' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  btrfs: use new dedupe data function pointer
  vfs: hoist the btrfs deduplication ioctl to the vfs
  vfs: wire up compat ioctl for CLONE/CLONE_RANGE
  cifs: avoid unused variable and label
  nfsd: implement the NFSv4.2 CLONE operation
  nfsd: Pass filehandle to nfs4_preprocess_stateid_op()
  vfs: pull btrfs clone API to vfs layer
  locks: new locks_mandatory_area calling convention
  vfs: Add vfs_copy_file_range() support for pagecache copies
  btrfs: add .copy_file_range file operation
  x86: add sys_copy_file_range to syscall tables
  vfs: add copy_file_range syscall and vfs helper

26 files changed:
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/syscalls/syscall_64.tbl
fs/btrfs/ctree.h
fs/btrfs/file.c
fs/btrfs/ioctl.c
fs/cifs/cifsfs.c
fs/cifs/cifsfs.h
fs/cifs/ioctl.c
fs/compat_ioctl.c
fs/ioctl.c
fs/locks.c
fs/nfs/nfs4file.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/state.h
fs/nfsd/vfs.c
fs/nfsd/vfs.h
fs/nfsd/xdr4.h
fs/read_write.c
include/linux/fs.h
include/linux/nfs4.h
include/linux/syscalls.h
include/uapi/asm-generic/unistd.h
include/uapi/linux/fs.h
kernel/sys_ni.c

index f17705e1332cc3b81dc9a3a7551ece5d1848d5db..cb713df81180ba91521296b4014b0ad2b74ee7d6 100644 (file)
 374    i386    userfaultfd             sys_userfaultfd
 375    i386    membarrier              sys_membarrier
 376    i386    mlock2                  sys_mlock2
+377    i386    copy_file_range         sys_copy_file_range
index 314a90bfc09c16ab76c5d1451d7d2a4026be946f..dc1040a50bdc21594317f9f9dc4c59196a25ae63 100644 (file)
 323    common  userfaultfd             sys_userfaultfd
 324    common  membarrier              sys_membarrier
 325    common  mlock2                  sys_mlock2
+326    common  copy_file_range         sys_copy_file_range
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
index 35489e7129a7e8de9d0232279d41d3bbd19ae1df..b7e4e344e8e0a510697a56bf553d833c5bcbeb06 100644 (file)
@@ -4024,7 +4024,8 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
                                struct btrfs_ioctl_space_info *space);
 void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
                               struct btrfs_ioctl_balance_args *bargs);
-
+ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+                          struct file *dst_file, u64 dst_loff);
 
 /* file.c */
 int btrfs_auto_defrag_init(void);
@@ -4055,6 +4056,11 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
                      loff_t pos, size_t write_bytes,
                      struct extent_state **cached);
 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
+ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
+                             struct file *file_out, loff_t pos_out,
+                             size_t len, unsigned int flags);
+int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
+                          struct file *file_out, loff_t pos_out, u64 len);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
index 0f09526aa7d9d2017bb229eb4310f777c4b3b578..e3d9022bfd4e3c2861008104d13e55050aa91f13 100644 (file)
@@ -2934,6 +2934,9 @@ const struct file_operations btrfs_file_operations = {
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = btrfs_ioctl,
 #endif
+       .copy_file_range = btrfs_copy_file_range,
+       .clone_file_range = btrfs_clone_file_range,
+       .dedupe_file_range = btrfs_dedupe_file_range,
 };
 
 void btrfs_auto_defrag_exit(void)
index da94138eb85eb3f15f127b08c54a113715956686..e21997385d148c7ede78fd5874e594577ed8a9e9 100644 (file)
@@ -2962,7 +2962,7 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
                flush_dcache_page(dst_page);
 
                if (memcmp(addr, dst_addr, cmp_len))
-                       ret = BTRFS_SAME_DATA_DIFFERS;
+                       ret = -EBADE;
 
                kunmap_atomic(addr);
                kunmap_atomic(dst_addr);
@@ -3098,53 +3098,16 @@ out_unlock:
 
 #define BTRFS_MAX_DEDUPE_LEN   (16 * 1024 * 1024)
 
-static long btrfs_ioctl_file_extent_same(struct file *file,
-                       struct btrfs_ioctl_same_args __user *argp)
+ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+                               struct file *dst_file, u64 dst_loff)
 {
-       struct btrfs_ioctl_same_args *same = NULL;
-       struct btrfs_ioctl_same_extent_info *info;
-       struct inode *src = file_inode(file);
-       u64 off;
-       u64 len;
-       int i;
-       int ret;
-       unsigned long size;
+       struct inode *src = file_inode(src_file);
+       struct inode *dst = file_inode(dst_file);
        u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
-       bool is_admin = capable(CAP_SYS_ADMIN);
-       u16 count;
-
-       if (!(file->f_mode & FMODE_READ))
-               return -EINVAL;
-
-       ret = mnt_want_write_file(file);
-       if (ret)
-               return ret;
-
-       if (get_user(count, &argp->dest_count)) {
-               ret = -EFAULT;
-               goto out;
-       }
-
-       size = offsetof(struct btrfs_ioctl_same_args __user, info[count]);
-
-       same = memdup_user(argp, size);
-
-       if (IS_ERR(same)) {
-               ret = PTR_ERR(same);
-               same = NULL;
-               goto out;
-       }
+       ssize_t res;
 
-       off = same->logical_offset;
-       len = same->length;
-
-       /*
-        * Limit the total length we will dedupe for each operation.
-        * This is intended to bound the total time spent in this
-        * ioctl to something sane.
-        */
-       if (len > BTRFS_MAX_DEDUPE_LEN)
-               len = BTRFS_MAX_DEDUPE_LEN;
+       if (olen > BTRFS_MAX_DEDUPE_LEN)
+               olen = BTRFS_MAX_DEDUPE_LEN;
 
        if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
                /*
@@ -3152,58 +3115,13 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
                 * result, btrfs_cmp_data() won't correctly handle
                 * this situation without an update.
                 */
-               ret = -EINVAL;
-               goto out;
-       }
-
-       ret = -EISDIR;
-       if (S_ISDIR(src->i_mode))
-               goto out;
-
-       ret = -EACCES;
-       if (!S_ISREG(src->i_mode))
-               goto out;
-
-       /* pre-format output fields to sane values */
-       for (i = 0; i < count; i++) {
-               same->info[i].bytes_deduped = 0ULL;
-               same->info[i].status = 0;
-       }
-
-       for (i = 0, info = same->info; i < count; i++, info++) {
-               struct inode *dst;
-               struct fd dst_file = fdget(info->fd);
-               if (!dst_file.file) {
-                       info->status = -EBADF;
-                       continue;
-               }
-               dst = file_inode(dst_file.file);
-
-               if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
-                       info->status = -EINVAL;
-               } else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
-                       info->status = -EXDEV;
-               } else if (S_ISDIR(dst->i_mode)) {
-                       info->status = -EISDIR;
-               } else if (!S_ISREG(dst->i_mode)) {
-                       info->status = -EACCES;
-               } else {
-                       info->status = btrfs_extent_same(src, off, len, dst,
-                                                       info->logical_offset);
-                       if (info->status == 0)
-                               info->bytes_deduped += len;
-               }
-               fdput(dst_file);
+               return -EINVAL;
        }
 
-       ret = copy_to_user(argp, same, size);
-       if (ret)
-               ret = -EFAULT;
-
-out:
-       mnt_drop_write_file(file);
-       kfree(same);
-       return ret;
+       res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
+       if (res)
+               return res;
+       return olen;
 }
 
 static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
@@ -3779,17 +3697,16 @@ out:
        return ret;
 }
 
-static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
-                                      u64 off, u64 olen, u64 destoff)
+static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
+                                       u64 off, u64 olen, u64 destoff)
 {
        struct inode *inode = file_inode(file);
+       struct inode *src = file_inode(file_src);
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct fd src_file;
-       struct inode *src;
        int ret;
        u64 len = olen;
        u64 bs = root->fs_info->sb->s_blocksize;
-       int same_inode = 0;
+       int same_inode = src == inode;
 
        /*
         * TODO:
@@ -3802,49 +3719,20 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
         *   be either compressed or non-compressed.
         */
 
-       /* the destination must be opened for writing */
-       if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
-               return -EINVAL;
-
        if (btrfs_root_readonly(root))
                return -EROFS;
 
-       ret = mnt_want_write_file(file);
-       if (ret)
-               return ret;
-
-       src_file = fdget(srcfd);
-       if (!src_file.file) {
-               ret = -EBADF;
-               goto out_drop_write;
-       }
-
-       ret = -EXDEV;
-       if (src_file.file->f_path.mnt != file->f_path.mnt)
-               goto out_fput;
-
-       src = file_inode(src_file.file);
-
-       ret = -EINVAL;
-       if (src == inode)
-               same_inode = 1;
-
-       /* the src must be open for reading */
-       if (!(src_file.file->f_mode & FMODE_READ))
-               goto out_fput;
+       if (file_src->f_path.mnt != file->f_path.mnt ||
+           src->i_sb != inode->i_sb)
+               return -EXDEV;
 
        /* don't make the dst file partly checksummed */
        if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
            (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
-               goto out_fput;
+               return -EINVAL;
 
-       ret = -EISDIR;
        if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
-               goto out_fput;
-
-       ret = -EXDEV;
-       if (src->i_sb != inode->i_sb)
-               goto out_fput;
+               return -EISDIR;
 
        if (!same_inode) {
                btrfs_double_inode_lock(src, inode);
@@ -3921,21 +3809,25 @@ out_unlock:
                btrfs_double_inode_unlock(src, inode);
        else
                mutex_unlock(&src->i_mutex);
-out_fput:
-       fdput(src_file);
-out_drop_write:
-       mnt_drop_write_file(file);
        return ret;
 }
 
-static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
+ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
+                             struct file *file_out, loff_t pos_out,
+                             size_t len, unsigned int flags)
 {
-       struct btrfs_ioctl_clone_range_args args;
+       ssize_t ret;
 
-       if (copy_from_user(&args, argp, sizeof(args)))
-               return -EFAULT;
-       return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
-                                args.src_length, args.dest_offset);
+       ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out);
+       if (ret == 0)
+               ret = len;
+       return ret;
+}
+
+int btrfs_clone_file_range(struct file *src_file, loff_t off,
+               struct file *dst_file, loff_t destoff, u64 len)
+{
+       return btrfs_clone_files(dst_file, src_file, off, len, destoff);
 }
 
 /*
@@ -5485,10 +5377,6 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_dev_info(root, argp);
        case BTRFS_IOC_BALANCE:
                return btrfs_ioctl_balance(file, NULL);
-       case BTRFS_IOC_CLONE:
-               return btrfs_ioctl_clone(file, arg, 0, 0, 0);
-       case BTRFS_IOC_CLONE_RANGE:
-               return btrfs_ioctl_clone_range(file, argp);
        case BTRFS_IOC_TRANS_START:
                return btrfs_ioctl_trans_start(file);
        case BTRFS_IOC_TRANS_END:
@@ -5566,8 +5454,6 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_get_fslabel(file, argp);
        case BTRFS_IOC_SET_FSLABEL:
                return btrfs_ioctl_set_fslabel(file, argp);
-       case BTRFS_IOC_FILE_EXTENT_SAME:
-               return btrfs_ioctl_file_extent_same(file, argp);
        case BTRFS_IOC_GET_SUPPORTED_FEATURES:
                return btrfs_ioctl_get_supported_features(file, argp);
        case BTRFS_IOC_GET_FEATURES:
index 90e4e2b398b66b08c9a35ed3448f34c91fbcb1df..b7fcb3151103cefcc02ecaaf042b59e2b2ff9454 100644 (file)
@@ -913,6 +913,59 @@ const struct inode_operations cifs_symlink_inode_ops = {
 #endif
 };
 
+/*
+ * cifs_clone_file_range - ->clone_file_range handler for CIFS files
+ * @src_file: file to clone from
+ * @off:      byte offset in the source at which the range starts
+ * @dst_file: file to clone into
+ * @destoff:  byte offset in the destination at which the range lands
+ * @len:      number of bytes to clone; 0 is replaced by
+ *            "source i_size - @off"
+ *
+ * Truncates the destination's page cache from @destoff to the page-aligned
+ * end of the range, then asks the server to duplicate the extents if the
+ * server ops provide ->duplicate_extents.
+ *
+ * Returns the result of ->duplicate_extents, -EOPNOTSUPP when the server ops
+ * lack that hook, or -EBADF when either file has no cifsFileInfo attached.
+ */
+static int cifs_clone_file_range(struct file *src_file, loff_t off,
+               struct file *dst_file, loff_t destoff, u64 len)
+{
+       struct inode *src_inode = file_inode(src_file);
+       struct inode *target_inode = file_inode(dst_file);
+       struct cifsFileInfo *smb_file_src = src_file->private_data;
+       struct cifsFileInfo *smb_file_target = dst_file->private_data;
+       struct cifs_tcon *target_tcon;
+       unsigned int xid;
+       int rc;
+
+       cifs_dbg(FYI, "clone range\n");
+
+       xid = get_xid();
+
+       if (!src_file->private_data || !dst_file->private_data) {
+               rc = -EBADF;
+               cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
+               goto out;
+       }
+
+       /*
+        * Resolve the tcon only now that smb_file_target is known to be
+        * non-NULL; doing it in the declaration would dereference the
+        * pointer before the check above could reject it.
+        */
+       target_tcon = tlink_tcon(smb_file_target->tlink);
+
+       /*
+        * Note: cifs case is easier than btrfs since server responsible for
+        * checks for proper open modes and file type and if it wants
+        * server could even support copy of range where source = target
+        */
+       lock_two_nondirectories(target_inode, src_inode);
+
+       /* a zero length means "everything from @off to the end of source" */
+       if (len == 0)
+               len = src_inode->i_size - off;
+
+       cifs_dbg(FYI, "about to flush pages\n");
+       /* should we flush first and last page first */
+       truncate_inode_pages_range(&target_inode->i_data, destoff,
+                                  PAGE_CACHE_ALIGN(destoff + len)-1);
+
+       if (target_tcon->ses->server->ops->duplicate_extents)
+               rc = target_tcon->ses->server->ops->duplicate_extents(xid,
+                       smb_file_src, smb_file_target, off, len, destoff);
+       else
+               rc = -EOPNOTSUPP;
+
+       /*
+        * Force revalidation of the target's size and timestamps now that
+        * the target has been updated on the server.
+        */
+       CIFS_I(target_inode)->time = 0;
+       /*
+        * Unlocking in the reverse order from locking is not strictly
+        * necessary here, but it is a little cleaner to be consistent.
+        */
+       unlock_two_nondirectories(src_inode, target_inode);
+out:
+       free_xid(xid);
+       return rc;
+}
+
 const struct file_operations cifs_file_ops = {
        .read_iter = cifs_loose_read_iter,
        .write_iter = cifs_file_write_iter,
@@ -925,6 +978,7 @@ const struct file_operations cifs_file_ops = {
        .splice_read = generic_file_splice_read,
        .llseek = cifs_llseek,
        .unlocked_ioctl = cifs_ioctl,
+       .clone_file_range = cifs_clone_file_range,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
 };
@@ -941,6 +995,8 @@ const struct file_operations cifs_file_strict_ops = {
        .splice_read = generic_file_splice_read,
        .llseek = cifs_llseek,
        .unlocked_ioctl = cifs_ioctl,
+       .clone_file_range = cifs_clone_file_range,
+       .clone_file_range = cifs_clone_file_range,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
 };
@@ -957,6 +1013,7 @@ const struct file_operations cifs_file_direct_ops = {
        .mmap = cifs_file_mmap,
        .splice_read = generic_file_splice_read,
        .unlocked_ioctl  = cifs_ioctl,
+       .clone_file_range = cifs_clone_file_range,
        .llseek = cifs_llseek,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
@@ -973,6 +1030,7 @@ const struct file_operations cifs_file_nobrl_ops = {
        .splice_read = generic_file_splice_read,
        .llseek = cifs_llseek,
        .unlocked_ioctl = cifs_ioctl,
+       .clone_file_range = cifs_clone_file_range,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
 };
@@ -988,6 +1046,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
        .splice_read = generic_file_splice_read,
        .llseek = cifs_llseek,
        .unlocked_ioctl = cifs_ioctl,
+       .clone_file_range = cifs_clone_file_range,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
 };
@@ -1003,6 +1062,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
        .mmap = cifs_file_mmap,
        .splice_read = generic_file_splice_read,
        .unlocked_ioctl  = cifs_ioctl,
+       .clone_file_range = cifs_clone_file_range,
        .llseek = cifs_llseek,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
@@ -1013,6 +1073,7 @@ const struct file_operations cifs_dir_ops = {
        .release = cifs_closedir,
        .read    = generic_read_dir,
        .unlocked_ioctl  = cifs_ioctl,
+       .clone_file_range = cifs_clone_file_range,
        .llseek = generic_file_llseek,
 };
 
index 26a1187d4323f227ca89072504d94619d8eb5df5..68c4547528c486f2477a8177692374dd6033e2bf 100644 (file)
@@ -130,7 +130,6 @@ extern int  cifs_setxattr(struct dentry *, const char *, const void *,
 extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
 extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
 extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
-
 #ifdef CONFIG_CIFS_NFSD_EXPORT
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
index 35cf990f87d3245d01662cc550f4f3b43cc2dc58..7a3b84e300f8978b80baf43d834feb7fab5d0cfa 100644 (file)
 #include "cifs_ioctl.h"
 #include <linux/btrfs.h>
 
-static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
-                       unsigned long srcfd, u64 off, u64 len, u64 destoff,
-                       bool dup_extents)
+static int cifs_file_clone_range(unsigned int xid, struct file *src_file,
+                         struct file *dst_file)
 {
-       int rc;
-       struct cifsFileInfo *smb_file_target = dst_file->private_data;
+       struct inode *src_inode = file_inode(src_file);
        struct inode *target_inode = file_inode(dst_file);
-       struct cifs_tcon *target_tcon;
-       struct fd src_file;
        struct cifsFileInfo *smb_file_src;
-       struct inode *src_inode;
+       struct cifsFileInfo *smb_file_target;
        struct cifs_tcon *src_tcon;
+       struct cifs_tcon *target_tcon;
+       int rc;
 
        cifs_dbg(FYI, "ioctl clone range\n");
-       /* the destination must be opened for writing */
-       if (!(dst_file->f_mode & FMODE_WRITE)) {
-               cifs_dbg(FYI, "file target not open for write\n");
-               return -EINVAL;
-       }
 
-       /* check if target volume is readonly and take reference */
-       rc = mnt_want_write_file(dst_file);
-       if (rc) {
-               cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
-               return rc;
-       }
-
-       src_file = fdget(srcfd);
-       if (!src_file.file) {
-               rc = -EBADF;
-               goto out_drop_write;
-       }
-
-       if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
-               rc = -EBADF;
-               cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
-               goto out_fput;
-       }
-
-       if ((!src_file.file->private_data) || (!dst_file->private_data)) {
+       if (!src_file->private_data || !dst_file->private_data) {
                rc = -EBADF;
                cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
-               goto out_fput;
+               goto out;
        }
 
        rc = -EXDEV;
        smb_file_target = dst_file->private_data;
-       smb_file_src = src_file.file->private_data;
+       smb_file_src = src_file->private_data;
        src_tcon = tlink_tcon(smb_file_src->tlink);
        target_tcon = tlink_tcon(smb_file_target->tlink);
 
-       /* check source and target on same server (or volume if dup_extents) */
-       if (dup_extents && (src_tcon != target_tcon)) {
-               cifs_dbg(VFS, "source and target of copy not on same share\n");
-               goto out_fput;
-       }
-
-       if (!dup_extents && (src_tcon->ses != target_tcon->ses)) {
+       if (src_tcon->ses != target_tcon->ses) {
                cifs_dbg(VFS, "source and target of copy not on same server\n");
-               goto out_fput;
+               goto out;
        }
 
-       src_inode = file_inode(src_file.file);
-       rc = -EINVAL;
-       if (S_ISDIR(src_inode->i_mode))
-               goto out_fput;
-
        /*
         * Note: cifs case is easier than btrfs since server responsible for
         * checks for proper open modes and file type and if it wants
@@ -108,34 +71,66 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
         */
        lock_two_nondirectories(target_inode, src_inode);
 
-       /* determine range to clone */
-       rc = -EINVAL;
-       if (off + len > src_inode->i_size || off + len < off)
-               goto out_unlock;
-       if (len == 0)
-               len = src_inode->i_size - off;
-
        cifs_dbg(FYI, "about to flush pages\n");
        /* should we flush first and last page first */
-       truncate_inode_pages_range(&target_inode->i_data, destoff,
-                                  PAGE_CACHE_ALIGN(destoff + len)-1);
+       truncate_inode_pages(&target_inode->i_data, 0);
 
-       if (dup_extents && target_tcon->ses->server->ops->duplicate_extents)
-               rc = target_tcon->ses->server->ops->duplicate_extents(xid,
-                       smb_file_src, smb_file_target, off, len, destoff);
-       else if (!dup_extents && target_tcon->ses->server->ops->clone_range)
+       if (target_tcon->ses->server->ops->clone_range)
                rc = target_tcon->ses->server->ops->clone_range(xid,
-                       smb_file_src, smb_file_target, off, len, destoff);
+                       smb_file_src, smb_file_target, 0, src_inode->i_size, 0);
        else
                rc = -EOPNOTSUPP;
 
        /* force revalidate of size and timestamps of target file now
           that target is updated on the server */
        CIFS_I(target_inode)->time = 0;
-out_unlock:
        /* although unlocking in the reverse order from locking is not
           strictly necessary here it is a little cleaner to be consistent */
        unlock_two_nondirectories(src_inode, target_inode);
+out:
+       return rc;
+}
+
+static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
+                       unsigned long srcfd)
+{
+       int rc;
+       struct fd src_file;
+       struct inode *src_inode;
+
+       cifs_dbg(FYI, "ioctl clone range\n");
+       /* the destination must be opened for writing */
+       if (!(dst_file->f_mode & FMODE_WRITE)) {
+               cifs_dbg(FYI, "file target not open for write\n");
+               return -EINVAL;
+       }
+
+       /* check if target volume is readonly and take reference */
+       rc = mnt_want_write_file(dst_file);
+       if (rc) {
+               cifs_dbg(FYI, "mnt_want_write failed with rc %d\n", rc);
+               return rc;
+       }
+
+       src_file = fdget(srcfd);
+       if (!src_file.file) {
+               rc = -EBADF;
+               goto out_drop_write;
+       }
+
+       if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
+               rc = -EBADF;
+               cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
+               goto out_fput;
+       }
+
+       src_inode = file_inode(src_file.file);
+       rc = -EINVAL;
+       if (S_ISDIR(src_inode->i_mode))
+               goto out_fput;
+
+       rc = cifs_file_clone_range(xid, src_file.file, dst_file);
+
 out_fput:
        fdput(src_file);
 out_drop_write:
@@ -256,10 +251,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                        }
                        break;
                case CIFS_IOC_COPYCHUNK_FILE:
-                       rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, false);
-                       break;
-               case BTRFS_IOC_CLONE:
-                       rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0, true);
+                       rc = cifs_ioctl_clone(xid, filep, arg);
                        break;
                case CIFS_IOC_SET_INTEGRITY:
                        if (pSMBFile == NULL)
index 9144b779d10ef454d0f42bb15f7560878265b5ba..647ee0b03dc0082f9cc5ff162f307a6b82c78844 100644 (file)
@@ -1601,6 +1601,11 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
                goto out_fput;
 #endif
 
+       case FICLONE:
+       case FICLONERANGE:
+       case FIDEDUPERANGE:
+               goto do_ioctl;
+
        case FIBMAP:
        case FIGETBSZ:
        case FIONREAD:
index 41c352e8119381dc646e94e89c18becd40206ec8..29466c380958ac411f4dcd46706837f2c55b03cd 100644 (file)
@@ -215,6 +215,29 @@ static int ioctl_fiemap(struct file *filp, unsigned long arg)
        return error;
 }
 
+/*
+ * Look up @srcfd and pass the resulting file, together with @off, @olen and
+ * @destoff, to vfs_clone_file_range() with @dst_file as the destination.
+ *
+ * Returns -EBADF when @srcfd does not resolve to a file, otherwise whatever
+ * vfs_clone_file_range() returned.
+ */
+static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
+                            u64 off, u64 olen, u64 destoff)
+{
+       struct fd src = fdget(srcfd);
+       int err = -EBADF;
+
+       if (src.file) {
+               err = vfs_clone_file_range(src.file, off, dst_file,
+                                          destoff, olen);
+               /* drop the reference taken by fdget() */
+               fdput(src);
+       }
+       return err;
+}
+
+/*
+ * Copy a struct file_clone_range in from userspace and forward its fields
+ * to ioctl_file_clone() with @file as the destination.
+ *
+ * Returns -EFAULT when the argument cannot be copied in, otherwise the
+ * result of ioctl_file_clone().
+ */
+static long ioctl_file_clone_range(struct file *file, void __user *argp)
+{
+       struct file_clone_range range;
+
+       if (copy_from_user(&range, argp, sizeof(range)) != 0)
+               return -EFAULT;
+
+       return ioctl_file_clone(file, range.src_fd, range.src_offset,
+                               range.src_length, range.dest_offset);
+}
+
 #ifdef CONFIG_BLOCK
 
 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -545,6 +568,41 @@ static int ioctl_fsthaw(struct file *filp)
        return thaw_super(sb);
 }
 
+/*
+ * Copy a variable-length struct file_dedupe_range in from userspace, run
+ * vfs_dedupe_file_range() on it and copy the structure back out.
+ *
+ * The structure ends in an info[] array whose element count is given by its
+ * own dest_count field, so the count is read first to size the copy.
+ *
+ * Returns 0 on success, -EFAULT when userspace memory cannot be read or
+ * written, the memdup_user() error when the copy-in fails, or the non-zero
+ * value returned by vfs_dedupe_file_range().
+ */
+static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
+{
+       struct file_dedupe_range __user *argp = arg;
+       struct file_dedupe_range *same = NULL;
+       int ret;
+       unsigned long size;
+       u16 count;
+
+       if (get_user(count, &argp->dest_count)) {
+               ret = -EFAULT;
+               goto out;
+       }
+
+       size = offsetof(struct file_dedupe_range __user, info[count]);
+
+       same = memdup_user(argp, size);
+       if (IS_ERR(same)) {
+               ret = PTR_ERR(same);
+               /* kfree() at "out" must not see an ERR_PTR value */
+               same = NULL;
+               goto out;
+       }
+
+       /*
+        * The buffer was sized from the first read of dest_count, but
+        * memdup_user() fetched the field a second time and userspace may
+        * have changed it in between.  Pin it to the value the allocation
+        * was sized for so nothing downstream walks past the end of info[].
+        */
+       same->dest_count = count;
+
+       ret = vfs_dedupe_file_range(file, same);
+       if (ret)
+               goto out;
+
+       /* copy_to_user() returns the number of bytes it could not copy */
+       ret = copy_to_user(argp, same, size);
+       if (ret)
+               ret = -EFAULT;
+
+out:
+       kfree(same);
+       return ret;
+}
+
 /*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
@@ -600,6 +658,15 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
        case FIGETBSZ:
                return put_user(inode->i_sb->s_blocksize, argp);
 
+       case FICLONE:
+               return ioctl_file_clone(filp, arg, 0, 0, 0);
+
+       case FICLONERANGE:
+               return ioctl_file_clone_range(filp, argp);
+
+       case FIDEDUPERANGE:
+               return ioctl_file_dedupe_range(filp, argp);
+
        default:
                if (S_ISREG(inode->i_mode))
                        error = file_ioctl(filp, cmd, arg);
index a91f4ab00a90372d8ad9fb6c18fa9bb1f9e822dd..af1ed74a657fbc93f64386bcad54d9826bc34684 100644 (file)
@@ -1258,20 +1258,16 @@ int locks_mandatory_locked(struct file *file)
 
 /**
  * locks_mandatory_area - Check for a conflicting lock
- * @read_write: %FLOCK_VERIFY_WRITE for exclusive access, %FLOCK_VERIFY_READ
- *             for shared
- * @inode:      the file to check
+ * @inode:     the file to check
  * @filp:       how the file was opened (if it was)
- * @offset:     start of area to check
- * @count:      length of area to check
+ * @start:     first byte in the file to check
+ * @end:       last byte in the file to check
+ * @type:      %F_WRLCK for a write lock, else %F_RDLCK
  *
  * Searches the inode's list of locks to find any POSIX locks which conflict.
- * This function is called from rw_verify_area() and
- * locks_verify_truncate().
  */
-int locks_mandatory_area(int read_write, struct inode *inode,
-                        struct file *filp, loff_t offset,
-                        size_t count)
+int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
+                        loff_t end, unsigned char type)
 {
        struct file_lock fl;
        int error;
@@ -1283,9 +1279,9 @@ int locks_mandatory_area(int read_write, struct inode *inode,
        fl.fl_flags = FL_POSIX | FL_ACCESS;
        if (filp && !(filp->f_flags & O_NONBLOCK))
                sleep = true;
-       fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;
-       fl.fl_start = offset;
-       fl.fl_end = offset + count - 1;
+       fl.fl_type = type;
+       fl.fl_start = start;
+       fl.fl_end = end;
 
        for (;;) {
                if (filp) {
index db9b5fea5b3ef12f6eacf36cd2154c2db7d4d9f2..26f9a23e2b254998d6b0c053e2e0892d2cd16050 100644 (file)
@@ -195,65 +195,27 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
        return nfs42_proc_allocate(filep, offset, len);
 }
 
-static noinline long
-nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
-                 u64 src_off, u64 dst_off, u64 count)
+static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
+               struct file *dst_file, loff_t dst_off, u64 count)
 {
        struct inode *dst_inode = file_inode(dst_file);
        struct nfs_server *server = NFS_SERVER(dst_inode);
-       struct fd src_file;
-       struct inode *src_inode;
+       struct inode *src_inode = file_inode(src_file);
        unsigned int bs = server->clone_blksize;
        bool same_inode = false;
        int ret;
 
-       /* dst file must be opened for writing */
-       if (!(dst_file->f_mode & FMODE_WRITE))
-               return -EINVAL;
-
-       ret = mnt_want_write_file(dst_file);
-       if (ret)
-               return ret;
-
-       src_file = fdget(srcfd);
-       if (!src_file.file) {
-               ret = -EBADF;
-               goto out_drop_write;
-       }
-
-       src_inode = file_inode(src_file.file);
-
-       if (src_inode == dst_inode)
-               same_inode = true;
-
-       /* src file must be opened for reading */
-       if (!(src_file.file->f_mode & FMODE_READ))
-               goto out_fput;
-
-       /* src and dst must be regular files */
-       ret = -EISDIR;
-       if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode))
-               goto out_fput;
-
-       ret = -EXDEV;
-       if (src_file.file->f_path.mnt != dst_file->f_path.mnt ||
-           src_inode->i_sb != dst_inode->i_sb)
-               goto out_fput;
-
        /* check alignment w.r.t. clone_blksize */
        ret = -EINVAL;
        if (bs) {
                if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs))
-                       goto out_fput;
+                       goto out;
                if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count))
-                       goto out_fput;
+                       goto out;
        }
 
-       /* verify if ranges are overlapped within the same file */
-       if (same_inode) {
-               if (dst_off + count > src_off && dst_off < src_off + count)
-                       goto out_fput;
-       }
+       if (src_inode == dst_inode)
+               same_inode = true;
 
        /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
        if (same_inode) {
@@ -275,7 +237,7 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
        if (ret)
                goto out_unlock;
 
-       ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count);
+       ret = nfs42_proc_clone(src_file, dst_file, src_off, dst_off, count);
 
        /* truncate inode page cache of the dst range so that future reads can fetch
         * new data from server */
@@ -292,37 +254,9 @@ out_unlock:
                mutex_unlock(&dst_inode->i_mutex);
                mutex_unlock(&src_inode->i_mutex);
        }
-out_fput:
-       fdput(src_file);
-out_drop_write:
-       mnt_drop_write_file(dst_file);
+out:
        return ret;
 }
-
-static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
-{
-       struct btrfs_ioctl_clone_range_args args;
-
-       if (copy_from_user(&args, argp, sizeof(args)))
-               return -EFAULT;
-
-       return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset,
-                                args.dest_offset, args.src_length);
-}
-
-long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
-       void __user *argp = (void __user *)arg;
-
-       switch (cmd) {
-       case BTRFS_IOC_CLONE:
-               return nfs42_ioctl_clone(file, arg, 0, 0, 0);
-       case BTRFS_IOC_CLONE_RANGE:
-               return nfs42_ioctl_clone_range(file, argp);
-       }
-
-       return -ENOTTY;
-}
 #endif /* CONFIG_NFS_V4_2 */
 
 const struct file_operations nfs4_file_operations = {
@@ -342,8 +276,7 @@ const struct file_operations nfs4_file_operations = {
 #ifdef CONFIG_NFS_V4_2
        .llseek         = nfs4_file_llseek,
        .fallocate      = nfs42_fallocate,
-       .unlocked_ioctl = nfs4_ioctl,
-       .compat_ioctl   = nfs4_ioctl,
+       .clone_file_range = nfs42_clone_file_range,
 #else
        .llseek         = nfs_file_llseek,
 #endif
index a9f096c7e99f5dd36b40fac5bad04300e3b7f4e8..819ad812c71b903200784f7c65622ff07ce59d72 100644 (file)
@@ -774,8 +774,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
 
        /* check stateid */
-       status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid,
-                       RD_STATE, &read->rd_filp, &read->rd_tmp_file);
+       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+                                       &read->rd_stateid, RD_STATE,
+                                       &read->rd_filp, &read->rd_tmp_file);
        if (status) {
                dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
                goto out;
@@ -921,7 +922,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
        if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
                status = nfs4_preprocess_stateid_op(rqstp, cstate,
-                       &setattr->sa_stateid, WR_STATE, NULL, NULL);
+                               &cstate->current_fh, &setattr->sa_stateid,
+                               WR_STATE, NULL, NULL);
                if (status) {
                        dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
                        return status;
@@ -985,8 +987,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (write->wr_offset >= OFFSET_MAX)
                return nfserr_inval;
 
-       status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE,
-                       &filp, NULL);
+       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+                                               stateid, WR_STATE, &filp, NULL);
        if (status) {
                dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
                return status;
@@ -1009,6 +1011,47 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        return status;
 }
 
+static __be32
+nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+               struct nfsd4_clone *clone)
+{
+       struct file *src, *dst;
+       __be32 status;
+
+       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
+                                           &clone->cl_src_stateid, RD_STATE,
+                                           &src, NULL);
+       if (status) {
+               dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
+               goto out;
+       }
+
+       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+                                           &clone->cl_dst_stateid, WR_STATE,
+                                           &dst, NULL);
+       if (status) {
+               dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
+               goto out_put_src;
+       }
+
+       /* fix up for NFS-specific error code */
+       if (!S_ISREG(file_inode(src)->i_mode) ||
+           !S_ISREG(file_inode(dst)->i_mode)) {
+               status = nfserr_wrong_type;
+               goto out_put_dst;
+       }
+
+       status = nfsd4_clone_file_range(src, clone->cl_src_pos,
+                       dst, clone->cl_dst_pos, clone->cl_count);
+
+out_put_dst:
+       fput(dst);
+out_put_src:
+       fput(src);
+out:
+       return status;
+}
+
 static __be32
 nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                struct nfsd4_fallocate *fallocate, int flags)
@@ -1016,7 +1059,7 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        __be32 status = nfserr_notsupp;
        struct file *file;
 
-       status = nfs4_preprocess_stateid_op(rqstp, cstate,
+       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                            &fallocate->falloc_stateid,
                                            WR_STATE, &file, NULL);
        if (status != nfs_ok) {
@@ -1055,7 +1098,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        __be32 status;
        struct file *file;
 
-       status = nfs4_preprocess_stateid_op(rqstp, cstate,
+       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
                                            &seek->seek_stateid,
                                            RD_STATE, &file, NULL);
        if (status) {
@@ -2279,6 +2322,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
                .op_name = "OP_DEALLOCATE",
                .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
+       [OP_CLONE] = {
+               .op_func = (nfsd4op_func)nfsd4_clone,
+               .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+               .op_name = "OP_CLONE",
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+       },
        [OP_SEEK] = {
                .op_func = (nfsd4op_func)nfsd4_seek,
                .op_name = "OP_SEEK",
index 6b800b5b8fedb5d60c8bf1a8d1ca6d35f3d54114..df5dba6872655e699ec950ad2e0604adbab70bd6 100644 (file)
@@ -4797,10 +4797,9 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
  */
 __be32
 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
-               struct nfsd4_compound_state *cstate, stateid_t *stateid,
-               int flags, struct file **filpp, bool *tmp_file)
+               struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
+               stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
 {
-       struct svc_fh *fhp = &cstate->current_fh;
        struct inode *ino = d_inode(fhp->fh_dentry);
        struct net *net = SVC_NET(rqstp);
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
index 51c9e9ca39a4d7c5e0537a2cafed4ec9caeb789d..924416f91fdd95c5d7e80e1302a27fce57920084 100644 (file)
@@ -1674,6 +1674,25 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
        DECODE_TAIL;
 }
 
+static __be32
+nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
+{
+       DECODE_HEAD;
+
+       status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid);
+       if (status)
+               return status;
+       status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid);
+       if (status)
+               return status;
+
+       READ_BUF(8 + 8 + 8);
+       p = xdr_decode_hyper(p, &clone->cl_src_pos);
+       p = xdr_decode_hyper(p, &clone->cl_dst_pos);
+       p = xdr_decode_hyper(p, &clone->cl_count);
+       DECODE_TAIL;
+}
+
 static __be32
 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
 {
@@ -1785,6 +1804,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
        [OP_READ_PLUS]          = (nfsd4_dec)nfsd4_decode_notsupp,
        [OP_SEEK]               = (nfsd4_dec)nfsd4_decode_seek,
        [OP_WRITE_SAME]         = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_CLONE]              = (nfsd4_dec)nfsd4_decode_clone,
 };
 
 static inline bool
@@ -4292,6 +4312,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
        [OP_READ_PLUS]          = (nfsd4_enc)nfsd4_encode_noop,
        [OP_SEEK]               = (nfsd4_enc)nfsd4_encode_seek,
        [OP_WRITE_SAME]         = (nfsd4_enc)nfsd4_encode_noop,
+       [OP_CLONE]              = (nfsd4_enc)nfsd4_encode_noop,
 };
 
 /*
index 77fdf4de91baa102dbefd2f15c5314be40aff162..99432b7ecb9c38f9338d9def8994fc6728ef2350 100644 (file)
@@ -578,8 +578,8 @@ struct nfsd4_compound_state;
 struct nfsd_net;
 
 extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
-               struct nfsd4_compound_state *cstate, stateid_t *stateid,
-               int flags, struct file **filp, bool *tmp_file);
+               struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
+               stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
 __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
                     stateid_t *stateid, unsigned char typemask,
                     struct nfs4_stid **s, struct nfsd_net *nn);
index 994d66fbb4467ac8892a4d2ae0e5e9fda2a05722..5411bf09b810b25e6ae7aae0f14406a0da6a7758 100644 (file)
@@ -36,6 +36,7 @@
 #endif /* CONFIG_NFSD_V3 */
 
 #ifdef CONFIG_NFSD_V4
+#include "../internal.h"
 #include "acl.h"
 #include "idmap.h"
 #endif /* CONFIG_NFSD_V4 */
@@ -498,6 +499,13 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
 }
 #endif
 
+__be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
+               u64 dst_pos, u64 count)
+{
+       return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
+                       count));
+}
+
 __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
                           struct file *file, loff_t offset, loff_t len,
                           int flags)
index fcfc48cbe1360822896d479c3e0a6bf8997c147c..c11ba316f23f9b36b6ff57f7d9e5be94a0771962 100644 (file)
@@ -56,6 +56,8 @@ __be32          nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *,
                    struct xdr_netobj *);
 __be32         nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
                                    struct file *, loff_t, loff_t, int);
+__be32         nfsd4_clone_file_range(struct file *, u64, struct file *,
+                       u64, u64);
 #endif /* CONFIG_NFSD_V4 */
 __be32         nfsd_create(struct svc_rqst *, struct svc_fh *,
                                char *name, int len, struct iattr *attrs,
index ce7362c88b4875958201b5c8db9e35f3347dd4a8..d9554813e58afaa15d8e94292e776ea7b7f27f01 100644 (file)
@@ -491,6 +491,15 @@ struct nfsd4_fallocate {
        u64             falloc_length;
 };
 
+struct nfsd4_clone {
+       /* request */
+       stateid_t       cl_src_stateid;
+       stateid_t       cl_dst_stateid;
+       u64             cl_src_pos;
+       u64             cl_dst_pos;
+       u64             cl_count;
+};
+
 struct nfsd4_seek {
        /* request */
        stateid_t       seek_stateid;
@@ -555,6 +564,7 @@ struct nfsd4_op {
                /* NFSv4.2 */
                struct nfsd4_fallocate          allocate;
                struct nfsd4_fallocate          deallocate;
+               struct nfsd4_clone              clone;
                struct nfsd4_seek               seek;
        } u;
        struct nfs4_replay *                    replay;
index 819ef3faf1bb710678175de06a13f4dcf6e90d62..2116e74a83d3f34e94335e0ab2530c27ef9b972d 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/pagemap.h>
 #include <linux/splice.h>
 #include <linux/compat.h>
+#include <linux/mount.h>
 #include "internal.h"
 
 #include <asm/uaccess.h>
@@ -395,9 +396,8 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
        }
 
        if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
-               retval = locks_mandatory_area(
-                       read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
-                       inode, file, pos, count);
+               retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
+                               read_write == READ ? F_RDLCK : F_WRLCK);
                if (retval < 0)
                        return retval;
        }
@@ -1327,3 +1327,299 @@ COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
        return do_sendfile(out_fd, in_fd, NULL, count, 0);
 }
 #endif
+
+/*
+ * copy_file_range() differs from regular file read and write in that it
+ * specifically allows returning partial success.  When it does so is up to
+ * the copy_file_range method.
+ */
+ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
+                           struct file *file_out, loff_t pos_out,
+                           size_t len, unsigned int flags)
+{
+       struct inode *inode_in = file_inode(file_in);
+       struct inode *inode_out = file_inode(file_out);
+       ssize_t ret;
+
+       if (flags != 0)
+               return -EINVAL;
+
+       /* copy_file_range allows full ssize_t len, ignoring MAX_RW_COUNT  */
+       ret = rw_verify_area(READ, file_in, &pos_in, len);
+       if (ret >= 0)
+               ret = rw_verify_area(WRITE, file_out, &pos_out, len);
+       if (ret < 0)
+               return ret;
+
+       if (!(file_in->f_mode & FMODE_READ) ||
+           !(file_out->f_mode & FMODE_WRITE) ||
+           (file_out->f_flags & O_APPEND))
+               return -EBADF;
+
+       /* this could be relaxed once a method supports cross-fs copies */
+       if (inode_in->i_sb != inode_out->i_sb)
+               return -EXDEV;
+
+       if (len == 0)
+               return 0;
+
+       ret = mnt_want_write_file(file_out);
+       if (ret)
+               return ret;
+
+       ret = -EOPNOTSUPP;
+       if (file_out->f_op->copy_file_range)
+               ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
+                                                     pos_out, len, flags);
+       if (ret == -EOPNOTSUPP)
+               ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
+                               len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
+
+       if (ret > 0) {
+               fsnotify_access(file_in);
+               add_rchar(current, ret);
+               fsnotify_modify(file_out);
+               add_wchar(current, ret);
+       }
+       inc_syscr(current);
+       inc_syscw(current);
+
+       mnt_drop_write_file(file_out);
+
+       return ret;
+}
+EXPORT_SYMBOL(vfs_copy_file_range);
+
+SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
+               int, fd_out, loff_t __user *, off_out,
+               size_t, len, unsigned int, flags)
+{
+       loff_t pos_in;
+       loff_t pos_out;
+       struct fd f_in;
+       struct fd f_out;
+       ssize_t ret = -EBADF;
+
+       f_in = fdget(fd_in);
+       if (!f_in.file)
+               goto out2;
+
+       f_out = fdget(fd_out);
+       if (!f_out.file)
+               goto out1;
+
+       ret = -EFAULT;
+       if (off_in) {
+               if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
+                       goto out;
+       } else {
+               pos_in = f_in.file->f_pos;
+       }
+
+       if (off_out) {
+               if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
+                       goto out;
+       } else {
+               pos_out = f_out.file->f_pos;
+       }
+
+       ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
+                                 flags);
+       if (ret > 0) {
+               pos_in += ret;
+               pos_out += ret;
+
+               if (off_in) {
+                       if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
+                               ret = -EFAULT;
+               } else {
+                       f_in.file->f_pos = pos_in;
+               }
+
+               if (off_out) {
+                       if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
+                               ret = -EFAULT;
+               } else {
+                       f_out.file->f_pos = pos_out;
+               }
+       }
+
+out:
+       fdput(f_out);
+out1:
+       fdput(f_in);
+out2:
+       return ret;
+}
+
+static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
+{
+       struct inode *inode = file_inode(file);
+
+       if (unlikely(pos < 0))
+               return -EINVAL;
+
+        if (unlikely((loff_t) (pos + len) < 0))
+               return -EINVAL;
+
+       if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
+               loff_t end = len ? pos + len - 1 : OFFSET_MAX;
+               int retval;
+
+               retval = locks_mandatory_area(inode, file, pos, end,
+                               write ? F_WRLCK : F_RDLCK);
+               if (retval < 0)
+                       return retval;
+       }
+
+       return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
+}
+
+int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+               struct file *file_out, loff_t pos_out, u64 len)
+{
+       struct inode *inode_in = file_inode(file_in);
+       struct inode *inode_out = file_inode(file_out);
+       int ret;
+
+       if (inode_in->i_sb != inode_out->i_sb ||
+           file_in->f_path.mnt != file_out->f_path.mnt)
+               return -EXDEV;
+
+       if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+               return -EISDIR;
+       if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+               return -EINVAL;
+
+       if (!(file_in->f_mode & FMODE_READ) ||
+           !(file_out->f_mode & FMODE_WRITE) ||
+           (file_out->f_flags & O_APPEND) ||
+           !file_in->f_op->clone_file_range)
+               return -EBADF;
+
+       ret = clone_verify_area(file_in, pos_in, len, false);
+       if (ret)
+               return ret;
+
+       ret = clone_verify_area(file_out, pos_out, len, true);
+       if (ret)
+               return ret;
+
+       if (pos_in + len > i_size_read(inode_in))
+               return -EINVAL;
+
+       ret = mnt_want_write_file(file_out);
+       if (ret)
+               return ret;
+
+       ret = file_in->f_op->clone_file_range(file_in, pos_in,
+                       file_out, pos_out, len);
+       if (!ret) {
+               fsnotify_access(file_in);
+               fsnotify_modify(file_out);
+       }
+
+       mnt_drop_write_file(file_out);
+       return ret;
+}
+EXPORT_SYMBOL(vfs_clone_file_range);
+
+int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
+{
+       struct file_dedupe_range_info *info;
+       struct inode *src = file_inode(file);
+       u64 off;
+       u64 len;
+       int i;
+       int ret;
+       bool is_admin = capable(CAP_SYS_ADMIN);
+       u16 count = same->dest_count;
+       struct file *dst_file;
+       loff_t dst_off;
+       ssize_t deduped;
+
+       if (!(file->f_mode & FMODE_READ))
+               return -EINVAL;
+
+       if (same->reserved1 || same->reserved2)
+               return -EINVAL;
+
+       off = same->src_offset;
+       len = same->src_length;
+
+       ret = -EISDIR;
+       if (S_ISDIR(src->i_mode))
+               goto out;
+
+       ret = -EINVAL;
+       if (!S_ISREG(src->i_mode))
+               goto out;
+
+       ret = clone_verify_area(file, off, len, false);
+       if (ret < 0)
+               goto out;
+       ret = 0;
+
+       /* pre-format output fields to sane values */
+       for (i = 0; i < count; i++) {
+               same->info[i].bytes_deduped = 0ULL;
+               same->info[i].status = FILE_DEDUPE_RANGE_SAME;
+       }
+
+       for (i = 0, info = same->info; i < count; i++, info++) {
+               struct inode *dst;
+               struct fd dst_fd = fdget(info->dest_fd);
+
+               dst_file = dst_fd.file;
+               if (!dst_file) {
+                       info->status = -EBADF;
+                       goto next_loop;
+               }
+               dst = file_inode(dst_file);
+
+               ret = mnt_want_write_file(dst_file);
+               if (ret) {
+                       info->status = ret;
+                       goto next_loop;
+               }
+
+               dst_off = info->dest_offset;
+               ret = clone_verify_area(dst_file, dst_off, len, true);
+               if (ret < 0) {
+                       info->status = ret;
+                       goto next_file;
+               }
+               ret = 0;
+
+               if (info->reserved) {
+                       info->status = -EINVAL;
+               } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
+                       info->status = -EINVAL;
+               } else if (file->f_path.mnt != dst_file->f_path.mnt) {
+                       info->status = -EXDEV;
+               } else if (S_ISDIR(dst->i_mode)) {
+                       info->status = -EISDIR;
+               } else if (dst_file->f_op->dedupe_file_range == NULL) {
+                       info->status = -EINVAL;
+               } else {
+                       deduped = dst_file->f_op->dedupe_file_range(file, off,
+                                                       len, dst_file,
+                                                       info->dest_offset);
+                       if (deduped == -EBADE)
+                               info->status = FILE_DEDUPE_RANGE_DIFFERS;
+                       else if (deduped < 0)
+                               info->status = deduped;
+                       else
+                               info->bytes_deduped += deduped;
+               }
+
+next_file:
+               mnt_drop_write_file(dst_file);
+next_loop:
+               fdput(dst_fd);
+       }
+
+out:
+       return ret;
+}
+EXPORT_SYMBOL(vfs_dedupe_file_range);
index 566f8e078ffc8f3c4e310cd43422f25c31481215..ec43a24bf63d3937edb2e93603852757ded8c18c 100644 (file)
@@ -1630,6 +1630,12 @@ struct file_operations {
 #ifndef CONFIG_MMU
        unsigned (*mmap_capabilities)(struct file *);
 #endif
+       ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
+                       loff_t, size_t, unsigned int);
+       int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
+                       u64);
+       ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *,
+                       u64);
 };
 
 struct inode_operations {
@@ -1680,6 +1686,12 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
                unsigned long, loff_t *);
 extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
                unsigned long, loff_t *);
+extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
+                                  loff_t, size_t, unsigned int);
+extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+               struct file *file_out, loff_t pos_out, u64 len);
+extern int vfs_dedupe_file_range(struct file *file,
+                                struct file_dedupe_range *same);
 
 struct super_operations {
        struct inode *(*alloc_inode)(struct super_block *sb);
@@ -2027,12 +2039,9 @@ extern struct kobject *fs_kobj;
 
 #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
 
-#define FLOCK_VERIFY_READ  1
-#define FLOCK_VERIFY_WRITE 2
-
 #ifdef CONFIG_MANDATORY_FILE_LOCKING
 extern int locks_mandatory_locked(struct file *);
-extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
+extern int locks_mandatory_area(struct inode *, struct file *, loff_t, loff_t, unsigned char);
 
 /*
  * Candidates for mandatory locking have the setgid bit set
@@ -2062,17 +2071,19 @@ static inline int locks_verify_locked(struct file *file)
 }
 
 static inline int locks_verify_truncate(struct inode *inode,
-                                   struct file *filp,
+                                   struct file *f,
                                    loff_t size)
 {
-       if (inode->i_flctx && mandatory_lock(inode))
-               return locks_mandatory_area(
-                       FLOCK_VERIFY_WRITE, inode, filp,
-                       size < inode->i_size ? size : inode->i_size,
-                       (size < inode->i_size ? inode->i_size - size
-                        : size - inode->i_size)
-               );
-       return 0;
+       if (!inode->i_flctx || !mandatory_lock(inode))
+               return 0;
+
+       if (size < inode->i_size) {
+               return locks_mandatory_area(inode, f, size, inode->i_size - 1,
+                               F_WRLCK);
+       } else {
+               return locks_mandatory_area(inode, f, inode->i_size, size - 1,
+                               F_WRLCK);
+       }
 }
 
 #else /* !CONFIG_MANDATORY_FILE_LOCKING */
@@ -2082,9 +2093,8 @@ static inline int locks_mandatory_locked(struct file *file)
        return 0;
 }
 
-static inline int locks_mandatory_area(int rw, struct inode *inode,
-                                      struct file *filp, loff_t offset,
-                                      size_t count)
+static inline int locks_mandatory_area(struct inode *inode, struct file *filp,
+                                       loff_t start, loff_t end, unsigned char type)
 {
        return 0;
 }
index e7e78537aea2cbeba658bc8591350961b089fa58..43aeabd4b96856125ef3d80e279b80a5c3d2d5d8 100644 (file)
@@ -139,10 +139,10 @@ enum nfs_opnum4 {
 Needs to be updated if more operations are defined in future.*/
 
 #define FIRST_NFS4_OP  OP_ACCESS
-#define LAST_NFS4_OP   OP_WRITE_SAME
 #define LAST_NFS40_OP  OP_RELEASE_LOCKOWNER
 #define LAST_NFS41_OP  OP_RECLAIM_COMPLETE
-#define LAST_NFS42_OP  OP_WRITE_SAME
+#define LAST_NFS42_OP  OP_CLONE
+#define LAST_NFS4_OP   LAST_NFS42_OP
 
 enum nfsstat4 {
        NFS4_OK = 0,
index c2b66a277e9807de8a224879793c121df63f7d69..185815c964330e017a622aafeec0a1865efaf22e 100644 (file)
@@ -886,6 +886,9 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename,
                        const char __user *const __user *envp, int flags);
 
 asmlinkage long sys_membarrier(int cmd, int flags);
+asmlinkage long sys_copy_file_range(int fd_in, loff_t __user *off_in,
+                                   int fd_out, loff_t __user *off_out,
+                                   size_t len, unsigned int flags);
 
 asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags);
 
index 1324b0292ec28ed7ed61f1dc875d1fd84503a6d1..2622b33fb2ec7d0e37e437198ac5d1637eed224d 100644 (file)
@@ -715,9 +715,11 @@ __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
 __SYSCALL(__NR_membarrier, sys_membarrier)
 #define __NR_mlock2 284
 __SYSCALL(__NR_mlock2, sys_mlock2)
+#define __NR_copy_file_range 285
+__SYSCALL(__NR_copy_file_range, sys_copy_file_range)
 
 #undef __NR_syscalls
-#define __NR_syscalls 285
+#define __NR_syscalls 286
 
 /*
  * All syscalls below here should go away really,
index f15d980249b502e4638e6125f04e595f0f52035e..b38e647664a06300423743bd03b244877c19c072 100644 (file)
 #define RENAME_EXCHANGE                (1 << 1)        /* Exchange source and dest */
 #define RENAME_WHITEOUT                (1 << 2)        /* Whiteout source */
 
+struct file_clone_range {
+       __s64 src_fd;
+       __u64 src_offset;
+       __u64 src_length;
+       __u64 dest_offset;
+};
+
 struct fstrim_range {
        __u64 start;
        __u64 len;
        __u64 minlen;
 };
 
+/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
+#define FILE_DEDUPE_RANGE_SAME         0
+#define FILE_DEDUPE_RANGE_DIFFERS      1
+
+/* from struct btrfs_ioctl_file_extent_same_info */
+struct file_dedupe_range_info {
+       __s64 dest_fd;          /* in - destination file */
+       __u64 dest_offset;      /* in - start of extent in destination */
+       __u64 bytes_deduped;    /* out - total # of bytes we were able
+                                * to dedupe from this file. */
+       /* status of this dedupe operation:
+        * < 0 for error
+        * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
+        * == FILE_DEDUPE_RANGE_DIFFERS if data differs
+        */
+       __s32 status;           /* out - see above description */
+       __u32 reserved;         /* must be zero */
+};
+
+/* from struct btrfs_ioctl_file_extent_same_args */
+struct file_dedupe_range {
+       __u64 src_offset;       /* in - start of extent in source */
+       __u64 src_length;       /* in - length of extent */
+       __u16 dest_count;       /* in - total elements in info array */
+       __u16 reserved1;        /* must be zero */
+       __u32 reserved2;        /* must be zero */
+       struct file_dedupe_range_info info[0];
+};
+
 /* And dynamically-tunable limits and defaults: */
 struct files_stat_struct {
        unsigned long nr_files;         /* read only */
@@ -159,6 +195,9 @@ struct inodes_stat_t {
 #define FIFREEZE       _IOWR('X', 119, int)    /* Freeze */
 #define FITHAW         _IOWR('X', 120, int)    /* Thaw */
 #define FITRIM         _IOWR('X', 121, struct fstrim_range)    /* Trim */
+#define FICLONE                _IOW(0x94, 9, int)
+#define FICLONERANGE   _IOW(0x94, 13, struct file_clone_range)
+#define FIDEDUPERANGE  _IOWR(0x94, 54, struct file_dedupe_range)
 
 #define        FS_IOC_GETFLAGS                 _IOR('f', 1, long)
 #define        FS_IOC_SETFLAGS                 _IOW('f', 2, long)
index 0623787ec67af5fb95f505b55c1577a2f56ce0f4..2c5e3a8e00d7bd152c045549d68458c1b76e253f 100644 (file)
@@ -174,6 +174,7 @@ cond_syscall(sys_setfsuid);
 cond_syscall(sys_setfsgid);
 cond_syscall(sys_capget);
 cond_syscall(sys_capset);
+cond_syscall(sys_copy_file_range);
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read);