vfs: hoist the btrfs deduplication ioctl to the vfs
author: Darrick J. Wong <darrick.wong@oracle.com>
Sat, 19 Dec 2015 08:55:59 +0000 (00:55 -0800)
committer: Al Viro <viro@zeniv.linux.org.uk>
Fri, 1 Jan 2016 07:36:19 +0000 (02:36 -0500)
Hoist the btrfs EXTENT_SAME ioctl up to the VFS and make the name
more systematic (FIDEDUPERANGE).

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/compat_ioctl.c
fs/ioctl.c
fs/read_write.c
include/linux/fs.h
include/uapi/linux/fs.h

index 70d4b104c08d4adc58bf8e40d5f522cf693a942b..eab31e74b9cc5e54e84db3f72aa4f384e024dd74 100644 (file)
@@ -1582,6 +1582,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
 
        case FICLONE:
        case FICLONERANGE:
+       case FIDEDUPERANGE:
                goto do_ioctl;
 
        case FIBMAP:
index 84c6e79829ab0ebb3b257c63b8cf34485288d401..fcdd33b7ec7814923367684238e3812d0fe039bb 100644 (file)
@@ -568,6 +568,58 @@ static int ioctl_fsthaw(struct file *filp)
        return thaw_super(sb);
 }
 
+/*
+ * FIDEDUPERANGE handler: copy the variable-length request in from
+ * userspace, pass it to vfs_dedupe_file_range(), and on success copy the
+ * whole request (including the per-destination results) back out.
+ *
+ * Returns 0 on success, -EFAULT if the user buffer cannot be read or
+ * written, the memdup_user() error if the copy-in fails, or whatever
+ * non-zero value vfs_dedupe_file_range() returned.
+ */
+static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
+{
+       struct file_dedupe_range __user *argp = arg;
+       struct file_dedupe_range *same = NULL;
+       int ret;
+       unsigned long size;
+       u16 count;
+
+       if (get_user(count, &argp->dest_count)) {
+               ret = -EFAULT;
+               goto out;
+       }
+
+       /* fixed header plus 'count' trailing info[] entries */
+       size = offsetof(struct file_dedupe_range __user, info[count]);
+
+       same = memdup_user(argp, size);
+       if (IS_ERR(same)) {
+               ret = PTR_ERR(same);
+               same = NULL;
+               goto out;
+       }
+
+       /*
+        * memdup_user() fetched dest_count a second time and userspace may
+        * have changed it since get_user().  Force it back to the value the
+        * buffer was sized with so nobody walks past the allocation.
+        */
+       same->dest_count = count;
+
+       ret = vfs_dedupe_file_range(file, same);
+       if (ret)
+               goto out;
+
+       ret = copy_to_user(argp, same, size);
+       if (ret)
+               ret = -EFAULT;
+
+out:
+       kfree(same);
+       return ret;
+}
+
 /*
  * When you add any new common ioctls to the switches above and below
  * please update compat_sys_ioctl() too.
@@ -629,6 +664,9 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
        case FICLONERANGE:
                return ioctl_file_clone_range(filp, argp);
 
+       case FIDEDUPERANGE:
+               return ioctl_file_dedupe_range(filp, argp);
+
        default:
                if (S_ISREG(inode->i_mode))
                        error = file_ioctl(filp, cmd, arg);
index 60ee269412313b90e1b5d399ee3bcaea0df701b6..2116e74a83d3f34e94335e0ab2530c27ef9b972d 100644 (file)
@@ -1523,3 +1523,122 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
        return ret;
 }
 EXPORT_SYMBOL(vfs_clone_file_range);
+
+/*
+ * Deduplicate one source range against every destination listed in @same.
+ *
+ * @file: source file; must be open for reading and be a regular file
+ * @same: request header followed by same->dest_count info[] entries
+ *
+ * Problems with the request as a whole (source not readable, reserved
+ * header fields set, source not a regular file, source range rejected by
+ * clone_verify_area()) are returned directly.  Problems with an individual
+ * destination are recorded in that entry's ->status only, because a
+ * non-zero return makes the FIDEDUPERANGE ioctl skip copying results out.
+ *
+ * Returns 0 once every destination has been attempted, or a negative errno
+ * if the request was rejected before any destination was looked at.
+ */
+int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
+{
+       struct file_dedupe_range_info *info;
+       struct inode *src = file_inode(file);
+       u64 off;
+       u64 len;
+       int i;
+       int ret;
+       bool is_admin = capable(CAP_SYS_ADMIN);
+       u16 count = same->dest_count;
+       struct file *dst_file;
+       loff_t dst_off;
+       ssize_t deduped;
+
+       if (!(file->f_mode & FMODE_READ))
+               return -EINVAL;
+
+       if (same->reserved1 || same->reserved2)
+               return -EINVAL;
+
+       off = same->src_offset;
+       len = same->src_length;
+
+       ret = -EISDIR;
+       if (S_ISDIR(src->i_mode))
+               goto out;
+
+       ret = -EINVAL;
+       if (!S_ISREG(src->i_mode))
+               goto out;
+
+       ret = clone_verify_area(file, off, len, false);
+       if (ret < 0)
+               goto out;
+       ret = 0;
+
+       /* pre-format output fields to sane values */
+       for (i = 0; i < count; i++) {
+               same->info[i].bytes_deduped = 0ULL;
+               same->info[i].status = FILE_DEDUPE_RANGE_SAME;
+       }
+
+       for (i = 0, info = same->info; i < count; i++, info++) {
+               struct inode *dst;
+               struct fd dst_fd = fdget(info->dest_fd);
+               int err;
+
+               dst_file = dst_fd.file;
+               if (!dst_file) {
+                       info->status = -EBADF;
+                       goto next_loop;
+               }
+               dst = file_inode(dst_file);
+
+               /*
+                * Per-destination failures are reported through ->status
+                * only; keep them out of 'ret' so the last entry's error
+                * cannot become the return value of the whole call.
+                */
+               err = mnt_want_write_file(dst_file);
+               if (err) {
+                       info->status = err;
+                       goto next_loop;
+               }
+
+               dst_off = info->dest_offset;
+               err = clone_verify_area(dst_file, dst_off, len, true);
+               if (err < 0) {
+                       info->status = err;
+                       goto next_file;
+               }
+
+               if (info->reserved) {
+                       info->status = -EINVAL;
+               } else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
+                       info->status = -EINVAL;
+               } else if (file->f_path.mnt != dst_file->f_path.mnt) {
+                       info->status = -EXDEV;
+               } else if (S_ISDIR(dst->i_mode)) {
+                       info->status = -EISDIR;
+               } else if (dst_file->f_op->dedupe_file_range == NULL) {
+                       info->status = -EINVAL;
+               } else {
+                       deduped = dst_file->f_op->dedupe_file_range(file, off,
+                                                       len, dst_file,
+                                                       info->dest_offset);
+                       if (deduped == -EBADE)
+                               info->status = FILE_DEDUPE_RANGE_DIFFERS;
+                       else if (deduped < 0)
+                               info->status = deduped;
+                       else
+                               info->bytes_deduped += deduped;
+               }
+
+next_file:
+               mnt_drop_write_file(dst_file);
+next_loop:
+               fdput(dst_fd);
+       }
+
+out:
+       return ret;
+}
+EXPORT_SYMBOL(vfs_dedupe_file_range);
index 5d987aefcf1e1fd84593fae0811edf4f7a290414..d71814b81a3c30b839e19ded0d50e4ce2f3da2e2 100644 (file)
@@ -1633,6 +1633,8 @@ struct file_operations {
                        loff_t, size_t, unsigned int);
        int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
                        u64);
+       ssize_t (*dedupe_file_range)(struct file *, u64, u64, struct file *,
+                       u64);
 };
 
 struct inode_operations {
@@ -1688,6 +1690,8 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
                                   loff_t, size_t, unsigned int);
 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
                struct file *file_out, loff_t pos_out, u64 len);
+extern int vfs_dedupe_file_range(struct file *file,
+                                struct file_dedupe_range *same);
 
 struct super_operations {
        struct inode *(*alloc_inode)(struct super_block *sb);
index cd5db7fb3cb765ba8ea2a03e526a8b53807df1a9..b38e647664a06300423743bd03b244877c19c072 100644 (file)
@@ -52,6 +52,35 @@ struct fstrim_range {
        __u64 minlen;
 };
 
+/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
+#define FILE_DEDUPE_RANGE_SAME         0       /* status: dedupe succeeded */
+#define FILE_DEDUPE_RANGE_DIFFERS      1       /* status: data differs */
+
+/* one destination of a dedupe request; from struct btrfs_ioctl_file_extent_same_info */
+struct file_dedupe_range_info {
+       __s64 dest_fd;          /* in - file descriptor of destination file */
+       __u64 dest_offset;      /* in - start of extent in destination */
+       __u64 bytes_deduped;    /* out - total # of bytes we were able
+                                * to dedupe from this file. */
+       /* status of this dedupe operation:
+        * < 0 for error (a negative errno value)
+        * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
+        * == FILE_DEDUPE_RANGE_DIFFERS if data differs
+        */
+       __s32 status;           /* out - see above description */
+       __u32 reserved;         /* must be zero, else status is -EINVAL */
+};
+
+/* FIDEDUPERANGE argument; from struct btrfs_ioctl_file_extent_same_args */
+struct file_dedupe_range {
+       __u64 src_offset;       /* in - start of extent in source */
+       __u64 src_length;       /* in - length of extent, used for every destination */
+       __u16 dest_count;       /* in - total elements in info array */
+       __u16 reserved1;        /* must be zero, else -EINVAL */
+       __u32 reserved2;        /* must be zero, else -EINVAL */
+       struct file_dedupe_range_info info[0];  /* in/out - dest_count entries */
+};
+
 /* And dynamically-tunable limits and defaults: */
 struct files_stat_struct {
        unsigned long nr_files;         /* read only */
@@ -168,6 +197,7 @@ struct inodes_stat_t {
 #define FITRIM         _IOWR('X', 121, struct fstrim_range)    /* Trim */
 #define FICLONE                _IOW(0x94, 9, int)
 #define FICLONERANGE   _IOW(0x94, 13, struct file_clone_range)
+#define FIDEDUPERANGE  _IOWR(0x94, 54, struct file_dedupe_range)
 
 #define        FS_IOC_GETFLAGS                 _IOR('f', 1, long)
 #define        FS_IOC_SETFLAGS                 _IOW('f', 2, long)