Merge branch 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 18 Jan 2016 20:44:40 +0000 (12:44 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 18 Jan 2016 20:44:40 +0000 (12:44 -0800)
diff --combined fs/btrfs/acl.c

index f89db0c21b51edaadda1d1544425010d89e92ae2,dbbb8ed53a518a0ac3fa0207afe8d9041e7afba2..6d263bb1621cd92c51cc6ebd014f8ad06dcd719f
--- 1/fs/btrfs/acl.c
--- 2/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@@ -37,10 -37,10 +37,10 @@@ struct posix_acl *btrfs_get_acl(struct 
   
         switch (type) {
         case ACL_TYPE_ACCESS:
- -              name = POSIX_ACL_XATTR_ACCESS;
+ +              name = XATTR_NAME_POSIX_ACL_ACCESS;
                 break;
         case ACL_TYPE_DEFAULT:
- -              name = POSIX_ACL_XATTR_DEFAULT;
+ +              name = XATTR_NAME_POSIX_ACL_DEFAULT;
                 break;
         default:
                 BUG();
@@@ -48,7 -48,7 +48,7 @@@
   
         size = __btrfs_getxattr(inode, name, "", 0);
         if (size > 0) {
-               value = kzalloc(size, GFP_NOFS);
+               value = kzalloc(size, GFP_KERNEL);
                 if (!value)
                         return ERR_PTR(-ENOMEM);
                 size = __btrfs_getxattr(inode, name, value, size);
@@@ -81,7 -81,7 +81,7 @@@ static int __btrfs_set_acl(struct btrfs
   
         switch (type) {
         case ACL_TYPE_ACCESS:
- -              name = POSIX_ACL_XATTR_ACCESS;
+ +              name = XATTR_NAME_POSIX_ACL_ACCESS;
                 if (acl) {
                         ret = posix_acl_equiv_mode(acl, &inode->i_mode);
                         if (ret < 0)
@@@ -94,7 -94,7 +94,7 @@@
         case ACL_TYPE_DEFAULT:
                 if (!S_ISDIR(inode->i_mode))
                         return acl ? -EINVAL : 0;
- -              name = POSIX_ACL_XATTR_DEFAULT;
+ +              name = XATTR_NAME_POSIX_ACL_DEFAULT;
                 break;
         default:
                 return -EINVAL;
@@@ -102,7 -102,7 +102,7 @@@
   
         if (acl) {
                 size = posix_acl_xattr_size(acl->a_count);
-               value = kmalloc(size, GFP_NOFS);
+               value = kmalloc(size, GFP_KERNEL);
                 if (!value) {
                         ret = -ENOMEM;
                         goto out;
diff --combined fs/btrfs/ctree.h

index b7e4e344e8e0a510697a56bf553d833c5bcbeb06,c5f40dc1f74fc06508ed794bde57e67b6fcd09c8..97ad9bbeb35d24ec0ad228aa6f040b61bffb33a5
--- 1/fs/btrfs/ctree.h
--- 2/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@@ -35,6 -35,7 +35,7 @@@
   #include <linux/btrfs.h>
   #include <linux/workqueue.h>
   #include <linux/security.h>
+ #include <linux/sizes.h>
   #include "extent_io.h"
   #include "extent_map.h"
   #include "async-thread.h"
@@@ -96,6 -97,9 +97,9 @@@ struct btrfs_ordered_sum
   /* for storing items that use the BTRFS_UUID_KEY* types */
   #define BTRFS_UUID_TREE_OBJECTID 9ULL
   
+ /* tracks free space in block groups. */
+ #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+ 
   /* for storing balance parameters in the root tree */
   #define BTRFS_BALANCE_OBJECTID -4ULL
   
@@@ -174,7 -178,7 +178,7 @@@
   /* csum types */
   #define BTRFS_CSUM_TYPE_CRC32 0
   
- static int btrfs_csum_sizes[] = { 4 };
+ static const int btrfs_csum_sizes[] = { 4 };
   
   /* four bytes for CRC32 */
   #define BTRFS_EMPTY_DIR_SIZE 0
@@@ -196,9 -200,9 +200,9 @@@
   /* ioprio of readahead is set to idle */
   #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
   
- #define BTRFS_DIRTY_METADATA_THRESH   (32 * 1024 * 1024)
+ #define BTRFS_DIRTY_METADATA_THRESH   SZ_32M
   
- #define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024)
+ #define BTRFS_MAX_EXTENT_SIZE SZ_128M
   
   /*
    * The key defines the order in the tree, and so it also defines (optimal)
@@@ -500,6 -504,8 +504,8 @@@ struct btrfs_super_block 
    * Compat flags that we support.  If any incompat flags are set other than the
    * ones specified below then we will fail to mount
    */
+ #define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE       (1ULL << 0)
+ 
   #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF  (1ULL << 0)
   #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
   #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS   (1ULL << 2)
@@@ -526,7 -532,10 +532,10 @@@
   #define BTRFS_FEATURE_COMPAT_SUPP             0ULL
   #define BTRFS_FEATURE_COMPAT_SAFE_SET         0ULL
   #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR               0ULL
- #define BTRFS_FEATURE_COMPAT_RO_SUPP          0ULL
+ 
+ #define BTRFS_FEATURE_COMPAT_RO_SUPP                  \
+       (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
+ 
   #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET      0ULL
   #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR    0ULL
   
@@@ -590,14 -599,15 +599,15 @@@ struct btrfs_node 
    * The slots array records the index of the item or block pointer
    * used while walking the tree.
    */
+ enum { READA_NONE = 0, READA_BACK, READA_FORWARD };
   struct btrfs_path {
         struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
         int slots[BTRFS_MAX_LEVEL];
         /* if there is real range locking, this locks field will change */
-       int locks[BTRFS_MAX_LEVEL];
-       int reada;
+       u8 locks[BTRFS_MAX_LEVEL];
+       u8 reada;
         /* keep some upper locks as we walk down */
-       int lowest_level;
+       u8 lowest_level;
   
         /*
          * set by btrfs_split_item, tells search_slot to keep all locks
@@@ -1088,6 -1098,13 +1098,13 @@@ struct btrfs_block_group_item 
         __le64 flags;
   } __attribute__ ((__packed__));
   
+ struct btrfs_free_space_info {
+       __le32 extent_count;
+       __le32 flags;
+ } __attribute__ ((__packed__));
+ 
+ #define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
+ 
   #define BTRFS_QGROUP_LEVEL_SHIFT              48
   static inline u64 btrfs_qgroup_level(u64 qgroupid)
   {
@@@ -1296,6 -1313,9 +1313,9 @@@ struct btrfs_caching_control 
         atomic_t count;
   };
   
+ /* Once caching_thread() finds this much free space, it will wake up waiters. */
+ #define CACHING_CTL_WAKE_UP (1024 * 1024 * 2)
+ 
   struct btrfs_io_ctl {
         void *cur, *orig;
         struct page *page;
@@@ -1321,8 -1341,20 +1341,20 @@@ struct btrfs_block_group_cache 
         u64 delalloc_bytes;
         u64 bytes_super;
         u64 flags;
-       u64 sectorsize;
         u64 cache_generation;
+       u32 sectorsize;
+ 
+       /*
+        * If the free space extent count exceeds this number, convert the block
+        * group to bitmaps.
+        */
+       u32 bitmap_high_thresh;
+ 
+       /*
+        * If the free space extent count drops below this number, convert the
+        * block group back to extents.
+        */
+       u32 bitmap_low_thresh;
   
         /*
          * It is just used for the delayed data space allocation because
@@@ -1378,6 -1410,15 +1410,15 @@@
         struct list_head io_list;
   
         struct btrfs_io_ctl io_ctl;
+ 
+       /* Lock for free space tree operations. */
+       struct mutex free_space_lock;
+ 
+       /*
+        * Does the block group need to be added to the free space tree?
+        * Protected by free_space_lock.
+        */
+       int needs_free_space;
   };
   
   /* delayed seq elem */
@@@ -1429,6 -1470,7 +1470,7 @@@ struct btrfs_fs_info 
         struct btrfs_root *csum_root;
         struct btrfs_root *quota_root;
         struct btrfs_root *uuid_root;
+       struct btrfs_root *free_space_root;
   
         /* the log root tree is a directory of all the other log roots */
         struct btrfs_root *log_root_tree;
@@@ -1816,6 -1858,8 +1858,8 @@@
          * and will be latter freed. Protected by fs_info->chunk_mutex.
          */
         struct list_head pinned_chunks;
+ 
+       int creating_free_space_tree;
   };
   
   struct btrfs_subvolume_writers {
@@@ -2092,6 -2136,27 +2136,27 @@@ struct btrfs_ioctl_defrag_range_args 
    */
   #define BTRFS_BLOCK_GROUP_ITEM_KEY 192
   
+ /*
+  * Every block group is represented in the free space tree by a free space info
+  * item, which stores some accounting information. It is keyed on
+  * (block_group_start, FREE_SPACE_INFO, block_group_length).
+  */
+ #define BTRFS_FREE_SPACE_INFO_KEY 198
+ 
+ /*
+  * A free space extent tracks an extent of space that is free in a block group.
+  * It is keyed on (start, FREE_SPACE_EXTENT, length).
+  */
+ #define BTRFS_FREE_SPACE_EXTENT_KEY 199
+ 
+ /*
+  * When a block group becomes very fragmented, we convert it to use bitmaps
+  * instead of extents. A free space bitmap is keyed on
+  * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
+  * (length / sectorsize) bits.
+  */
+ #define BTRFS_FREE_SPACE_BITMAP_KEY 200
+ 
   #define BTRFS_DEV_EXTENT_KEY  204
   #define BTRFS_DEV_ITEM_KEY    216
   #define BTRFS_CHUNK_ITEM_KEY  228
@@@ -2184,6 -2249,7 +2249,7 @@@
   #define BTRFS_MOUNT_RESCAN_UUID_TREE  (1 << 23)
   #define BTRFS_MOUNT_FRAGMENT_DATA     (1 << 24)
   #define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
+ #define BTRFS_MOUNT_FREE_SPACE_TREE   (1 << 26)
   
   #define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
   #define BTRFS_DEFAULT_MAX_INLINE      (8192)
@@@ -2506,6 -2572,11 +2572,11 @@@ BTRFS_SETGET_FUNCS(disk_block_group_fla
   BTRFS_SETGET_STACK_FUNCS(block_group_flags,
                         struct btrfs_block_group_item, flags, 64);
   
+ /* struct btrfs_free_space_info */
+ BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info,
+                  extent_count, 32);
+ BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32);
+ 
   /* struct btrfs_inode_ref */
   BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
   BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
@@@ -3573,6 -3644,9 +3644,9 @@@ void btrfs_end_write_no_snapshoting(str
   void check_system_chunk(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         const u64 type);
+ u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
+                      struct btrfs_fs_info *info, u64 start, u64 end);
+ 
   /* ctree.c */
   int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                      int level, int *slot);
@@@ -3737,6 -3811,7 +3811,7 @@@ static inline void free_fs_info(struct 
         kfree(fs_info->csum_root);
         kfree(fs_info->quota_root);
         kfree(fs_info->uuid_root);
+       kfree(fs_info->free_space_root);
         kfree(fs_info->super_copy);
         kfree(fs_info->super_for_commit);
         security_free_mnt_opts(&fs_info->security_opts);
@@@ -3906,7 -3981,6 +3981,6 @@@ void btrfs_extent_item_to_extent_map(st
   /* inode.c */
   struct btrfs_delalloc_work {
         struct inode *inode;
-       int wait;
         int delay_iput;
         struct completion completion;
         struct list_head list;
@@@ -3914,7 -3988,7 +3988,7 @@@
   };
   
   struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
-                                                   int wait, int delay_iput);
+                                                   int delay_iput);
   void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
   
   struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
@@@ -4024,8 -4098,7 +4098,8 @@@ void btrfs_get_block_group_info(struct 
                                 struct btrfs_ioctl_space_info *space);
   void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
                                struct btrfs_ioctl_balance_args *bargs);
- -
+ +ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+ +                         struct file *dst_file, u64 dst_loff);
   
   /* file.c */
   int btrfs_auto_defrag_init(void);
@@@ -4056,11 -4129,6 +4130,11 @@@ int btrfs_dirty_pages(struct btrfs_roo
                       loff_t pos, size_t write_bytes,
                       struct extent_state **cached);
   int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
+ +ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
+ +                            struct file *file_out, loff_t pos_out,
+ +                            size_t len, unsigned int flags);
+ +int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
+ +                         struct file *file_out, loff_t pos_out, u64 len);
   
   /* tree-defrag.c */
   int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
@@@ -4253,16 -4321,98 +4327,98 @@@ static inline void __btrfs_set_fs_incom
         }
   }
   
+ #define btrfs_clear_fs_incompat(__fs_info, opt) \
+       __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+ 
+ static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info,
+                                            u64 flag)
+ {
+       struct btrfs_super_block *disk_super;
+       u64 features;
+ 
+       disk_super = fs_info->super_copy;
+       features = btrfs_super_incompat_flags(disk_super);
+       if (features & flag) {
+               spin_lock(&fs_info->super_lock);
+               features = btrfs_super_incompat_flags(disk_super);
+               if (features & flag) {
+                       features &= ~flag;
+                       btrfs_set_super_incompat_flags(disk_super, features);
+                       btrfs_info(fs_info, "clearing %llu feature flag",
+                                        flag);
+               }
+               spin_unlock(&fs_info->super_lock);
+       }
+ }
+ 
   #define btrfs_fs_incompat(fs_info, opt) \
         __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
   
- static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
+ static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
   {
         struct btrfs_super_block *disk_super;
         disk_super = fs_info->super_copy;
         return !!(btrfs_super_incompat_flags(disk_super) & flag);
   }
   
+ #define btrfs_set_fs_compat_ro(__fs_info, opt) \
+       __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+ 
+ static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info,
+                                           u64 flag)
+ {
+       struct btrfs_super_block *disk_super;
+       u64 features;
+ 
+       disk_super = fs_info->super_copy;
+       features = btrfs_super_compat_ro_flags(disk_super);
+       if (!(features & flag)) {
+               spin_lock(&fs_info->super_lock);
+               features = btrfs_super_compat_ro_flags(disk_super);
+               if (!(features & flag)) {
+                       features |= flag;
+                       btrfs_set_super_compat_ro_flags(disk_super, features);
+                       btrfs_info(fs_info, "setting %llu ro feature flag",
+                                  flag);
+               }
+               spin_unlock(&fs_info->super_lock);
+       }
+ }
+ 
+ #define btrfs_clear_fs_compat_ro(__fs_info, opt) \
+       __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+ 
+ static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info,
+                                             u64 flag)
+ {
+       struct btrfs_super_block *disk_super;
+       u64 features;
+ 
+       disk_super = fs_info->super_copy;
+       features = btrfs_super_compat_ro_flags(disk_super);
+       if (features & flag) {
+               spin_lock(&fs_info->super_lock);
+               features = btrfs_super_compat_ro_flags(disk_super);
+               if (features & flag) {
+                       features &= ~flag;
+                       btrfs_set_super_compat_ro_flags(disk_super, features);
+                       btrfs_info(fs_info, "clearing %llu ro feature flag",
+                                  flag);
+               }
+               spin_unlock(&fs_info->super_lock);
+       }
+ }
+ 
+ #define btrfs_fs_compat_ro(fs_info, opt) \
+       __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+ 
+ static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
+ {
+       struct btrfs_super_block *disk_super;
+       disk_super = fs_info->super_copy;
+       return !!(btrfs_super_compat_ro_flags(disk_super) & flag);
+ }
+ 
   /*
    * Call btrfs_abort_transaction as early as possible when an error condition is
    * detected, that way the exact line number is reported.
diff --combined fs/btrfs/disk-io.c

index 42a378a4eefb4cd198c0d328eecaec293735f3af,c67c129fe99a537aa1ec72d76ef111109078c776..e99ccd6ffb2c14f58bf38f548e202f0e0f86e9ea
--- 1/fs/btrfs/disk-io.c
--- 2/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@@ -42,6 -42,7 +42,7 @@@
   #include "locking.h"
   #include "tree-log.h"
   #include "free-space-cache.h"
+ #include "free-space-tree.h"
   #include "inode-map.h"
   #include "check-integrity.h"
   #include "rcu-string.h"
@@@ -362,7 -363,7 +363,7 @@@ static int verify_parent_transid(struc
         }
   
         lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
-                        0, &cached_state);
+                        &cached_state);
         if (extent_buffer_uptodate(eb) &&
             btrfs_header_generation(eb) == parent_transid) {
                 ret = 0;
@@@ -923,7 -924,7 +924,7 @@@ static int check_async_write(struct ino
         if (bio_flags & EXTENT_BIO_TREE_LOG)
                 return 0;
   #ifdef CONFIG_X86
- -      if (cpu_has_xmm4_2)
+ +      if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
                 return 0;
   #endif
         return 1;
@@@ -1650,6 -1651,9 +1651,9 @@@ struct btrfs_root *btrfs_get_fs_root(st
         if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
                 return fs_info->uuid_root ? fs_info->uuid_root :
                                             ERR_PTR(-ENOENT);
+       if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
+               return fs_info->free_space_root ? fs_info->free_space_root :
+                                                 ERR_PTR(-ENOENT);
   again:
         root = btrfs_lookup_fs_root(fs_info, location->objectid);
         if (root) {
@@@ -2148,6 -2152,7 +2152,7 @@@ static void free_root_pointers(struct b
         free_root_extent_buffers(info->uuid_root);
         if (chunk_root)
                 free_root_extent_buffers(info->chunk_root);
+       free_root_extent_buffers(info->free_space_root);
   }
   
   void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
@@@ -2448,6 -2453,15 +2453,15 @@@ static int btrfs_read_roots(struct btrf
                 fs_info->uuid_root = root;
         }
   
+       if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+               location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
+               root = btrfs_read_tree_root(tree_root, &location);
+               if (IS_ERR(root))
+                       return PTR_ERR(root);
+               set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+               fs_info->free_space_root = root;
+       }
+ 
         return 0;
   }
   
@@@ -2668,6 -2682,7 +2682,7 @@@ int open_ctree(struct super_block *sb
         if (btrfs_check_super_csum(bh->b_data)) {
                 printk(KERN_ERR "BTRFS: superblock checksum mismatch\n");
                 err = -EINVAL;
+               brelse(bh);
                 goto fail_alloc;
         }
   
@@@ -2809,7 -2824,7 +2824,7 @@@
   
         fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
         fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
-                                   4 * 1024 * 1024 / PAGE_CACHE_SIZE);
+                                   SZ_4M / PAGE_CACHE_SIZE);
   
         tree_root->nodesize = nodesize;
         tree_root->sectorsize = sectorsize;
@@@ -3051,6 -3066,18 +3066,18 @@@ retry_root_backup
         if (sb->s_flags & MS_RDONLY)
                 return 0;
   
+       if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) &&
+           !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+               pr_info("BTRFS: creating free space tree\n");
+               ret = btrfs_create_free_space_tree(fs_info);
+               if (ret) {
+                       pr_warn("BTRFS: failed to create free space tree %d\n",
+                               ret);
+                       close_ctree(tree_root);
+                       return ret;
+               }
+       }
+ 
         down_read(&fs_info->cleanup_work_sem);
         if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
             (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
@@@ -3076,6 -3103,18 +3103,18 @@@
   
         btrfs_qgroup_rescan_resume(fs_info);
   
+       if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
+           btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+               pr_info("BTRFS: clearing free space tree\n");
+               ret = btrfs_clear_free_space_tree(fs_info);
+               if (ret) {
+                       pr_warn("BTRFS: failed to clear free space tree %d\n",
+                               ret);
+                       close_ctree(tree_root);
+                       return ret;
+               }
+       }
+ 
         if (!fs_info->uuid_root) {
                 pr_info("BTRFS: creating UUID tree\n");
                 ret = btrfs_create_uuid_tree(fs_info);
@@@ -3902,11 -3941,6 +3941,6 @@@ int btrfs_buffer_uptodate(struct extent
         return !ret;
   }
   
- int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
- {
-       return set_extent_buffer_uptodate(buf);
- }
- 
   void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
   {
         struct btrfs_root *root;
@@@ -3962,7 -3996,6 +3996,6 @@@ static void __btrfs_btree_balance_dirty
                 balance_dirty_pages_ratelimited(
                                    root->fs_info->btree_inode->i_mapping);
         }
-       return;
   }
   
   void btrfs_btree_balance_dirty(struct btrfs_root *root)
diff --combined fs/btrfs/file.c

index e3d9022bfd4e3c2861008104d13e55050aa91f13,364e0f1f61f68ab4e32b83bff9e11465295d0d05..83d7859d76199d96ec882c35c87dea98526d229e
--- 1/fs/btrfs/file.c
--- 2/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@@ -1394,7 -1394,7 +1394,7 @@@ lock_and_cleanup_extent_if_need(struct 
         if (start_pos < inode->i_size) {
                 struct btrfs_ordered_extent *ordered;
                 lock_extent_bits(&BTRFS_I(inode)->io_tree,
-                                start_pos, last_pos, 0, cached_state);
+                                start_pos, last_pos, cached_state);
                 ordered = btrfs_lookup_ordered_range(inode, start_pos,
                                                      last_pos - start_pos + 1);
                 if (ordered &&
@@@ -2398,7 -2398,7 +2398,7 @@@ static int btrfs_punch_hole(struct inod
                 truncate_pagecache_range(inode, lockstart, lockend);
   
                 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                0, &cached_state);
+                                &cached_state);
                 ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
   
                 /*
@@@ -2705,7 -2705,7 +2705,7 @@@ static long btrfs_fallocate(struct fil
                  * transaction
                  */
                 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
-                                locked_end, 0, &cached_state);
+                                locked_end, &cached_state);
                 ordered = btrfs_lookup_first_ordered_extent(inode,
                                                             alloc_end - 1);
                 if (ordered &&
@@@ -2852,7 -2852,7 +2852,7 @@@ static int find_desired_extent(struct i
         lockend--;
         len = lockend - lockstart + 1;
   
-       lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
+       lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                          &cached_state);
   
         while (start < inode->i_size) {
@@@ -2934,9 -2934,6 +2934,9 @@@ const struct file_operations btrfs_file
   #ifdef CONFIG_COMPAT
         .compat_ioctl   = btrfs_ioctl,
   #endif
+ +      .copy_file_range = btrfs_copy_file_range,
+ +      .clone_file_range = btrfs_clone_file_range,
+ +      .dedupe_file_range = btrfs_dedupe_file_range,
   };
   
   void btrfs_auto_defrag_exit(void)
diff --combined fs/btrfs/inode.c

index 394017831692beb5a59ff7b2c747c0a824e188ce,85afe66955cf395611892e31a31ca05245146f69..24783010768680bea28f4f0e5e9aaa2c6a62a41c
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -66,6 -66,13 +66,13 @@@ struct btrfs_iget_args 
         struct btrfs_root *root;
   };
   
+ struct btrfs_dio_data {
+       u64 outstanding_extents;
+       u64 reserve;
+       u64 unsubmitted_oe_range_start;
+       u64 unsubmitted_oe_range_end;
+ };
+ 
   static const struct inode_operations btrfs_dir_inode_operations;
   static const struct inode_operations btrfs_symlink_inode_operations;
   static const struct inode_operations btrfs_dir_ro_inode_operations;
@@@ -74,17 -81,16 +81,16 @@@ static const struct inode_operations bt
   static const struct address_space_operations btrfs_aops;
   static const struct address_space_operations btrfs_symlink_aops;
   static const struct file_operations btrfs_dir_file_operations;
- static struct extent_io_ops btrfs_extent_io_ops;
+ static const struct extent_io_ops btrfs_extent_io_ops;
   
   static struct kmem_cache *btrfs_inode_cachep;
- static struct kmem_cache *btrfs_delalloc_work_cachep;
   struct kmem_cache *btrfs_trans_handle_cachep;
   struct kmem_cache *btrfs_transaction_cachep;
   struct kmem_cache *btrfs_path_cachep;
   struct kmem_cache *btrfs_free_space_cachep;
   
   #define S_SHIFT 12
- static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+ static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
@@@ -414,15 -420,15 +420,15 @@@ static noinline void compress_file_rang
         unsigned long nr_pages_ret = 0;
         unsigned long total_compressed = 0;
         unsigned long total_in = 0;
-       unsigned long max_compressed = 128 * 1024;
-       unsigned long max_uncompressed = 128 * 1024;
+       unsigned long max_compressed = SZ_128K;
+       unsigned long max_uncompressed = SZ_128K;
         int i;
         int will_compress;
         int compress_type = root->fs_info->compress_type;
         int redirty = 0;
   
         /* if this is a small write inside eof, kick off a defrag */
-       if ((end - start + 1) < 16 * 1024 &&
+       if ((end - start + 1) < SZ_16K &&
             (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
                 btrfs_add_inode_defrag(NULL, inode);
   
@@@ -430,7 -436,7 +436,7 @@@
   again:
         will_compress = 0;
         nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
-       nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE);
+       nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_CACHE_SIZE);
   
         /*
          * we don't want to send crud past the end of i_size through
@@@ -944,7 -950,7 +950,7 @@@ static noinline int cow_file_range(stru
         disk_num_bytes = num_bytes;
   
         /* if this is a small write inside eof, kick off defrag */
-       if (num_bytes < 64 * 1024 &&
+       if (num_bytes < SZ_64K &&
             (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
                 btrfs_add_inode_defrag(NULL, inode);
   
@@@ -1107,7 -1113,7 +1113,7 @@@ static noinline void async_cow_submit(s
          * atomic_sub_return implies a barrier for waitqueue_active
          */
         if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
-           5 * 1024 * 1024 &&
+           5 * SZ_1M &&
             waitqueue_active(&root->fs_info->async_submit_wait))
                 wake_up(&root->fs_info->async_submit_wait);
   
@@@ -1132,7 -1138,7 +1138,7 @@@ static int cow_file_range_async(struct 
         struct btrfs_root *root = BTRFS_I(inode)->root;
         unsigned long nr_pages;
         u64 cur_end;
-       int limit = 10 * 1024 * 1024;
+       int limit = 10 * SZ_1M;
   
         clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
                          1, 0, NULL, GFP_NOFS);
@@@ -1148,7 -1154,7 +1154,7 @@@
                     !btrfs_test_opt(root, FORCE_COMPRESS))
                         cur_end = end;
                 else
-                       cur_end = min(end, start + 512 * 1024 - 1);
+                       cur_end = min(end, start + SZ_512K - 1);
   
                 async_cow->end = cur_end;
                 INIT_LIST_HEAD(&async_cow->extents);
@@@ -1989,7 -1995,7 +1995,7 @@@ again
         page_start = page_offset(page);
         page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
   
-       lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
+       lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
                          &cached_state);
   
         /* already ordered? We're done */
@@@ -2482,7 -2488,7 +2488,7 @@@ static noinline int relink_extent_backr
         lock_start = backref->file_pos;
         lock_end = backref->file_pos + backref->num_bytes - 1;
         lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
-                        0, &cached);
+                        &cached);
   
         ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
         if (ordered) {
@@@ -2874,7 -2880,7 +2880,7 @@@ static int btrfs_finish_ordered_io(stru
   
         lock_extent_bits(io_tree, ordered_extent->file_offset,
                          ordered_extent->file_offset + ordered_extent->len - 1,
-                        0, &cached_state);
+                        &cached_state);
   
         ret = test_range_bit(io_tree, ordered_extent->file_offset,
                         ordered_extent->file_offset + ordered_extent->len - 1,
@@@ -3106,55 -3112,47 +3112,47 @@@ static int btrfs_readpage_end_io_hook(s
                                       start, (size_t)(end - start + 1));
   }
   
- struct delayed_iput {
-       struct list_head list;
-       struct inode *inode;
- };
- 
- /* JDM: If this is fs-wide, why can't we add a pointer to
-  * btrfs_inode instead and avoid the allocation? */
   void btrfs_add_delayed_iput(struct inode *inode)
   {
         struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
-       struct delayed_iput *delayed;
+       struct btrfs_inode *binode = BTRFS_I(inode);
   
         if (atomic_add_unless(&inode->i_count, -1, 1))
                 return;
   
-       delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
-       delayed->inode = inode;
- 
         spin_lock(&fs_info->delayed_iput_lock);
-       list_add_tail(&delayed->list, &fs_info->delayed_iputs);
+       if (binode->delayed_iput_count == 0) {
+               ASSERT(list_empty(&binode->delayed_iput));
+               list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
+       } else {
+               binode->delayed_iput_count++;
+       }
         spin_unlock(&fs_info->delayed_iput_lock);
   }
   
   void btrfs_run_delayed_iputs(struct btrfs_root *root)
   {
-       LIST_HEAD(list);
         struct btrfs_fs_info *fs_info = root->fs_info;
-       struct delayed_iput *delayed;
-       int empty;
- 
-       spin_lock(&fs_info->delayed_iput_lock);
-       empty = list_empty(&fs_info->delayed_iputs);
-       spin_unlock(&fs_info->delayed_iput_lock);
-       if (empty)
-               return;
   
         down_read(&fs_info->delayed_iput_sem);
- 
         spin_lock(&fs_info->delayed_iput_lock);
-       list_splice_init(&fs_info->delayed_iputs, &list);
-       spin_unlock(&fs_info->delayed_iput_lock);
- 
-       while (!list_empty(&list)) {
-               delayed = list_entry(list.next, struct delayed_iput, list);
-               list_del(&delayed->list);
-               iput(delayed->inode);
-               kfree(delayed);
+       while (!list_empty(&fs_info->delayed_iputs)) {
+               struct btrfs_inode *inode;
+ 
+               inode = list_first_entry(&fs_info->delayed_iputs,
+                               struct btrfs_inode, delayed_iput);
+               if (inode->delayed_iput_count) {
+                       inode->delayed_iput_count--;
+                       list_move_tail(&inode->delayed_iput,
+                                       &fs_info->delayed_iputs);
+               } else {
+                       list_del_init(&inode->delayed_iput);
+               }
+               spin_unlock(&fs_info->delayed_iput_lock);
+               iput(&inode->vfs_inode);
+               spin_lock(&fs_info->delayed_iput_lock);
         }
- 
+       spin_unlock(&fs_info->delayed_iput_lock);
         up_read(&root->fs_info->delayed_iput_sem);
   }
   
@@@ -3351,7 -3349,7 +3349,7 @@@ int btrfs_orphan_cleanup(struct btrfs_r
                 ret = -ENOMEM;
                 goto out;
         }
-       path->reada = -1;
+       path->reada = READA_BACK;
   
         key.objectid = BTRFS_ORPHAN_OBJECTID;
         key.type = BTRFS_ORPHAN_ITEM_KEY;
@@@ -3550,10 -3548,10 +3548,10 @@@ static noinline int acls_after_inode_it
         int scanned = 0;
   
         if (!xattr_access) {
- -              xattr_access = btrfs_name_hash(POSIX_ACL_XATTR_ACCESS,
- -                                      strlen(POSIX_ACL_XATTR_ACCESS));
- -              xattr_default = btrfs_name_hash(POSIX_ACL_XATTR_DEFAULT,
- -                                      strlen(POSIX_ACL_XATTR_DEFAULT));
+ +              xattr_access = btrfs_name_hash(XATTR_NAME_POSIX_ACL_ACCESS,
+ +                                      strlen(XATTR_NAME_POSIX_ACL_ACCESS));
+ +              xattr_default = btrfs_name_hash(XATTR_NAME_POSIX_ACL_DEFAULT,
+ +                                      strlen(XATTR_NAME_POSIX_ACL_DEFAULT));
         }
   
         slot++;
@@@ -3774,7 -3772,6 +3772,7 @@@ cache_acl
                 break;
         case S_IFLNK:
                 inode->i_op = &btrfs_symlink_inode_operations;
+ +              inode_nohighmem(inode);
                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
                 break;
         default:
@@@ -4318,7 -4315,7 +4316,7 @@@ int btrfs_truncate_inode_items(struct b
         path = btrfs_alloc_path();
         if (!path)
                 return -ENOMEM;
-       path->reada = -1;
+       path->reada = READA_BACK;
   
         /*
          * We want to drop from the next block forward in case this new size is
@@@ -4349,7 -4346,7 +4347,7 @@@ search_again
          * up a huge file in a single leaf.  Most of the time that
          * bytes_deleted is > 0, it will be huge by the time we get here
          */
-       if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
+       if (be_nice && bytes_deleted > SZ_32M) {
                 if (btrfs_should_end_transaction(trans, root)) {
                         err = -EAGAIN;
                         goto error;
@@@ -4592,7 -4589,7 +4590,7 @@@ error
   
         btrfs_free_path(path);
   
-       if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
+       if (be_nice && bytes_deleted > SZ_32M) {
                 unsigned long updates = trans->delayed_ref_updates;
                 if (updates) {
                         trans->delayed_ref_updates = 0;
@@@ -4669,7 -4666,7 +4667,7 @@@ again
         }
         wait_on_page_writeback(page);
   
-       lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
+       lock_extent_bits(io_tree, page_start, page_end, &cached_state);
         set_page_extent_mapped(page);
   
         ordered = btrfs_lookup_ordered_extent(inode, page_start);
@@@ -4800,7 -4797,7 +4798,7 @@@ int btrfs_cont_expand(struct inode *ino
         while (1) {
                 struct btrfs_ordered_extent *ordered;
   
-               lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
+               lock_extent_bits(io_tree, hole_start, block_end - 1,
                                  &cached_state);
                 ordered = btrfs_lookup_ordered_range(inode, hole_start,
                                                      block_end - hole_start);
@@@ -5112,7 -5109,7 +5110,7 @@@ static void evict_inode_truncate_pages(
                 end = state->end;
                 spin_unlock(&io_tree->lock);
   
-               lock_extent_bits(io_tree, start, end, 0, &cached_state);
+               lock_extent_bits(io_tree, start, end, &cached_state);
   
                 /*
                  * If still has DELALLOC flag, the extent didn't reach disk,
@@@ -5305,7 -5302,6 +5303,6 @@@ void btrfs_evict_inode(struct inode *in
   no_delete:
         btrfs_remove_delayed_node(inode);
         clear_inode(inode);
-       return;
   }
   
   /*
@@@ -5754,7 -5750,7 +5751,7 @@@ static int btrfs_real_readdir(struct fi
         if (!path)
                 return -ENOMEM;
   
-       path->reada = 1;
+       path->reada = READA_FORWARD;
   
         if (key_type == BTRFS_DIR_INDEX_KEY) {
                 INIT_LIST_HEAD(&ins_list);
@@@ -6482,7 -6478,7 +6479,7 @@@ out_unlock_inode
   static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                       struct dentry *dentry)
   {
-       struct btrfs_trans_handle *trans;
+       struct btrfs_trans_handle *trans = NULL;
         struct btrfs_root *root = BTRFS_I(dir)->root;
         struct inode *inode = d_inode(old_dentry);
         u64 index;
@@@ -6508,6 -6504,7 +6505,7 @@@
         trans = btrfs_start_transaction(root, 5);
         if (IS_ERR(trans)) {
                 err = PTR_ERR(trans);
+               trans = NULL;
                 goto fail;
         }
   
@@@ -6541,9 -6538,10 +6539,10 @@@
                 btrfs_log_new_name(trans, inode, NULL, parent);
         }
   
-       btrfs_end_transaction(trans, root);
         btrfs_balance_delayed_items(root);
   fail:
+       if (trans)
+               btrfs_end_transaction(trans, root);
         if (drop_inode) {
                 inode_dec_link_count(inode);
                 iput(inode);
@@@ -6688,7 -6686,7 +6687,7 @@@ static int merge_extent_mapping(struct 
   }
   
   static noinline int uncompress_inline(struct btrfs_path *path,
-                                     struct inode *inode, struct page *page,
+                                     struct page *page,
                                       size_t pg_offset, u64 extent_offset,
                                       struct btrfs_file_extent_item *item)
   {
@@@ -6785,7 -6783,7 +6784,7 @@@ again
                  * Chances are we'll be called again, so go ahead and do
                  * readahead
                  */
-               path->reada = 1;
+               path->reada = READA_FORWARD;
         }
   
         ret = btrfs_lookup_file_extent(trans, root, path,
@@@ -6884,8 -6882,7 +6883,7 @@@ next
                 if (create == 0 && !PageUptodate(page)) {
                         if (btrfs_file_extent_compression(leaf, item) !=
                             BTRFS_COMPRESS_NONE) {
-                               ret = uncompress_inline(path, inode, page,
-                                                       pg_offset,
+                               ret = uncompress_inline(path, page, pg_offset,
                                                         extent_offset, item);
                                 if (ret) {
                                         err = ret;
@@@ -7381,7 -7378,7 +7379,7 @@@ static int lock_extent_direct(struct in
   
         while (1) {
                 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                0, cached_state);
+                                cached_state);
                 /*
                  * We're concerned with the entire range that we're going to be
                  * doing DIO to, so we need to make sure theres no ordered
@@@ -7409,25 -7406,21 +7407,21 @@@
                         btrfs_start_ordered_extent(inode, ordered, 1);
                         btrfs_put_ordered_extent(ordered);
                 } else {
-                       /* Screw you mmap */
-                       ret = btrfs_fdatawrite_range(inode, lockstart, lockend);
-                       if (ret)
-                               break;
-                       ret = filemap_fdatawait_range(inode->i_mapping,
-                                                     lockstart,
-                                                     lockend);
-                       if (ret)
-                               break;
- 
                         /*
-                        * If we found a page that couldn't be invalidated just
-                        * fall back to buffered.
+                        * We could trigger writeback for this range (and wait
+                        * for it to complete) and then invalidate the pages for
+                        * this range (through invalidate_inode_pages2_range()),
+                        * but that can lead us to a deadlock with a concurrent
+                        * call to readpages() (a buffered read or a defrag call
+                        * triggered a readahead) on a page lock due to an
+                        * ordered dio extent we created before but did not have
+                        * yet a corresponding bio submitted (whence it can not
+                        * complete), which makes readpages() wait for that
+                        * ordered extent to complete while holding a lock on
+                        * that page.
                          */
-                       ret = invalidate_inode_pages2_range(inode->i_mapping,
-                                       lockstart >> PAGE_CACHE_SHIFT,
-                                       lockend >> PAGE_CACHE_SHIFT);
-                       if (ret)
-                               break;
+                       ret = -ENOTBLK;
+                       break;
                 }
   
                 cond_resched();
@@@ -7483,11 -7476,6 +7477,6 @@@ static struct extent_map *create_pinned
         return em;
   }
   
- struct btrfs_dio_data {
-       u64 outstanding_extents;
-       u64 reserve;
- };
- 
   static void adjust_dio_outstanding_extents(struct inode *inode,
                                            struct btrfs_dio_data *dio_data,
                                            const u64 len)
@@@ -7671,6 -7659,7 +7660,7 @@@ unlock
                 btrfs_free_reserved_data_space(inode, start, len);
                 WARN_ON(dio_data->reserve < len);
                 dio_data->reserve -= len;
+               dio_data->unsubmitted_oe_range_end = start + len;
                 current->journal_info = dio_data;
         }
   
@@@ -7993,22 -7982,22 +7983,22 @@@ static void btrfs_endio_direct_read(str
         bio_put(bio);
   }
   
- static void btrfs_endio_direct_write(struct bio *bio)
+ static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
+                                                   const u64 offset,
+                                                   const u64 bytes,
+                                                   const int uptodate)
   {
-       struct btrfs_dio_private *dip = bio->bi_private;
-       struct inode *inode = dip->inode;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_ordered_extent *ordered = NULL;
-       u64 ordered_offset = dip->logical_offset;
-       u64 ordered_bytes = dip->bytes;
-       struct bio *dio_bio;
+       u64 ordered_offset = offset;
+       u64 ordered_bytes = bytes;
         int ret;
   
   again:
         ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
                                                    &ordered_offset,
                                                    ordered_bytes,
-                                                  !bio->bi_error);
+                                                  uptodate);
         if (!ret)
                 goto out_test;
   
@@@ -8021,13 -8010,22 +8011,22 @@@ out_test
          * our bio might span multiple ordered extents.  If we haven't
          * completed the accounting for the whole dio, go back and try again
          */
-       if (ordered_offset < dip->logical_offset + dip->bytes) {
-               ordered_bytes = dip->logical_offset + dip->bytes -
-                       ordered_offset;
+       if (ordered_offset < offset + bytes) {
+               ordered_bytes = offset + bytes - ordered_offset;
                 ordered = NULL;
                 goto again;
         }
-       dio_bio = dip->dio_bio;
+ }
+ 
+ static void btrfs_endio_direct_write(struct bio *bio)
+ {
+       struct btrfs_dio_private *dip = bio->bi_private;
+       struct bio *dio_bio = dip->dio_bio;
+ 
+       btrfs_endio_direct_write_update_ordered(dip->inode,
+                                               dip->logical_offset,
+                                               dip->bytes,
+                                               !bio->bi_error);
   
         kfree(dip);
   
@@@ -8335,6 -8333,21 +8334,21 @@@ static void btrfs_submit_direct(int rw
                 dip->subio_endio = btrfs_subio_endio_read;
         }
   
+       /*
+        * Reset the range for unsubmitted ordered extents (to a 0 length range)
+        * even if we fail to submit a bio, because in such case we do the
+        * corresponding error handling below and it must not be done a second
+        * time by btrfs_direct_IO().
+        */
+       if (write) {
+               struct btrfs_dio_data *dio_data = current->journal_info;
+ 
+               dio_data->unsubmitted_oe_range_end = dip->logical_offset +
+                       dip->bytes;
+               dio_data->unsubmitted_oe_range_start =
+                       dio_data->unsubmitted_oe_range_end;
+       }
+ 
         ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
         if (!ret)
                 return;
@@@ -8363,24 -8376,15 +8377,15 @@@ free_ordered
                 dip = NULL;
                 io_bio = NULL;
         } else {
-               if (write) {
-                       struct btrfs_ordered_extent *ordered;
- 
-                       ordered = btrfs_lookup_ordered_extent(inode,
-                                                             file_offset);
-                       set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
-                       /*
-                        * Decrements our ref on the ordered extent and removes
-                        * the ordered extent from the inode's ordered tree,
-                        * doing all the proper resource cleanup such as for the
-                        * reserved space and waking up any waiters for this
-                        * ordered extent (through btrfs_remove_ordered_extent).
-                        */
-                       btrfs_finish_ordered_io(ordered);
-               } else {
+               if (write)
+                       btrfs_endio_direct_write_update_ordered(inode,
+                                               file_offset,
+                                               dio_bio->bi_iter.bi_size,
+                                               0);
+               else
                         unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
                               file_offset + dio_bio->bi_iter.bi_size - 1);
-               }
+ 
                 dio_bio->bi_error = -EIO;
                 /*
                  * Releases and cleans up our dio_bio, no need to bio_put()
@@@ -8480,6 -8484,8 +8485,8 @@@ static ssize_t btrfs_direct_IO(struct k
                  * originally calculated.  Abuse current->journal_info for this.
                  */
                 dio_data.reserve = round_up(count, root->sectorsize);
+               dio_data.unsubmitted_oe_range_start = (u64)offset;
+               dio_data.unsubmitted_oe_range_end = (u64)offset;
                 current->journal_info = &dio_data;
         } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
                                      &BTRFS_I(inode)->runtime_flags)) {
@@@ -8498,6 -8504,19 +8505,19 @@@
                         if (dio_data.reserve)
                                 btrfs_delalloc_release_space(inode, offset,
                                                              dio_data.reserve);
+                       /*
+                        * On error we might have left some ordered extents
+                        * without submitting corresponding bios for them, so
+                        * cleanup them up to avoid other tasks getting them
+                        * and waiting for them to complete forever.
+                        */
+                       if (dio_data.unsubmitted_oe_range_start <
+                           dio_data.unsubmitted_oe_range_end)
+                               btrfs_endio_direct_write_update_ordered(inode,
+                                       dio_data.unsubmitted_oe_range_start,
+                                       dio_data.unsubmitted_oe_range_end -
+                                       dio_data.unsubmitted_oe_range_start,
+                                       0);
                 } else if (ret >= 0 && (size_t)ret < count)
                         btrfs_delalloc_release_space(inode, offset,
                                                      count - (size_t)ret);
@@@ -8535,15 -8554,28 +8555,28 @@@ int btrfs_readpage(struct file *file, s
   static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
   {
         struct extent_io_tree *tree;
- 
+       struct inode *inode = page->mapping->host;
+       int ret;
   
         if (current->flags & PF_MEMALLOC) {
                 redirty_page_for_writepage(wbc, page);
                 unlock_page(page);
                 return 0;
         }
+ 
+       /*
+        * If we are under memory pressure we will call this directly from the
+        * VM, we need to make sure we have the inode referenced for the ordered
+        * extent.  If not just return like we didn't do anything.
+        */
+       if (!igrab(inode)) {
+               redirty_page_for_writepage(wbc, page);
+               return AOP_WRITEPAGE_ACTIVATE;
+       }
         tree = &BTRFS_I(page->mapping->host)->io_tree;
-       return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
+       ret = extent_write_full_page(tree, page, btrfs_get_extent, wbc);
+       btrfs_add_delayed_iput(inode);
+       return ret;
   }
   
   static int btrfs_writepages(struct address_space *mapping,
@@@ -8615,7 -8647,7 +8648,7 @@@ static void btrfs_invalidatepage(struc
         }
   
         if (!inode_evicting)
-               lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+               lock_extent_bits(tree, page_start, page_end, &cached_state);
         ordered = btrfs_lookup_ordered_extent(inode, page_start);
         if (ordered) {
                 /*
@@@ -8653,7 -8685,7 +8686,7 @@@
                 btrfs_put_ordered_extent(ordered);
                 if (!inode_evicting) {
                         cached_state = NULL;
-                       lock_extent_bits(tree, page_start, page_end, 0,
+                       lock_extent_bits(tree, page_start, page_end,
                                          &cached_state);
                 }
         }
@@@ -8751,7 -8783,7 +8784,7 @@@ again
         }
         wait_on_page_writeback(page);
   
-       lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
+       lock_extent_bits(io_tree, page_start, page_end, &cached_state);
         set_page_extent_mapped(page);
   
         /*
@@@ -9025,6 -9057,7 +9058,7 @@@ struct inode *btrfs_alloc_inode(struct 
         ei->dir_index = 0;
         ei->last_unlink_trans = 0;
         ei->last_log_commit = 0;
+       ei->delayed_iput_count = 0;
   
         spin_lock_init(&ei->lock);
         ei->outstanding_extents = 0;
@@@ -9049,6 -9082,7 +9083,7 @@@
         mutex_init(&ei->delalloc_mutex);
         btrfs_ordered_inode_tree_init(&ei->ordered_tree);
         INIT_LIST_HEAD(&ei->delalloc_inodes);
+       INIT_LIST_HEAD(&ei->delayed_iput);
         RB_CLEAR_NODE(&ei->rb_node);
   
         return inode;
@@@ -9153,16 -9187,13 +9188,14 @@@ void btrfs_destroy_cachep(void
                 kmem_cache_destroy(btrfs_path_cachep);
         if (btrfs_free_space_cachep)
                 kmem_cache_destroy(btrfs_free_space_cachep);
-       if (btrfs_delalloc_work_cachep)
-               kmem_cache_destroy(btrfs_delalloc_work_cachep);
   }
   
   int btrfs_init_cachep(void)
   {
         btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
                         sizeof(struct btrfs_inode), 0,
- -                      SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, init_once);
+ +                      SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
+ +                      init_once);
         if (!btrfs_inode_cachep)
                 goto fail;
   
@@@ -9190,13 -9221,6 +9223,6 @@@
         if (!btrfs_free_space_cachep)
                 goto fail;
   
-       btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work",
-                       sizeof(struct btrfs_delalloc_work), 0,
-                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
-                       NULL);
-       if (!btrfs_delalloc_work_cachep)
-               goto fail;
- 
         return 0;
   fail:
         btrfs_destroy_cachep();
@@@ -9420,14 -9444,10 +9446,10 @@@ static void btrfs_run_delalloc_work(str
         delalloc_work = container_of(work, struct btrfs_delalloc_work,
                                      work);
         inode = delalloc_work->inode;
-       if (delalloc_work->wait) {
-               btrfs_wait_ordered_range(inode, 0, (u64)-1);
-       } else {
+       filemap_flush(inode->i_mapping);
+       if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+                               &BTRFS_I(inode)->runtime_flags))
                 filemap_flush(inode->i_mapping);
-               if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-                            &BTRFS_I(inode)->runtime_flags))
-                       filemap_flush(inode->i_mapping);
-       }
   
         if (delalloc_work->delay_iput)
                 btrfs_add_delayed_iput(inode);
@@@ -9437,18 -9457,17 +9459,17 @@@
   }
   
   struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
-                                                   int wait, int delay_iput)
+                                                   int delay_iput)
   {
         struct btrfs_delalloc_work *work;
   
-       work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS);
+       work = kmalloc(sizeof(*work), GFP_NOFS);
         if (!work)
                 return NULL;
   
         init_completion(&work->completion);
         INIT_LIST_HEAD(&work->list);
         work->inode = inode;
-       work->wait = wait;
         work->delay_iput = delay_iput;
         WARN_ON_ONCE(!inode);
         btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
@@@ -9460,7 -9479,7 +9481,7 @@@
   void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
   {
         wait_for_completion(&work->completion);
-       kmem_cache_free(btrfs_delalloc_work_cachep, work);
+       kfree(work);
   }
   
   /*
@@@ -9496,7 -9515,7 +9517,7 @@@ static int __start_delalloc_inodes(stru
                 }
                 spin_unlock(&root->delalloc_lock);
   
-               work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+               work = btrfs_alloc_delalloc_work(inode, delay_iput);
                 if (!work) {
                         if (delay_iput)
                                 btrfs_add_delayed_iput(inode);
@@@ -9638,9 -9657,11 +9659,11 @@@ static int btrfs_symlink(struct inode *
         /*
          * 2 items for inode item and ref
          * 2 items for dir items
+        * 1 item for updating parent inode item
+        * 1 item for the inline extent item
          * 1 item for xattr if selinux is on
          */
-       trans = btrfs_start_transaction(root, 5);
+       trans = btrfs_start_transaction(root, 7);
         if (IS_ERR(trans))
                 return PTR_ERR(trans);
   
@@@ -9671,10 -9692,6 +9694,6 @@@
         if (err)
                 goto out_unlock_inode;
   
-       err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
-       if (err)
-               goto out_unlock_inode;
- 
         path = btrfs_alloc_path();
         if (!path) {
                 err = -ENOMEM;
@@@ -9707,11 -9724,17 +9726,18 @@@
         btrfs_free_path(path);
   
         inode->i_op = &btrfs_symlink_inode_operations;
+ +      inode_nohighmem(inode);
         inode->i_mapping->a_ops = &btrfs_symlink_aops;
         inode_set_bytes(inode, name_len);
         btrfs_i_size_write(inode, name_len);
         err = btrfs_update_inode(trans, root, inode);
+       /*
+        * Last step, add directory indexes for our symlink inode. This is the
+        * last step to avoid extra cleanup of these indexes if an error happens
+        * elsewhere above.
+        */
+       if (!err)
+               err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
         if (err) {
                 drop_inode = 1;
                 goto out_unlock_inode;
@@@ -9762,7 -9785,7 +9788,7 @@@ static int __btrfs_prealloc_file_range(
                         }
                 }
   
-               cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
+               cur_bytes = min_t(u64, num_bytes, SZ_256M);
                 cur_bytes = max(cur_bytes, min_size);
                 /*
                  * If we are severely fragmented we could end up with really
@@@ -9997,7 -10020,7 +10023,7 @@@ static const struct inode_operations bt
         .setattr        = btrfs_setattr,
         .mknod          = btrfs_mknod,
         .setxattr       = btrfs_setxattr,
- -      .getxattr       = btrfs_getxattr,
+ +      .getxattr       = generic_getxattr,
         .listxattr      = btrfs_listxattr,
         .removexattr    = btrfs_removexattr,
         .permission     = btrfs_permission,
@@@ -10026,7 -10049,7 +10052,7 @@@ static const struct file_operations btr
         .fsync          = btrfs_sync_file,
   };
   
- static struct extent_io_ops btrfs_extent_io_ops = {
+ static const struct extent_io_ops btrfs_extent_io_ops = {
         .fill_delalloc = run_delalloc_range,
         .submit_bio_hook = btrfs_submit_bio_hook,
         .merge_bio_hook = btrfs_merge_bio_hook,
@@@ -10074,7 -10097,7 +10100,7 @@@ static const struct inode_operations bt
         .getattr        = btrfs_getattr,
         .setattr        = btrfs_setattr,
         .setxattr       = btrfs_setxattr,
- -      .getxattr       = btrfs_getxattr,
+ +      .getxattr       = generic_getxattr,
         .listxattr      = btrfs_listxattr,
         .removexattr    = btrfs_removexattr,
         .permission     = btrfs_permission,
@@@ -10088,7 -10111,7 +10114,7 @@@ static const struct inode_operations bt
         .setattr        = btrfs_setattr,
         .permission     = btrfs_permission,
         .setxattr       = btrfs_setxattr,
- -      .getxattr       = btrfs_getxattr,
+ +      .getxattr       = generic_getxattr,
         .listxattr      = btrfs_listxattr,
         .removexattr    = btrfs_removexattr,
         .get_acl        = btrfs_get_acl,
@@@ -10097,12 -10120,13 +10123,12 @@@
   };
   static const struct inode_operations btrfs_symlink_inode_operations = {
         .readlink       = generic_readlink,
- -      .follow_link    = page_follow_link_light,
- -      .put_link       = page_put_link,
+ +      .get_link       = page_get_link,
         .getattr        = btrfs_getattr,
         .setattr        = btrfs_setattr,
         .permission     = btrfs_permission,
         .setxattr       = btrfs_setxattr,
- -      .getxattr       = btrfs_getxattr,
+ +      .getxattr       = generic_getxattr,
         .listxattr      = btrfs_listxattr,
         .removexattr    = btrfs_removexattr,
         .update_time    = btrfs_update_time,
diff --combined fs/btrfs/ioctl.c

index e21997385d148c7ede78fd5874e594577ed8a9e9,e392dd67f0ba07dd943dcc650175a0ab8a79cceb..2a47a3148ec80df57150e3f5aa7d321abb8d1ccd
--- 1/fs/btrfs/ioctl.c
--- 2/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@@ -655,22 -655,28 +655,28 @@@ static int create_snapshot(struct btrfs
         if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
                 return -EINVAL;
   
+       pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
+       if (!pending_snapshot)
+               return -ENOMEM;
+ 
+       pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
+                       GFP_NOFS);
+       pending_snapshot->path = btrfs_alloc_path();
+       if (!pending_snapshot->root_item || !pending_snapshot->path) {
+               ret = -ENOMEM;
+               goto free_pending;
+       }
+ 
         atomic_inc(&root->will_be_snapshoted);
         smp_mb__after_atomic();
         btrfs_wait_for_no_snapshoting_writes(root);
   
         ret = btrfs_start_delalloc_inodes(root, 0);
         if (ret)
-               goto out;
+               goto dec_and_free;
   
         btrfs_wait_ordered_extents(root, -1);
   
-       pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
-       if (!pending_snapshot) {
-               ret = -ENOMEM;
-               goto out;
-       }
- 
         btrfs_init_block_rsv(&pending_snapshot->block_rsv,
                              BTRFS_BLOCK_RSV_TEMP);
         /*
@@@ -686,7 -692,7 +692,7 @@@
                                         &pending_snapshot->qgroup_reserved,
                                         false);
         if (ret)
-               goto free;
+               goto dec_and_free;
   
         pending_snapshot->dentry = dentry;
         pending_snapshot->root = root;
@@@ -737,11 -743,14 +743,14 @@@ fail
         btrfs_subvolume_release_metadata(BTRFS_I(dir)->root,
                                          &pending_snapshot->block_rsv,
                                          pending_snapshot->qgroup_reserved);
- free:
-       kfree(pending_snapshot);
- out:
+ dec_and_free:
         if (atomic_dec_and_test(&root->will_be_snapshoted))
                 wake_up_atomic_t(&root->will_be_snapshoted);
+ free_pending:
+       kfree(pending_snapshot->root_item);
+       btrfs_free_path(pending_snapshot->path);
+       kfree(pending_snapshot);
+ 
         return ret;
   }
   
@@@ -992,7 -1001,7 +1001,7 @@@ static struct extent_map *defrag_lookup
                 u64 end = start + len - 1;
   
                 /* get the big lock and read metadata off disk */
-               lock_extent_bits(io_tree, start, end, 0, &cached);
+               lock_extent_bits(io_tree, start, end, &cached);
                 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
                 unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS);
   
@@@ -1016,7 -1025,7 +1025,7 @@@ static bool defrag_check_next_extent(st
         if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
                 ret = false;
         else if ((em->block_start + em->block_len == next->block_start) &&
-                (em->block_len > 128 * 1024 && next->block_len > 128 * 1024))
+                (em->block_len > SZ_128K && next->block_len > SZ_128K))
                 ret = false;
   
         free_extent_map(next);
@@@ -1140,7 -1149,7 +1149,7 @@@ again
                 page_end = page_start + PAGE_CACHE_SIZE - 1;
                 while (1) {
                         lock_extent_bits(tree, page_start, page_end,
-                                        0, &cached_state);
+                                        &cached_state);
                         ordered = btrfs_lookup_ordered_extent(inode,
                                                               page_start);
                         unlock_extent_cached(tree, page_start, page_end,
@@@ -1200,7 -1209,7 +1209,7 @@@
         page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
   
         lock_extent_bits(&BTRFS_I(inode)->io_tree,
-                        page_start, page_end - 1, 0, &cached_state);
+                        page_start, page_end - 1, &cached_state);
         clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
                           page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
                           EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
@@@ -1262,9 -1271,9 +1271,9 @@@ int btrfs_defrag_file(struct inode *ino
         int defrag_count = 0;
         int compress_type = BTRFS_COMPRESS_ZLIB;
         u32 extent_thresh = range->extent_thresh;
-       unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
+       unsigned long max_cluster = SZ_256K >> PAGE_CACHE_SHIFT;
         unsigned long cluster = max_cluster;
-       u64 new_align = ~((u64)128 * 1024 - 1);
+       u64 new_align = ~((u64)SZ_128K - 1);
         struct page **pages = NULL;
   
         if (isize == 0)
@@@ -1281,7 -1290,7 +1290,7 @@@
         }
   
         if (extent_thresh == 0)
-               extent_thresh = 256 * 1024;
+               extent_thresh = SZ_256K;
   
         /*
          * if we were not given a file, allocate a readahead
@@@ -1313,7 -1322,7 +1322,7 @@@
   
         if (newer_than) {
                 ret = find_new_extents(root, inode, newer_than,
-                                      &newer_off, 64 * 1024);
+                                      &newer_off, SZ_64K);
                 if (!ret) {
                         range->start = newer_off;
                         /*
@@@ -1403,9 -1412,8 +1412,8 @@@
                         newer_off = max(newer_off + 1,
                                         (u64)i << PAGE_CACHE_SHIFT);
   
-                       ret = find_new_extents(root, inode,
-                                              newer_than, &newer_off,
-                                              64 * 1024);
+                       ret = find_new_extents(root, inode, newer_than,
+                                              &newer_off, SZ_64K);
                         if (!ret) {
                                 range->start = newer_off;
                                 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
@@@ -1571,7 -1579,7 +1579,7 @@@ static noinline int btrfs_ioctl_resize(
                 new_size = old_size + new_size;
         }
   
-       if (new_size < 256 * 1024 * 1024) {
+       if (new_size < SZ_256M) {
                 ret = -EINVAL;
                 goto out_free;
         }
@@@ -2160,7 -2168,7 +2168,7 @@@ static noinline int btrfs_ioctl_tree_se
         struct inode *inode;
         int ret;
         size_t buf_size;
-       const size_t buf_limit = 16 * 1024 * 1024;
+       const size_t buf_limit = SZ_16M;
   
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
@@@ -2962,7 -2970,7 +2970,7 @@@ static int btrfs_cmp_data(struct inode 
                 flush_dcache_page(dst_page);
   
                 if (memcmp(addr, dst_addr, cmp_len))
- -                      ret = BTRFS_SAME_DATA_DIFFERS;
+ +                      ret = -EBADE;
   
                 kunmap_atomic(addr);
                 kunmap_atomic(dst_addr);
@@@ -3096,18 -3104,55 +3104,18 @@@ out_unlock
         return ret;
   }
   
- #define BTRFS_MAX_DEDUPE_LEN  (16 * 1024 * 1024)
+ #define BTRFS_MAX_DEDUPE_LEN  SZ_16M
   
- -static long btrfs_ioctl_file_extent_same(struct file *file,
- -                      struct btrfs_ioctl_same_args __user *argp)
+ +ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
+ +                              struct file *dst_file, u64 dst_loff)
   {
- -      struct btrfs_ioctl_same_args *same = NULL;
- -      struct btrfs_ioctl_same_extent_info *info;
- -      struct inode *src = file_inode(file);
- -      u64 off;
- -      u64 len;
- -      int i;
- -      int ret;
- -      unsigned long size;
+ +      struct inode *src = file_inode(src_file);
+ +      struct inode *dst = file_inode(dst_file);
         u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
- -      bool is_admin = capable(CAP_SYS_ADMIN);
- -      u16 count;
- -
- -      if (!(file->f_mode & FMODE_READ))
- -              return -EINVAL;
- -
- -      ret = mnt_want_write_file(file);
- -      if (ret)
- -              return ret;
- -
- -      if (get_user(count, &argp->dest_count)) {
- -              ret = -EFAULT;
- -              goto out;
- -      }
- -
- -      size = offsetof(struct btrfs_ioctl_same_args __user, info[count]);
- -
- -      same = memdup_user(argp, size);
- -
- -      if (IS_ERR(same)) {
- -              ret = PTR_ERR(same);
- -              same = NULL;
- -              goto out;
- -      }
+ +      ssize_t res;
   
- -      off = same->logical_offset;
- -      len = same->length;
- -
- -      /*
- -       * Limit the total length we will dedupe for each operation.
- -       * This is intended to bound the total time spent in this
- -       * ioctl to something sane.
- -       */
- -      if (len > BTRFS_MAX_DEDUPE_LEN)
- -              len = BTRFS_MAX_DEDUPE_LEN;
+ +      if (olen > BTRFS_MAX_DEDUPE_LEN)
+ +              olen = BTRFS_MAX_DEDUPE_LEN;
   
         if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
                 /*
@@@ -3115,13 -3160,58 +3123,13 @@@
                  * result, btrfs_cmp_data() won't correctly handle
                  * this situation without an update.
                  */
- -              ret = -EINVAL;
- -              goto out;
- -      }
- -
- -      ret = -EISDIR;
- -      if (S_ISDIR(src->i_mode))
- -              goto out;
- -
- -      ret = -EACCES;
- -      if (!S_ISREG(src->i_mode))
- -              goto out;
- -
- -      /* pre-format output fields to sane values */
- -      for (i = 0; i < count; i++) {
- -              same->info[i].bytes_deduped = 0ULL;
- -              same->info[i].status = 0;
- -      }
- -
- -      for (i = 0, info = same->info; i < count; i++, info++) {
- -              struct inode *dst;
- -              struct fd dst_file = fdget(info->fd);
- -              if (!dst_file.file) {
- -                      info->status = -EBADF;
- -                      continue;
- -              }
- -              dst = file_inode(dst_file.file);
- -
- -              if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) {
- -                      info->status = -EINVAL;
- -              } else if (file->f_path.mnt != dst_file.file->f_path.mnt) {
- -                      info->status = -EXDEV;
- -              } else if (S_ISDIR(dst->i_mode)) {
- -                      info->status = -EISDIR;
- -              } else if (!S_ISREG(dst->i_mode)) {
- -                      info->status = -EACCES;
- -              } else {
- -                      info->status = btrfs_extent_same(src, off, len, dst,
- -                                                      info->logical_offset);
- -                      if (info->status == 0)
- -                              info->bytes_deduped += len;
- -              }
- -              fdput(dst_file);
+ +              return -EINVAL;
         }
   
- -      ret = copy_to_user(argp, same, size);
- -      if (ret)
- -              ret = -EFAULT;
- -
- -out:
- -      mnt_drop_write_file(file);
- -      kfree(same);
- -      return ret;
+ +      res = btrfs_extent_same(src, loff, olen, dst, dst_loff);
+ +      if (res)
+ +              return res;
+ +      return olen;
   }
   
   static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
@@@ -3396,7 -3486,7 +3404,7 @@@ static int btrfs_clone(struct inode *sr
                 return ret;
         }
   
-       path->reada = 2;
+       path->reada = READA_FORWARD;
         /* clone data */
         key.objectid = btrfs_ino(src);
         key.type = BTRFS_EXTENT_DATA_KEY;
@@@ -3697,16 -3787,17 +3705,16 @@@ out
         return ret;
   }
   
- -static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
- -                                     u64 off, u64 olen, u64 destoff)
+ +static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
+ +                                      u64 off, u64 olen, u64 destoff)
   {
         struct inode *inode = file_inode(file);
+ +      struct inode *src = file_inode(file_src);
         struct btrfs_root *root = BTRFS_I(inode)->root;
- -      struct fd src_file;
- -      struct inode *src;
         int ret;
         u64 len = olen;
         u64 bs = root->fs_info->sb->s_blocksize;
- -      int same_inode = 0;
+ +      int same_inode = src == inode;
   
         /*
          * TODO:
@@@ -3719,20 -3810,49 +3727,20 @@@
          *   be either compressed or non-compressed.
          */
   
- -      /* the destination must be opened for writing */
- -      if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
- -              return -EINVAL;
- -
         if (btrfs_root_readonly(root))
                 return -EROFS;
   
- -      ret = mnt_want_write_file(file);
- -      if (ret)
- -              return ret;
- -
- -      src_file = fdget(srcfd);
- -      if (!src_file.file) {
- -              ret = -EBADF;
- -              goto out_drop_write;
- -      }
- -
- -      ret = -EXDEV;
- -      if (src_file.file->f_path.mnt != file->f_path.mnt)
- -              goto out_fput;
- -
- -      src = file_inode(src_file.file);
- -
- -      ret = -EINVAL;
- -      if (src == inode)
- -              same_inode = 1;
- -
- -      /* the src must be open for reading */
- -      if (!(src_file.file->f_mode & FMODE_READ))
- -              goto out_fput;
+ +      if (file_src->f_path.mnt != file->f_path.mnt ||
+ +          src->i_sb != inode->i_sb)
+ +              return -EXDEV;
   
         /* don't make the dst file partly checksummed */
         if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
             (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
- -              goto out_fput;
+ +              return -EINVAL;
   
- -      ret = -EISDIR;
         if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
- -              goto out_fput;
- -
- -      ret = -EXDEV;
- -      if (src->i_sb != inode->i_sb)
- -              goto out_fput;
+ +              return -EISDIR;
   
         if (!same_inode) {
                 btrfs_double_inode_lock(src, inode);
@@@ -3809,25 -3929,21 +3817,25 @@@ out_unlock
                 btrfs_double_inode_unlock(src, inode);
         else
                 mutex_unlock(&src->i_mutex);
- -out_fput:
- -      fdput(src_file);
- -out_drop_write:
- -      mnt_drop_write_file(file);
         return ret;
   }
   
- -static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
+ +ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
+ +                            struct file *file_out, loff_t pos_out,
+ +                            size_t len, unsigned int flags)
   {
- -      struct btrfs_ioctl_clone_range_args args;
+ +      ssize_t ret;
   
- -      if (copy_from_user(&args, argp, sizeof(args)))
- -              return -EFAULT;
- -      return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
- -                               args.src_length, args.dest_offset);
+ +      ret = btrfs_clone_files(file_out, file_in, pos_in, len, pos_out);
+ +      if (ret == 0)
+ +              ret = len;
+ +      return ret;
+ +}
+ +
+ +int btrfs_clone_file_range(struct file *src_file, loff_t off,
+ +              struct file *dst_file, loff_t destoff, u64 len)
+ +{
+ +      return btrfs_clone_files(dst_file, src_file, off, len, destoff);
   }
   
   /*
@@@ -4039,7 -4155,7 +4047,7 @@@ static long btrfs_ioctl_space_info(stru
                 return -ENOMEM;
   
         space_args.total_spaces = 0;
-       dest = kmalloc(alloc_size, GFP_NOFS);
+       dest = kmalloc(alloc_size, GFP_KERNEL);
         if (!dest)
                 return -ENOMEM;
         dest_orig = dest;
@@@ -4416,7 -4532,7 +4424,7 @@@ static long btrfs_ioctl_logical_to_ino(
                 goto out;
         }
   
-       size = min_t(u32, loi->size, 64 * 1024);
+       size = min_t(u32, loi->size, SZ_64K);
         inodes = init_data_container(size);
         if (IS_ERR(inodes)) {
                 ret = PTR_ERR(inodes);
@@@ -4565,7 -4681,7 +4573,7 @@@ locked
                 goto out_bargs;
         }
   
-       bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+       bctl = kzalloc(sizeof(*bctl), GFP_KERNEL);
         if (!bctl) {
                 ret = -ENOMEM;
                 goto out_bargs;
@@@ -4651,7 -4767,7 +4659,7 @@@ static long btrfs_ioctl_balance_progres
                 goto out;
         }
   
-       bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
+       bargs = kzalloc(sizeof(*bargs), GFP_KERNEL);
         if (!bargs) {
                 ret = -ENOMEM;
                 goto out;
@@@ -4911,7 -5027,7 +4919,7 @@@ static long btrfs_ioctl_quota_rescan_st
         if (!capable(CAP_SYS_ADMIN))
                 return -EPERM;
   
-       qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
+       qsa = kzalloc(sizeof(*qsa), GFP_KERNEL);
         if (!qsa)
                 return -ENOMEM;
   
@@@ -5041,7 -5157,7 +5049,7 @@@ static long btrfs_ioctl_set_received_su
                 goto out;
         }
   
-       args64 = kmalloc(sizeof(*args64), GFP_NOFS);
+       args64 = kmalloc(sizeof(*args64), GFP_KERNEL);
         if (!args64) {
                 ret = -ENOMEM;
                 goto out;
@@@ -5178,7 -5294,7 +5186,7 @@@ out_unlock
   static int btrfs_ioctl_get_supported_features(struct file *file,
                                               void __user *arg)
   {
-       static struct btrfs_ioctl_feature_flags features[3] = {
+       static const struct btrfs_ioctl_feature_flags features[3] = {
                 INIT_FEATURE_FLAGS(SUPP),
                 INIT_FEATURE_FLAGS(SAFE_SET),
                 INIT_FEATURE_FLAGS(SAFE_CLEAR)
@@@ -5377,6 -5493,10 +5385,6 @@@ long btrfs_ioctl(struct file *file, uns
                 return btrfs_ioctl_dev_info(root, argp);
         case BTRFS_IOC_BALANCE:
                 return btrfs_ioctl_balance(file, NULL);
- -      case BTRFS_IOC_CLONE:
- -              return btrfs_ioctl_clone(file, arg, 0, 0, 0);
- -      case BTRFS_IOC_CLONE_RANGE:
- -              return btrfs_ioctl_clone_range(file, argp);
         case BTRFS_IOC_TRANS_START:
                 return btrfs_ioctl_trans_start(file);
         case BTRFS_IOC_TRANS_END:
@@@ -5454,6 -5574,8 +5462,6 @@@
                 return btrfs_ioctl_get_fslabel(file, argp);
         case BTRFS_IOC_SET_FSLABEL:
                 return btrfs_ioctl_set_fslabel(file, argp);
- -      case BTRFS_IOC_FILE_EXTENT_SAME:
- -              return btrfs_ioctl_file_extent_same(file, argp);
         case BTRFS_IOC_GET_SUPPORTED_FEATURES:
                 return btrfs_ioctl_get_supported_features(file, argp);
         case BTRFS_IOC_GET_FEATURES:
diff --combined fs/btrfs/super.c

index a0434c179ea96b9f1308e7034066202b30cb4267,86f7fdc0563388b9122bf6abdb88b68ebec2ac85..9b9eab6d048e93d32963b0c66a6d9ab6c022ee63
--- 1/fs/btrfs/super.c
--- 2/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@@ -295,10 -295,11 +295,11 @@@ enum 
         Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
         Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
         Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
-       Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
-       Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
-       Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
-       Opt_check_integrity, Opt_check_integrity_including_extent_data,
+       Opt_space_cache, Opt_space_cache_version, Opt_clear_cache,
+       Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid,
+       Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery,
+       Opt_skip_balance, Opt_check_integrity,
+       Opt_check_integrity_including_extent_data,
         Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
         Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
         Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
@@@ -309,7 -310,7 +310,7 @@@
         Opt_err,
   };
   
- static match_table_t tokens = {
+ static const match_table_t tokens = {
         {Opt_degraded, "degraded"},
         {Opt_subvol, "subvol=%s"},
         {Opt_subvolid, "subvolid=%s"},
@@@ -340,6 -341,7 +341,7 @@@
         {Opt_discard, "discard"},
         {Opt_nodiscard, "nodiscard"},
         {Opt_space_cache, "space_cache"},
+       {Opt_space_cache_version, "space_cache=%s"},
         {Opt_clear_cache, "clear_cache"},
         {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
         {Opt_enospc_debug, "enospc_debug"},
@@@ -383,7 -385,9 +385,9 @@@ int btrfs_parse_options(struct btrfs_ro
         bool compress_force = false;
   
         cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
-       if (cache_gen)
+       if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE))
+               btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
+       else if (cache_gen)
                 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
   
         if (!options)
@@@ -617,15 -621,35 +621,35 @@@
                                              "turning off discard");
                         break;
                 case Opt_space_cache:
-                       btrfs_set_and_info(root, SPACE_CACHE,
-                                          "enabling disk space caching");
+               case Opt_space_cache_version:
+                       if (token == Opt_space_cache ||
+                           strcmp(args[0].from, "v1") == 0) {
+                               btrfs_clear_opt(root->fs_info->mount_opt,
+                                               FREE_SPACE_TREE);
+                               btrfs_set_and_info(root, SPACE_CACHE,
+                                                  "enabling disk space caching");
+                       } else if (strcmp(args[0].from, "v2") == 0) {
+                               btrfs_clear_opt(root->fs_info->mount_opt,
+                                               SPACE_CACHE);
+                               btrfs_set_and_info(root, FREE_SPACE_TREE,
+                                                  "enabling free space tree");
+                       } else {
+                               ret = -EINVAL;
+                               goto out;
+                       }
                         break;
                 case Opt_rescan_uuid_tree:
                         btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
                         break;
                 case Opt_no_space_cache:
-                       btrfs_clear_and_info(root, SPACE_CACHE,
-                                            "disabling disk space caching");
+                       if (btrfs_test_opt(root, SPACE_CACHE)) {
+                               btrfs_clear_and_info(root, SPACE_CACHE,
+                                                    "disabling disk space caching");
+                       }
+                       if (btrfs_test_opt(root, FREE_SPACE_TREE)) {
+                               btrfs_clear_and_info(root, FREE_SPACE_TREE,
+                                                    "disabling free space tree");
+                       }
                         break;
                 case Opt_inode_cache:
                         btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
@@@ -754,8 -778,17 +778,17 @@@
                 }
         }
   out:
+       if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) &&
+           !btrfs_test_opt(root, FREE_SPACE_TREE) &&
+           !btrfs_test_opt(root, CLEAR_CACHE)) {
+               btrfs_err(root->fs_info, "cannot disable free space tree");
+               ret = -EINVAL;
+ 
+       }
         if (!ret && btrfs_test_opt(root, SPACE_CACHE))
                 btrfs_info(root->fs_info, "disk space caching is enabled");
+       if (!ret && btrfs_test_opt(root, FREE_SPACE_TREE))
+               btrfs_info(root->fs_info, "using free space tree");
         kfree(orig);
         return ret;
   }
@@@ -1162,6 -1195,8 +1195,8 @@@ static int btrfs_show_options(struct se
                 seq_puts(seq, ",noacl");
         if (btrfs_test_opt(root, SPACE_CACHE))
                 seq_puts(seq, ",space_cache");
+       else if (btrfs_test_opt(root, FREE_SPACE_TREE))
+               seq_puts(seq, ",space_cache=v2");
         else
                 seq_puts(seq, ",nospace_cache");
         if (btrfs_test_opt(root, RESCAN_UUID_TREE))
@@@ -1514,7 -1549,9 +1549,7 @@@ static struct dentry *btrfs_mount(struc
                 if ((flags ^ s->s_flags) & MS_RDONLY)
                         error = -EBUSY;
         } else {
- -              char b[BDEVNAME_SIZE];
- -
- -              strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
+ +              snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
                 btrfs_sb(s)->bdev_holder = fs_type;
                 error = btrfs_fill_super(s, fs_devices, data,
                                          flags & MS_SILENT ? 1 : 0);
@@@ -1863,7 -1900,7 +1898,7 @@@ static int btrfs_calc_avail_data_space(
                  * btrfs starts at an offset of at least 1MB when doing chunk
                  * allocation.
                  */
-               skip_space = 1024 * 1024;
+               skip_space = SZ_1M;
   
                 /* user can set the offset in fs_info->alloc_start. */
                 if (fs_info->alloc_start &&
@@@ -1954,6 -1991,8 +1989,8 @@@
    * there are other factors that may change the result (like a new metadata
    * chunk).
    *
+  * If metadata is exhausted, f_bavail will be 0.
+  *
    * FIXME: not accurate for mixed block groups, total and free/used are ok,
    * available appears slightly larger.
    */
@@@ -1965,11 -2004,13 +2002,13 @@@ static int btrfs_statfs(struct dentry *
         struct btrfs_space_info *found;
         u64 total_used = 0;
         u64 total_free_data = 0;
+       u64 total_free_meta = 0;
         int bits = dentry->d_sb->s_blocksize_bits;
         __be32 *fsid = (__be32 *)fs_info->fsid;
         unsigned factor = 1;
         struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
         int ret;
+       u64 thresh = 0;
   
         /*
          * holding chunk_mutex to avoid allocating new chunks, holding
@@@ -1995,6 -2036,8 +2034,8 @@@
                                 }
                         }
                 }
+               if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
+                       total_free_meta += found->disk_total - found->disk_used;
   
                 total_used += found->disk_used;
         }
@@@ -2017,6 -2060,24 +2058,24 @@@
         buf->f_bavail += div_u64(total_free_data, factor);
         buf->f_bavail = buf->f_bavail >> bits;
   
+       /*
+        * We calculate the remaining metadata space minus global reserve. If
+        * this is (supposedly) smaller than zero, there's no space. But this
+        * does not hold in practice: the exhausted state happens while there's
+        * still some positive delta. So we apply some guesswork and compare the
+        * delta to a 4M threshold.  (Practically observed delta was ~2M.)
+        *
+        * We probably cannot calculate the exact threshold value because this
+        * depends on the internal reservations requested by various
+        * operations, so some operations that consume a little metadata will
+        * succeed even if the Avail is zero. But this is better than the other
+        * way around.
+        */
+       thresh = 4 * 1024 * 1024;
+ 
+       if (total_free_meta - thresh < block_rsv->size)
+               buf->f_bavail = 0;
+ 
         buf->f_type = BTRFS_SUPER_MAGIC;
         buf->f_bsize = dentry->d_sb->s_blocksize;
         buf->f_namelen = BTRFS_NAME_LEN;
@@@ -2223,6 -2284,9 +2282,9 @@@ static int btrfs_run_sanity_tests(void
         if (ret)
                 goto out;
         ret = btrfs_test_qgroups();
+       if (ret)
+               goto out;
+       ret = btrfs_test_free_space_tree();
   out:
         btrfs_destroy_test_fs();
         return ret;
diff --combined fs/btrfs/xattr.c

index 7cbef1a14fe1b13bc3af4c63f47efb3a6f83dbad,608552ed89c078fc953679d84b0016a9a1db77af..fd953c361a43c7c7f0faf3e100df12b052c06552
--- 1/fs/btrfs/xattr.c
--- 2/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@@ -283,7 -283,7 +283,7 @@@ ssize_t btrfs_listxattr(struct dentry *
         path = btrfs_alloc_path();
         if (!path)
                 return -ENOMEM;
-       path->reada = 2;
+       path->reada = READA_FORWARD;
   
         /* search for our xattrs */
         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@@ -351,89 -351,137 +351,89 @@@ err
         return ret;
   }
   
- -/*
- - * List of handlers for synthetic system.* attributes.  All real ondisk
- - * attributes are handled directly.
- - */
- -const struct xattr_handler *btrfs_xattr_handlers[] = {
- -#ifdef CONFIG_BTRFS_FS_POSIX_ACL
- -      &posix_acl_access_xattr_handler,
- -      &posix_acl_default_xattr_handler,
- -#endif
- -      NULL,
- -};
- -
- -/*
- - * Check if the attribute is in a supported namespace.
- - *
- - * This is applied after the check for the synthetic attributes in the system
- - * namespace.
- - */
- -static int btrfs_is_valid_xattr(const char *name)
+ +static int btrfs_xattr_handler_get(const struct xattr_handler *handler,
+ +                                 struct dentry *dentry, const char *name,
+ +                                 void *buffer, size_t size)
   {
- -      int len = strlen(name);
- -      int prefixlen = 0;
- -
- -      if (!strncmp(name, XATTR_SECURITY_PREFIX,
- -                      XATTR_SECURITY_PREFIX_LEN))
- -              prefixlen = XATTR_SECURITY_PREFIX_LEN;
- -      else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- -              prefixlen = XATTR_SYSTEM_PREFIX_LEN;
- -      else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
- -              prefixlen = XATTR_TRUSTED_PREFIX_LEN;
- -      else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
- -              prefixlen = XATTR_USER_PREFIX_LEN;
- -      else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- -              prefixlen = XATTR_BTRFS_PREFIX_LEN;
- -      else
- -              return -EOPNOTSUPP;
- -
- -      /*
- -       * The name cannot consist of just prefix
- -       */
- -      if (len <= prefixlen)
- -              return -EINVAL;
+ +      struct inode *inode = d_inode(dentry);
   
- -      return 0;
+ +      name = xattr_full_name(handler, name);
+ +      return __btrfs_getxattr(inode, name, buffer, size);
   }
   
- -ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
- -                     void *buffer, size_t size)
+ +static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
+ +                                 struct dentry *dentry, const char *name,
+ +                                 const void *buffer, size_t size,
+ +                                 int flags)
   {
- -      int ret;
+ +      struct inode *inode = d_inode(dentry);
   
- -      /*
- -       * If this is a request for a synthetic attribute in the system.*
- -       * namespace use the generic infrastructure to resolve a handler
- -       * for it via sb->s_xattr.
- -       */
- -      if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- -              return generic_getxattr(dentry, name, buffer, size);
+ +      name = xattr_full_name(handler, name);
+ +      return __btrfs_setxattr(NULL, inode, name, buffer, size, flags);
+ +}
   
- -      ret = btrfs_is_valid_xattr(name);
- -      if (ret)
- -              return ret;
- -      return __btrfs_getxattr(d_inode(dentry), name, buffer, size);
+ +static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
+ +                                      struct dentry *dentry,
+ +                                      const char *name, const void *value,
+ +                                      size_t size, int flags)
+ +{
+ +      name = xattr_full_name(handler, name);
+ +      return btrfs_set_prop(d_inode(dentry), name, value, size, flags);
   }
   
+ +static const struct xattr_handler btrfs_security_xattr_handler = {
+ +      .prefix = XATTR_SECURITY_PREFIX,
+ +      .get = btrfs_xattr_handler_get,
+ +      .set = btrfs_xattr_handler_set,
+ +};
+ +
+ +static const struct xattr_handler btrfs_trusted_xattr_handler = {
+ +      .prefix = XATTR_TRUSTED_PREFIX,
+ +      .get = btrfs_xattr_handler_get,
+ +      .set = btrfs_xattr_handler_set,
+ +};
+ +
+ +static const struct xattr_handler btrfs_user_xattr_handler = {
+ +      .prefix = XATTR_USER_PREFIX,
+ +      .get = btrfs_xattr_handler_get,
+ +      .set = btrfs_xattr_handler_set,
+ +};
+ +
+ +static const struct xattr_handler btrfs_btrfs_xattr_handler = {
+ +      .prefix = XATTR_BTRFS_PREFIX,
+ +      .get = btrfs_xattr_handler_get,
+ +      .set = btrfs_xattr_handler_set_prop,
+ +};
+ +
+ +const struct xattr_handler *btrfs_xattr_handlers[] = {
+ +      &btrfs_security_xattr_handler,
+ +#ifdef CONFIG_BTRFS_FS_POSIX_ACL
+ +      &posix_acl_access_xattr_handler,
+ +      &posix_acl_default_xattr_handler,
+ +#endif
+ +      &btrfs_trusted_xattr_handler,
+ +      &btrfs_user_xattr_handler,
+ +      &btrfs_btrfs_xattr_handler,
+ +      NULL,
+ +};
+ +
   int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
                    size_t size, int flags)
   {
         struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
- -      int ret;
   
- -      /*
- -       * The permission on security.* and system.* is not checked
- -       * in permission().
- -       */
         if (btrfs_root_readonly(root))
                 return -EROFS;
- -
- -      /*
- -       * If this is a request for a synthetic attribute in the system.*
- -       * namespace use the generic infrastructure to resolve a handler
- -       * for it via sb->s_xattr.
- -       */
- -      if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- -              return generic_setxattr(dentry, name, value, size, flags);
- -
- -      ret = btrfs_is_valid_xattr(name);
- -      if (ret)
- -              return ret;
- -
- -      if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- -              return btrfs_set_prop(d_inode(dentry), name,
- -                                    value, size, flags);
- -
- -      if (size == 0)
- -              value = "";  /* empty EA, do not remove */
- -
- -      return __btrfs_setxattr(NULL, d_inode(dentry), name, value, size,
- -                              flags);
+ +      return generic_setxattr(dentry, name, value, size, flags);
   }
   
   int btrfs_removexattr(struct dentry *dentry, const char *name)
   {
         struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
- -      int ret;
   
- -      /*
- -       * The permission on security.* and system.* is not checked
- -       * in permission().
- -       */
         if (btrfs_root_readonly(root))
                 return -EROFS;
- -
- -      /*
- -       * If this is a request for a synthetic attribute in the system.*
- -       * namespace use the generic infrastructure to resolve a handler
- -       * for it via sb->s_xattr.
- -       */
- -      if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
- -              return generic_removexattr(dentry, name);
- -
- -      ret = btrfs_is_valid_xattr(name);
- -      if (ret)
- -              return ret;
- -
- -      if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
- -              return btrfs_set_prop(d_inode(dentry), name,
- -                                    NULL, 0, XATTR_REPLACE);
- -
- -      return __btrfs_setxattr(NULL, d_inode(dentry), name, NULL, 0,
- -                              XATTR_REPLACE);
+ +      return generic_removexattr(dentry, name);
   }
   
   static int btrfs_initxattrs(struct inode *inode,
@@@ -446,7 -494,7 +446,7 @@@
   
         for (xattr = xattr_array; xattr->name != NULL; xattr++) {
                 name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
-                              strlen(xattr->name) + 1, GFP_NOFS);
+                              strlen(xattr->name) + 1, GFP_KERNEL);
                 if (!name) {
                         err = -ENOMEM;
                         break;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 18 Jan 2016 20:44:40 +0000 (12:44 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 18 Jan 2016 20:44:40 +0000 (12:44 -0800)
		1	2
fs/btrfs/acl.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/ctree.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/disk-io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/file.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/ioctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/xattr.c	patch \|	diff1 \|	diff2 \|	blob \| history