Merge branch 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 11 Jul 2015 17:26:34 +0000 (10:26 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 11 Jul 2015 17:26:34 +0000 (10:26 -0700)
Pull btrfs fixes from Chris Mason:
 "This is an assortment of fixes.  Most of the commits are from Filipe
  (fsync, the inode allocation cache and a few others).  Mark kicked in
  a series fixing corners in the extent sharing ioctls, and everyone
  else fixed up assorted other problems"

* 'for-linus-4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: fix wrong check for btrfs_force_chunk_alloc()
  Btrfs: fix warning of bytes_may_use
  Btrfs: fix hang when failing to submit bio of directIO
  Btrfs: fix a comment in inode.c:evict_inode_truncate_pages()
  Btrfs: fix memory corruption on failure to submit bio for direct IO
  btrfs: don't update mtime/ctime on deduped inodes
  btrfs: allow dedupe of same inode
  btrfs: fix deadlock with extent-same and readpage
  btrfs: pass unaligned length to btrfs_cmp_data()
  Btrfs: fix fsync after truncate when no_holes feature is enabled
  Btrfs: fix fsync xattr loss in the fast fsync path
  Btrfs: fix fsync data loss after append write
  Btrfs: fix crash on close_ctree() if cleaner starts new transaction
  Btrfs: fix race between caching kthread and returning inode to inode cache
  Btrfs: use kmem_cache_free when freeing entry in inode cache
  Btrfs: fix race between balance and unused block group deletion
  btrfs: add error handling for scrub_workers_get()
  btrfs: cleanup unused initialization of dev in btrfs_end_bio()
  btrfs: qgroup: allow user to clear the limitation on qgroup

13 files changed:
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/qgroup.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c

index 0ef5cc13fae26f8899cad7ed30b59344c79120d4..81220b2203c61f1033bc5a4a0666f9b7b96f08df 100644 (file)
@@ -44,6 +44,8 @@
 #define BTRFS_INODE_IN_DELALLOC_LIST           9
 #define BTRFS_INODE_READDIO_NEED_LOCK          10
 #define BTRFS_INODE_HAS_PROPS                  11
+/* DIO is ready to submit */
+#define BTRFS_INODE_DIO_READY                  12
 /*
  * The following 3 bits are meant only for the btree inode.
  * When any of them is set, it means an error happened while writing an
index 80a9aefb0c46fc179119d8ad67eb9f8bff687ac1..aac314e14188facff9ed95136eab647709352359 100644 (file)
@@ -1778,6 +1778,7 @@ struct btrfs_fs_info {
        spinlock_t unused_bgs_lock;
        struct list_head unused_bgs;
        struct mutex unused_bg_unpin_mutex;
+       struct mutex delete_unused_bgs_mutex;
 
        /* For btrfs to record security options */
        struct security_mnt_opts security_opts;
index 3f43bfea3684a13e378fbdd3d364c0635e14daca..a9aadb2ad5254cfe98d36a6eed1f12d4ae5e7925 100644 (file)
@@ -1751,6 +1751,7 @@ static int cleaner_kthread(void *arg)
 {
        struct btrfs_root *root = arg;
        int again;
+       struct btrfs_trans_handle *trans;
 
        do {
                again = 0;
@@ -1772,7 +1773,6 @@ static int cleaner_kthread(void *arg)
                }
 
                btrfs_run_delayed_iputs(root);
-               btrfs_delete_unused_bgs(root->fs_info);
                again = btrfs_clean_one_deleted_snapshot(root);
                mutex_unlock(&root->fs_info->cleaner_mutex);
 
@@ -1781,6 +1781,16 @@ static int cleaner_kthread(void *arg)
                 * needn't do anything special here.
                 */
                btrfs_run_defrag_inodes(root->fs_info);
+
+               /*
+                * Acquires fs_info->delete_unused_bgs_mutex to avoid racing
+                * with relocation (btrfs_relocate_chunk) and relocation
+                * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group)
+                * after acquiring fs_info->delete_unused_bgs_mutex. So we
+                * can't hold, nor need to, fs_info->cleaner_mutex when deleting
+                * unused block groups.
+                */
+               btrfs_delete_unused_bgs(root->fs_info);
 sleep:
                if (!try_to_freeze() && !again) {
                        set_current_state(TASK_INTERRUPTIBLE);
@@ -1789,6 +1799,34 @@ sleep:
                        __set_current_state(TASK_RUNNING);
                }
        } while (!kthread_should_stop());
+
+       /*
+        * Transaction kthread is stopped before us and wakes us up.
+        * However we might have started a new transaction and COWed some
+        * tree blocks when deleting unused block groups for example. So
+        * make sure we commit the transaction we started to have a clean
+        * shutdown when evicting the btree inode - if it has dirty pages
+        * when we do the final iput() on it, eviction will trigger a
+        * writeback for it which will fail with null pointer dereferences
+        * since work queues and other resources were already released and
+        * destroyed by the time the iput/eviction/writeback is made.
+        */
+       trans = btrfs_attach_transaction(root);
+       if (IS_ERR(trans)) {
+               if (PTR_ERR(trans) != -ENOENT)
+                       btrfs_err(root->fs_info,
+                                 "cleaner transaction attach returned %ld",
+                                 PTR_ERR(trans));
+       } else {
+               int ret;
+
+               ret = btrfs_commit_transaction(trans, root);
+               if (ret)
+                       btrfs_err(root->fs_info,
+                                 "cleaner open transaction commit returned %d",
+                                 ret);
+       }
+
        return 0;
 }
 
@@ -2492,6 +2530,7 @@ int open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->unused_bgs_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
        mutex_init(&fs_info->unused_bg_unpin_mutex);
+       mutex_init(&fs_info->delete_unused_bgs_mutex);
        mutex_init(&fs_info->reloc_mutex);
        mutex_init(&fs_info->delalloc_root_mutex);
        seqlock_init(&fs_info->profiles_lock);
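
The comment added to cleaner_kthread() above encodes a lock-ordering rule: relocation acquires fs_info->delete_unused_bgs_mutex first and fs_info->cleaner_mutex second, so the cleaner must never take delete_unused_bgs_mutex while holding cleaner_mutex. A minimal pthreads sketch of the same constraint (illustrative userspace code, not kernel API; the mutex and task names only mirror the ones above):

    /* Global lock order: delete_unused_bgs_mutex, then cleaner_mutex. */
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t cleaner_mutex = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t delete_unused_bgs_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void *relocation_task(void *arg)
    {
        /* Relocation: delete_unused_bgs_mutex first, cleaner_mutex second. */
        pthread_mutex_lock(&delete_unused_bgs_mutex);
        pthread_mutex_lock(&cleaner_mutex);
        puts("relocating block group");
        pthread_mutex_unlock(&cleaner_mutex);
        pthread_mutex_unlock(&delete_unused_bgs_mutex);
        return NULL;
    }

    static void *cleaner_task(void *arg)
    {
        /* Work that legitimately needs cleaner_mutex... */
        pthread_mutex_lock(&cleaner_mutex);
        puts("cleaning one deleted snapshot");
        pthread_mutex_unlock(&cleaner_mutex);

        /*
         * ...and only after dropping it, take delete_unused_bgs_mutex.
         * Taking it while still holding cleaner_mutex would invert the
         * order used by relocation_task() and allow an ABBA deadlock.
         */
        pthread_mutex_lock(&delete_unused_bgs_mutex);
        puts("deleting unused block groups");
        pthread_mutex_unlock(&delete_unused_bgs_mutex);
        return NULL;
    }

    int main(void)
    {
        pthread_t a, b;

        pthread_create(&a, NULL, relocation_task, NULL);
        pthread_create(&b, NULL, cleaner_task, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
    }
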
index 38b76cc02f486db1e9d7f887860aba8137d61205..1c2bd1723e40ce8d1390223daf2bf4a7871ac051 100644 (file)
@@ -9889,6 +9889,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                }
                spin_unlock(&fs_info->unused_bgs_lock);
 
+               mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
+
                /* Don't want to race with allocators so take the groups_sem */
                down_write(&space_info->groups_sem);
                spin_lock(&block_group->lock);
@@ -9983,6 +9985,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
 end_trans:
                btrfs_end_transaction(trans, root);
 next:
+               mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                btrfs_put_block_group(block_group);
                spin_lock(&fs_info->unused_bgs_lock);
        }
index f6a596d5a6374f1dbf66279e24ef17aed9ff399d..d4a582ac3f730f82299e997d0866e3caecacac7b 100644 (file)
@@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
 {
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset;
+       spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock;
        struct btrfs_free_space *info;
        struct rb_node *n;
        u64 count;
@@ -254,24 +255,30 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
                return;
 
        while (1) {
+               bool add_to_ctl = true;
+
+               spin_lock(rbroot_lock);
                n = rb_first(rbroot);
-               if (!n)
+               if (!n) {
+                       spin_unlock(rbroot_lock);
                        break;
+               }
 
                info = rb_entry(n, struct btrfs_free_space, offset_index);
                BUG_ON(info->bitmap); /* Logic error */
 
                if (info->offset > root->ino_cache_progress)
-                       goto free;
+                       add_to_ctl = false;
                else if (info->offset + info->bytes > root->ino_cache_progress)
                        count = root->ino_cache_progress - info->offset + 1;
                else
                        count = info->bytes;
 
-               __btrfs_add_free_space(ctl, info->offset, count);
-free:
                rb_erase(&info->offset_index, rbroot);
-               kfree(info);
+               spin_unlock(rbroot_lock);
+               if (add_to_ctl)
+                       __btrfs_add_free_space(ctl, info->offset, count);
+               kmem_cache_free(btrfs_free_space_cachep, info);
        }
 }
 
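The reworked btrfs_unpin_free_ino() loop fixes the race by detaching one entry from the pinned tree under free_ino_pinned->tree_lock, then doing the heavier work (re-adding the range, freeing through kmem_cache_free()) with the lock dropped. A sketch of that drain pattern (illustrative userspace code: the linked list stands in for the rbtree, free() for kmem_cache_free()):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct entry {
        struct entry *next;
        unsigned long offset;
    };

    static struct entry *pinned;                /* shared "tree" */
    static pthread_mutex_t pinned_lock = PTHREAD_MUTEX_INITIALIZER;

    static void drain_pinned(void)
    {
        while (1) {
            struct entry *e;

            pthread_mutex_lock(&pinned_lock);
            e = pinned;
            if (!e) {
                pthread_mutex_unlock(&pinned_lock);
                break;
            }
            pinned = e->next;                   /* rb_erase() analogue */
            pthread_mutex_unlock(&pinned_lock);

            /* heavier work happens with the lock dropped */
            printf("re-adding range at %lu\n", e->offset);
            free(e);                            /* kmem_cache_free() analogue */
        }
    }

    int main(void)
    {
        for (unsigned long i = 0; i < 3; i++) {
            struct entry *e = malloc(sizeof(*e));

            if (!e)
                return 1;
            e->offset = i * 4096;
            e->next = pinned;
            pinned = e;
        }
        drain_pinned();
        return 0;
    }
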
index 855935f6671ae59b1b025c3916d553bc689191ef..b33c0cf02668bde4d3dbeb60d0b1f411d58c648d 100644 (file)
@@ -4989,8 +4989,9 @@ static void evict_inode_truncate_pages(struct inode *inode)
        /*
         * Keep looping until we have no more ranges in the io tree.
         * We can have ongoing bios started by readpages (called from readahead)
-        * that didn't get their end io callbacks called yet or they are still
-        * in progress ((extent_io.c:end_bio_extent_readpage()). This means some
+        * that have their endio callback (extent_io.c:end_bio_extent_readpage)
+        * still in progress (they unlocked the pages in the bio but did not
+        * yet unlock the ranges in the io tree). This means some
         * ranges can still be locked and eviction started because before
         * submitting those bios, which are executed by a separate task (work
         * queue kthread), inode references (inode->i_count) were not taken
@@ -7546,6 +7547,7 @@ unlock:
 
                current->journal_info = outstanding_extents;
                btrfs_free_reserved_data_space(inode, len);
+               set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
        }
 
        /*
@@ -7871,8 +7873,6 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
        struct bio *dio_bio;
        int ret;
 
-       if (err)
-               goto out_done;
 again:
        ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
                                                   &ordered_offset,
@@ -7895,7 +7895,6 @@ out_test:
                ordered = NULL;
                goto again;
        }
-out_done:
        dio_bio = dip->dio_bio;
 
        kfree(dip);
@@ -8163,9 +8162,8 @@ out_err:
 static void btrfs_submit_direct(int rw, struct bio *dio_bio,
                                struct inode *inode, loff_t file_offset)
 {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_dio_private *dip;
-       struct bio *io_bio;
+       struct btrfs_dio_private *dip = NULL;
+       struct bio *io_bio = NULL;
        struct btrfs_io_bio *btrfs_bio;
        int skip_sum;
        int write = rw & REQ_WRITE;
@@ -8182,7 +8180,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
        dip = kzalloc(sizeof(*dip), GFP_NOFS);
        if (!dip) {
                ret = -ENOMEM;
-               goto free_io_bio;
+               goto free_ordered;
        }
 
        dip->private = dio_bio->bi_private;
@@ -8210,25 +8208,55 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
 
        if (btrfs_bio->end_io)
                btrfs_bio->end_io(btrfs_bio, ret);
-free_io_bio:
-       bio_put(io_bio);
 
 free_ordered:
        /*
-        * If this is a write, we need to clean up the reserved space and kill
-        * the ordered extent.
+        * If we arrived here it means either we failed to submit the dip,
+        * failed to clone the dio_bio, or failed to allocate the
+        * dip. If we cloned the dio_bio and allocated the dip, we can just
+        * call bio_endio against our io_bio so that we get proper resource
+        * cleanup if we fail to submit the dip, otherwise, we must do the
+        * same as btrfs_endio_direct_[write|read] because we can't call these
+        * callbacks - they require an allocated dip and a clone of dio_bio.
         */
-       if (write) {
-               struct btrfs_ordered_extent *ordered;
-               ordered = btrfs_lookup_ordered_extent(inode, file_offset);
-               if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
-                   !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
-                       btrfs_free_reserved_extent(root, ordered->start,
-                                                  ordered->disk_len, 1);
-               btrfs_put_ordered_extent(ordered);
-               btrfs_put_ordered_extent(ordered);
+       if (io_bio && dip) {
+               bio_endio(io_bio, ret);
+               /*
+                * The end io callbacks free our dip, do the final put on io_bio
+                * and all the cleanup and final put for dio_bio (through
+                * dio_end_io()).
+                */
+               dip = NULL;
+               io_bio = NULL;
+       } else {
+               if (write) {
+                       struct btrfs_ordered_extent *ordered;
+
+                       ordered = btrfs_lookup_ordered_extent(inode,
+                                                             file_offset);
+                       set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+                       /*
+                        * Decrements our ref on the ordered extent and removes
+                        * the ordered extent from the inode's ordered tree,
+                        * doing all the proper resource cleanup such as for the
+                        * reserved space and waking up any waiters for this
+                        * ordered extent (through btrfs_remove_ordered_extent).
+                        */
+                       btrfs_finish_ordered_io(ordered);
+               } else {
+                       unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
+                             file_offset + dio_bio->bi_iter.bi_size - 1);
+               }
+               clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+               /*
+                * Releases and cleans up our dio_bio, no need to bio_put()
+                * nor bio_endio()/bio_io_error() against dio_bio.
+                */
+               dio_end_io(dio_bio, ret);
        }
-       bio_endio(dio_bio, ret);
+       if (io_bio)
+               bio_put(io_bio);
+       kfree(dip);
 }
 
 static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb,
@@ -8330,9 +8358,18 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                                   btrfs_submit_direct, flags);
        if (iov_iter_rw(iter) == WRITE) {
                current->journal_info = NULL;
-               if (ret < 0 && ret != -EIOCBQUEUED)
-                       btrfs_delalloc_release_space(inode, count);
-               else if (ret >= 0 && (size_t)ret < count)
+               if (ret < 0 && ret != -EIOCBQUEUED) {
+                       /*
+                        * If the error comes from the submitting stage,
+                        * btrfs_get_blocks_direct() has already freed the data
+                        * space, and the metadata space will be handled by
+                        * finish_ordered_fn, so don't release it again here, to
+                        * keep bytes_may_use correct.
+                        */
+                       if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
+                                    &BTRFS_I(inode)->runtime_flags))
+                               btrfs_delalloc_release_space(inode, count);
+               } else if (ret >= 0 && (size_t)ret < count)
                        btrfs_delalloc_release_space(inode,
                                                     count - (size_t)ret);
        }
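
The new BTRFS_INODE_DIO_READY bit records that btrfs_get_blocks_direct() already released the data space and handed accounting over to the ordered extent; on a failed DIO write, btrfs_direct_IO() then releases the delalloc reservation only if test_and_clear_bit() finds the bit unset, so the space is released exactly once. A sketch of the test-and-clear idiom with C11 atomics (a userspace stand-in, not the kernel's bitops):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_ulong runtime_flags;
    #define DIO_READY_BIT 12UL

    static bool test_and_clear_bit(unsigned long bit, atomic_ulong *flags)
    {
        unsigned long mask = 1UL << bit;

        /* atomically clear the bit, report whether it had been set */
        return atomic_fetch_and(flags, ~mask) & mask;
    }

    int main(void)
    {
        /* set_bit(): submission handed the reservation to the ordered extent */
        atomic_fetch_or(&runtime_flags, 1UL << DIO_READY_BIT);

        /* error path: release the reservation only if no handoff happened */
        if (!test_and_clear_bit(DIO_READY_BIT, &runtime_flags))
            puts("releasing delalloc space");
        else
            puts("handed off already, ordered extent cleans up");
        return 0;
    }
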
index c86b835da7a8739ec16396d589ccb651278ab44d..5d91776e12a215cddf666a2a1674a5ff5749c92e 100644 (file)
@@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 {
 
 
 static int btrfs_clone(struct inode *src, struct inode *inode,
-                      u64 off, u64 olen, u64 olen_aligned, u64 destoff);
+                      u64 off, u64 olen, u64 olen_aligned, u64 destoff,
+                      int no_time_update);
 
 /* Mask out flags that are inappropriate for the given type of inode. */
 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -2765,14 +2766,11 @@ out:
        return ret;
 }
 
-static struct page *extent_same_get_page(struct inode *inode, u64 off)
+static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
 {
        struct page *page;
-       pgoff_t index;
        struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 
-       index = off >> PAGE_CACHE_SHIFT;
-
        page = grab_cache_page(inode->i_mapping, index);
        if (!page)
                return NULL;
@@ -2793,6 +2791,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off)
        return page;
 }
 
+static int gather_extent_pages(struct inode *inode, struct page **pages,
+                              int num_pages, u64 off)
+{
+       int i;
+       pgoff_t index = off >> PAGE_CACHE_SHIFT;
+
+       for (i = 0; i < num_pages; i++) {
+               pages[i] = extent_same_get_page(inode, index + i);
+               if (!pages[i])
+                       return -ENOMEM;
+       }
+       return 0;
+}
+
 static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
 {
        /* do any pending delalloc/csum calc on src, one way or
@@ -2818,52 +2830,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
        }
 }
 
-static void btrfs_double_unlock(struct inode *inode1, u64 loff1,
-                               struct inode *inode2, u64 loff2, u64 len)
+static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
 {
-       unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
-       unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
-
        mutex_unlock(&inode1->i_mutex);
        mutex_unlock(&inode2->i_mutex);
 }
 
-static void btrfs_double_lock(struct inode *inode1, u64 loff1,
-                             struct inode *inode2, u64 loff2, u64 len)
+static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
+{
+       if (inode1 < inode2)
+               swap(inode1, inode2);
+
+       mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+       if (inode1 != inode2)
+               mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+}
+
+static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
+                                     struct inode *inode2, u64 loff2, u64 len)
+{
+       unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+       unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+}
+
+static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+                                    struct inode *inode2, u64 loff2, u64 len)
 {
        if (inode1 < inode2) {
                swap(inode1, inode2);
                swap(loff1, loff2);
        }
-
-       mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
        lock_extent_range(inode1, loff1, len);
-       if (inode1 != inode2) {
-               mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+       if (inode1 != inode2)
                lock_extent_range(inode2, loff2, len);
+}
+
+struct cmp_pages {
+       int             num_pages;
+       struct page     **src_pages;
+       struct page     **dst_pages;
+};
+
+static void btrfs_cmp_data_free(struct cmp_pages *cmp)
+{
+       int i;
+       struct page *pg;
+
+       for (i = 0; i < cmp->num_pages; i++) {
+               pg = cmp->src_pages[i];
+               if (pg)
+                       page_cache_release(pg);
+               pg = cmp->dst_pages[i];
+               if (pg)
+                       page_cache_release(pg);
+       }
+       kfree(cmp->src_pages);
+       kfree(cmp->dst_pages);
+}
+
+static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
+                                 struct inode *dst, u64 dst_loff,
+                                 u64 len, struct cmp_pages *cmp)
+{
+       int ret;
+       int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
+       struct page **src_pgarr, **dst_pgarr;
+
+       /*
+        * We must gather up all the pages before we initiate our
+        * extent locking. We use an array for the page pointers. Size
+        * of the array is bounded by len, which is in turn bounded by
+        * BTRFS_MAX_DEDUPE_LEN.
+        */
+       src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+       dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+       if (!src_pgarr || !dst_pgarr) {
+               kfree(src_pgarr);
+               kfree(dst_pgarr);
+               return -ENOMEM;
        }
+       cmp->num_pages = num_pages;
+       cmp->src_pages = src_pgarr;
+       cmp->dst_pages = dst_pgarr;
+
+       ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff);
+       if (ret)
+               goto out;
+
+       ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff);
+
+out:
+       if (ret)
+               btrfs_cmp_data_free(cmp);
+       return ret;
 }
 
 static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
-                         u64 dst_loff, u64 len)
+                         u64 dst_loff, u64 len, struct cmp_pages *cmp)
 {
        int ret = 0;
+       int i;
        struct page *src_page, *dst_page;
        unsigned int cmp_len = PAGE_CACHE_SIZE;
        void *addr, *dst_addr;
 
+       i = 0;
        while (len) {
                if (len < PAGE_CACHE_SIZE)
                        cmp_len = len;
 
-               src_page = extent_same_get_page(src, loff);
-               if (!src_page)
-                       return -EINVAL;
-               dst_page = extent_same_get_page(dst, dst_loff);
-               if (!dst_page) {
-                       page_cache_release(src_page);
-                       return -EINVAL;
-               }
+               BUG_ON(i >= cmp->num_pages);
+
+               src_page = cmp->src_pages[i];
+               dst_page = cmp->dst_pages[i];
+
                addr = kmap_atomic(src_page);
                dst_addr = kmap_atomic(dst_page);
 
@@ -2875,15 +2955,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
 
                kunmap_atomic(addr);
                kunmap_atomic(dst_addr);
-               page_cache_release(src_page);
-               page_cache_release(dst_page);
 
                if (ret)
                        break;
 
-               loff += cmp_len;
-               dst_loff += cmp_len;
                len -= cmp_len;
+               i++;
        }
 
        return ret;
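
btrfs_double_inode_lock() above prevents ABBA deadlocks between two tasks deduping the same pair of files in opposite argument orders: every caller locks the two i_mutexes in the same fixed, address-based order, and a single inode passed twice is locked only once. The same idiom as a generic sketch (illustrative userspace code; it locks the higher-addressed object first to mirror the function above, though any fixed order works):

    #include <pthread.h>
    #include <stdio.h>

    struct obj {
        pthread_mutex_t lock;
    };

    static void double_lock(struct obj *a, struct obj *b)
    {
        if (a < b) {                    /* normalize to the fixed order */
            struct obj *tmp = a;
            a = b;
            b = tmp;
        }
        pthread_mutex_lock(&a->lock);
        if (a != b)                     /* same object twice: lock once */
            pthread_mutex_lock(&b->lock);
    }

    static void double_unlock(struct obj *a, struct obj *b)
    {
        pthread_mutex_unlock(&a->lock);
        if (a != b)
            pthread_mutex_unlock(&b->lock);
    }

    int main(void)
    {
        struct obj x = { PTHREAD_MUTEX_INITIALIZER };
        struct obj y = { PTHREAD_MUTEX_INITIALIZER };

        double_lock(&x, &y);            /* same order from any caller */
        puts("both files locked");
        double_unlock(&x, &y);

        double_lock(&x, &x);            /* dedupe within a single file */
        puts("single file locked once");
        double_unlock(&x, &x);
        return 0;
    }
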
@@ -2914,27 +2991,62 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
 {
        int ret;
        u64 len = olen;
+       struct cmp_pages cmp;
+       int same_inode = 0;
+       u64 same_lock_start = 0;
+       u64 same_lock_len = 0;
 
-       /*
-        * btrfs_clone() can't handle extents in the same file
-        * yet. Once that works, we can drop this check and replace it
-        * with a check for the same inode, but overlapping extents.
-        */
        if (src == dst)
-               return -EINVAL;
+               same_inode = 1;
 
        if (len == 0)
                return 0;
 
-       btrfs_double_lock(src, loff, dst, dst_loff, len);
+       if (same_inode) {
+               mutex_lock(&src->i_mutex);
 
-       ret = extent_same_check_offsets(src, loff, &len, olen);
-       if (ret)
-               goto out_unlock;
+               ret = extent_same_check_offsets(src, loff, &len, olen);
+               if (ret)
+                       goto out_unlock;
 
-       ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
-       if (ret)
-               goto out_unlock;
+               /*
+                * Single inode case wants the same checks, except we
+                * don't want our length pushed out past i_size as
+                * comparing that data range makes no sense.
+                *
+                * extent_same_check_offsets() will do this for an
+                * unaligned length at i_size, so catch it here and
+                * reject the request.
+                *
+                * This effectively means we require aligned extents
+                * for the single-inode case, whereas the other cases
+                * allow an unaligned length so long as it ends at
+                * i_size.
+                */
+               if (len != olen) {
+                       ret = -EINVAL;
+                       goto out_unlock;
+               }
+
+               /* Check for overlapping ranges */
+               if (dst_loff + len > loff && dst_loff < loff + len) {
+                       ret = -EINVAL;
+                       goto out_unlock;
+               }
+
+               same_lock_start = min_t(u64, loff, dst_loff);
+               same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
+       } else {
+               btrfs_double_inode_lock(src, dst);
+
+               ret = extent_same_check_offsets(src, loff, &len, olen);
+               if (ret)
+                       goto out_unlock;
+
+               ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
+               if (ret)
+                       goto out_unlock;
+       }
 
        /* don't make the dst file partly checksummed */
        if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
@@ -2943,12 +3055,32 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                goto out_unlock;
        }
 
-       ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
+       ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
+       if (ret)
+               goto out_unlock;
+
+       if (same_inode)
+               lock_extent_range(src, same_lock_start, same_lock_len);
+       else
+               btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+
+       /* pass original length for comparison so we stay within i_size */
+       ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
        if (ret == 0)
-               ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
+               ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1);
+
+       if (same_inode)
+               unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start,
+                             same_lock_start + same_lock_len - 1);
+       else
+               btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
 
+       btrfs_cmp_data_free(&cmp);
 out_unlock:
-       btrfs_double_unlock(src, loff, dst, dst_loff, len);
+       if (same_inode)
+               mutex_unlock(&src->i_mutex);
+       else
+               btrfs_double_inode_unlock(src, dst);
 
        return ret;
 }
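
The same-inode overlap rejection above is the standard intersection test for two half-open ranges [loff, loff + len) and [dst_loff, dst_loff + len): they overlap iff each one starts before the other ends. A tiny self-checking sketch:

    #include <assert.h>
    #include <stdint.h>

    static int ranges_overlap(uint64_t loff, uint64_t dst_loff, uint64_t len)
    {
        return dst_loff + len > loff && dst_loff < loff + len;
    }

    int main(void)
    {
        assert(ranges_overlap(0, 4096, 8192));  /* [0,8192) vs [4096,12288) */
        assert(!ranges_overlap(0, 4096, 4096)); /* adjacent ranges are fine */
        return 0;
    }
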
@@ -3100,13 +3232,15 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
                                     struct inode *inode,
                                     u64 endoff,
                                     const u64 destoff,
-                                    const u64 olen)
+                                    const u64 olen,
+                                    int no_time_update)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
 
        inode_inc_iversion(inode);
-       inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+       if (!no_time_update)
+               inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        /*
         * We round up to the block size at eof when determining which
         * extents to clone above, but shouldn't round up the file size.
@@ -3191,13 +3325,13 @@ static void clone_update_extent_map(struct inode *inode,
  * @inode: Inode to clone to
  * @off: Offset within source to start clone from
  * @olen: Original length, passed by user, of range to clone
- * @olen_aligned: Block-aligned value of olen, extent_same uses
- *               identical values here
+ * @olen_aligned: Block-aligned value of olen
  * @destoff: Offset within @inode to start clone
+ * @no_time_update: Whether to update mtime/ctime on the target inode
  */
 static int btrfs_clone(struct inode *src, struct inode *inode,
                       const u64 off, const u64 olen, const u64 olen_aligned,
-                      const u64 destoff)
+                      const u64 destoff, int no_time_update)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_path *path = NULL;
@@ -3521,7 +3655,8 @@ process_slot:
                                              root->sectorsize);
                        ret = clone_finish_inode_update(trans, inode,
                                                        last_dest_end,
-                                                       destoff, olen);
+                                                       destoff, olen,
+                                                       no_time_update);
                        if (ret)
                                goto out;
                        if (new_key.offset + datal >= destoff + len)
@@ -3559,7 +3694,7 @@ process_slot:
                clone_update_extent_map(inode, trans, NULL, last_dest_end,
                                        destoff + len - last_dest_end);
                ret = clone_finish_inode_update(trans, inode, destoff + len,
-                                               destoff, olen);
+                                               destoff, olen, no_time_update);
        }
 
 out:
@@ -3696,7 +3831,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                lock_extent_range(inode, destoff, len);
        }
 
-       ret = btrfs_clone(src, inode, off, olen, len, destoff);
+       ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
 
        if (same_inode) {
                u64 lock_start = min_t(u64, off, destoff);
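
With these changes the extent-same ioctl accepts src == dst, skips the mtime/ctime update, and compares data using pages gathered before the extent locks are taken (avoiding the lock inversion against readpage). A hedged sketch of calling it from userspace to dedupe two block-aligned, non-overlapping ranges of one file, assuming a 4.2-era <linux/btrfs.h> exposing BTRFS_IOC_FILE_EXTENT_SAME; the path and offsets are made up, error handling is trimmed, and the ranges must hold identical data or status comes back as BTRFS_SAME_DATA_DIFFERS:

    #include <fcntl.h>
    #include <linux/btrfs.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>

    int main(void)
    {
        struct btrfs_ioctl_same_args *args;
        int fd = open("/mnt/btrfs/file", O_RDWR);   /* illustrative path */

        if (fd < 0)
            return 1;
        args = calloc(1, sizeof(*args) + sizeof(args->info[0]));
        if (!args)
            return 1;
        args->logical_offset = 0;                   /* source range */
        args->length = 128 * 1024;
        args->dest_count = 1;
        args->info[0].fd = fd;                      /* same inode now allowed */
        args->info[0].logical_offset = 512 * 1024;  /* must not overlap */

        if (ioctl(fd, BTRFS_IOC_FILE_EXTENT_SAME, args) < 0) {
            perror("extent-same");
            return 1;
        }
        printf("status %d, deduped %llu bytes\n", args->info[0].status,
               (unsigned long long)args->info[0].bytes_deduped);
        return 0;
    }
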
index 89656d799ff6fa3772b52cb17d535ebdc1cbb910..52170cf1757e3d25192a6cf6211a9d2edaa52751 100644 (file)
@@ -552,6 +552,10 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
        trace_btrfs_ordered_extent_put(entry->inode, entry);
 
        if (atomic_dec_and_test(&entry->refs)) {
+               ASSERT(list_empty(&entry->log_list));
+               ASSERT(list_empty(&entry->trans_list));
+               ASSERT(list_empty(&entry->root_extent_list));
+               ASSERT(RB_EMPTY_NODE(&entry->rb_node));
                if (entry->inode)
                        btrfs_add_delayed_iput(entry->inode);
                while (!list_empty(&entry->list)) {
@@ -579,6 +583,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
        spin_lock_irq(&tree->lock);
        node = &entry->rb_node;
        rb_erase(node, &tree->tree);
+       RB_CLEAR_NODE(node);
        if (tree->last == node)
                tree->last = NULL;
        set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
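
The ordered-extent change pairs RB_CLEAR_NODE() at removal time with ASSERT(RB_EMPTY_NODE(...)) at the final reference drop, so an extent freed while still linked into the ordered tree trips immediately instead of corrupting memory later. The kernel marks a detached rb_node by pointing its parent link at itself; a simplified sketch of that convention (userspace stand-in, not the kernel rbtree):

    #include <assert.h>
    #include <stddef.h>

    struct rbn {
        struct rbn *parent;             /* kernel packs color bits in here */
    };

    static void rb_clear_node(struct rbn *n)
    {
        n->parent = n;                  /* self-parented == not in a tree */
    }

    static int rb_empty_node(const struct rbn *n)
    {
        return n->parent == n;
    }

    int main(void)
    {
        struct rbn root = { NULL };
        struct rbn node = { &root };    /* pretend it is linked in a tree */

        assert(!rb_empty_node(&node));
        rb_clear_node(&node);           /* what btrfs_remove_ordered_extent adds */
        assert(rb_empty_node(&node));   /* what the final put now asserts */
        return 0;
    }
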
index d5f1f033b7a00f3c2e2826e336ae5ca5b3e31d4a..e9ace099162ce14d73d04523962465ee163b30c7 100644 (file)
@@ -1349,6 +1349,11 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
        int ret = 0;
+       /*
+        * Sometimes we want to clear the limit on this qgroup.
+        * To meet this requirement, we treat -1 as a special value
+        * which tells the kernel to clear the limit on this qgroup.
+        */
+       const u64 CLEAR_VALUE = -1;
 
        mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
@@ -1364,14 +1369,42 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
        }
 
        spin_lock(&fs_info->qgroup_lock);
-       if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
-               qgroup->max_rfer = limit->max_rfer;
-       if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
-               qgroup->max_excl = limit->max_excl;
-       if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
-               qgroup->rsv_rfer = limit->rsv_rfer;
-       if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
-               qgroup->rsv_excl = limit->rsv_excl;
+       if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) {
+               if (limit->max_rfer == CLEAR_VALUE) {
+                       qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+                       limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER;
+                       qgroup->max_rfer = 0;
+               } else {
+                       qgroup->max_rfer = limit->max_rfer;
+               }
+       }
+       if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) {
+               if (limit->max_excl == CLEAR_VALUE) {
+                       qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+                       limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL;
+                       qgroup->max_excl = 0;
+               } else {
+                       qgroup->max_excl = limit->max_excl;
+               }
+       }
+       if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) {
+               if (limit->rsv_rfer == CLEAR_VALUE) {
+                       qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+                       limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER;
+                       qgroup->rsv_rfer = 0;
+               } else {
+                       qgroup->rsv_rfer = limit->rsv_rfer;
+               }
+       }
+       if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) {
+               if (limit->rsv_excl == CLEAR_VALUE) {
+                       qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+                       limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL;
+                       qgroup->rsv_excl = 0;
+               } else {
+                       qgroup->rsv_excl = limit->rsv_excl;
+               }
+       }
        qgroup->lim_flags |= limit->flags;
 
        spin_unlock(&fs_info->qgroup_lock);
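
Because the limit fields are u64, the patch reserves (u64)-1 (i.e. U64_MAX) as a sentinel meaning "clear this limit": the stored value is zeroed and the corresponding flag bit is dropped in both the qgroup and the caller-supplied flags. A sketch of the sentinel handling in isolation (hypothetical helper and flag name; the real entry point is btrfs_limit_qgroup(), reached via the qgroup-limit ioctl with e.g. max_rfer = -1):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define LIMIT_MAX_RFER (1u << 0)

    struct qgroup {
        uint32_t lim_flags;
        uint64_t max_rfer;
    };

    static void apply_max_rfer(struct qgroup *qg, uint64_t requested)
    {
        const uint64_t CLEAR_VALUE = (uint64_t)-1;  /* == UINT64_MAX */

        if (requested == CLEAR_VALUE) {
            qg->lim_flags &= ~LIMIT_MAX_RFER;       /* limit removed */
            qg->max_rfer = 0;
        } else {
            qg->lim_flags |= LIMIT_MAX_RFER;
            qg->max_rfer = requested;
        }
    }

    int main(void)
    {
        struct qgroup qg = { 0, 0 };

        apply_max_rfer(&qg, 1 << 30);               /* 1 GiB limit */
        printf("flags %#x max_rfer %" PRIu64 "\n", qg.lim_flags, qg.max_rfer);

        apply_max_rfer(&qg, (uint64_t)-1);          /* "none": clear it */
        printf("flags %#x max_rfer %" PRIu64 "\n", qg.lim_flags, qg.max_rfer);
        return 0;
    }
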
index 827951fbf7fcb7dac026e48ded6e29f2f723adef..88cbb5995667951192b94c2627f759a771eda9e3 100644 (file)
@@ -4049,7 +4049,7 @@ restart:
        if (trans && progress && err == -ENOSPC) {
                ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
                                              rc->block_group->flags);
-               if (ret == 0) {
+               if (ret == 1) {
                        err = 0;
                        progress = 0;
                        goto restart;
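
btrfs_force_chunk_alloc() returns 1 when it allocated a chunk, 0 when it did nothing, and a negative errno on failure, so restarting the relocation only makes sense on a return of 1; the old ret == 0 test retried exactly when no chunk had been allocated. A sketch of consuming such a tri-state return (illustrative userspace code only):

    #include <errno.h>
    #include <stdio.h>

    static int force_chunk_alloc(int have_space)
    {
        if (!have_space)
            return -ENOSPC;
        return 1;                   /* success: a chunk was allocated */
    }

    int main(void)
    {
        int ret = force_chunk_alloc(1);

        if (ret < 0)
            fprintf(stderr, "alloc failed: %d\n", ret);
        else if (ret == 1)          /* not ret == 0: 1 means success here */
            puts("chunk allocated, restart relocation");
        return 0;
    }
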
index 9f2feabe99f211f9c8eb5d489c44c85f54754c27..94db0fa5225a9fd493163fc117cd25d8b01b7343 100644 (file)
@@ -3571,7 +3571,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
 static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
                                                int is_dev_replace)
 {
-       int ret = 0;
        unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
        int max_active = fs_info->thread_pool_size;
 
@@ -3584,34 +3583,36 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
                        fs_info->scrub_workers =
                                btrfs_alloc_workqueue("btrfs-scrub", flags,
                                                      max_active, 4);
-               if (!fs_info->scrub_workers) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
+               if (!fs_info->scrub_workers)
+                       goto fail_scrub_workers;
+
                fs_info->scrub_wr_completion_workers =
                        btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
                                              max_active, 2);
-               if (!fs_info->scrub_wr_completion_workers) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
+               if (!fs_info->scrub_wr_completion_workers)
+                       goto fail_scrub_wr_completion_workers;
+
                fs_info->scrub_nocow_workers =
                        btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
-               if (!fs_info->scrub_nocow_workers) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
+               if (!fs_info->scrub_nocow_workers)
+                       goto fail_scrub_nocow_workers;
                fs_info->scrub_parity_workers =
                        btrfs_alloc_workqueue("btrfs-scrubparity", flags,
                                              max_active, 2);
-               if (!fs_info->scrub_parity_workers) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
+               if (!fs_info->scrub_parity_workers)
+                       goto fail_scrub_parity_workers;
        }
        ++fs_info->scrub_workers_refcnt;
-out:
-       return ret;
+       return 0;
+
+fail_scrub_parity_workers:
+       btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
+fail_scrub_nocow_workers:
+       btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
+fail_scrub_wr_completion_workers:
+       btrfs_destroy_workqueue(fs_info->scrub_workers);
+fail_scrub_workers:
+       return -ENOMEM;
 }
 
 static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
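
The scrub_workers_get() rework replaces the repeated "ret = -ENOMEM; goto out;" blocks with the idiomatic kernel unwind ladder: each failure label destroys exactly the workqueues allocated before the failing step, in reverse order, and success falls through a single return 0. A generic sketch of the pattern (illustrative userspace code using malloc/free in place of the workqueues):

    #include <errno.h>
    #include <stdlib.h>

    struct workers {
        void *scrub, *wr_completion, *nocow;
    };

    static int workers_get(struct workers *w)
    {
        w->scrub = malloc(64);
        if (!w->scrub)
            goto fail_scrub;
        w->wr_completion = malloc(64);
        if (!w->wr_completion)
            goto fail_wr_completion;
        w->nocow = malloc(64);
        if (!w->nocow)
            goto fail_nocow;
        return 0;

    fail_nocow:
        free(w->wr_completion);
    fail_wr_completion:
        free(w->scrub);
    fail_scrub:
        return -ENOMEM;
    }

    int main(void)
    {
        struct workers w;

        if (workers_get(&w))
            return 1;
        free(w.nocow);
        free(w.wr_completion);
        free(w.scrub);
        return 0;
    }
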
index 1ce80c1c4eb6c1a414cfe88009c5c1c0e5c14132..9c45431e69aba3a7a8e2d0bcfb8c3b4a18182df2 100644 (file)
@@ -4117,6 +4117,187 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
        return 0;
 }
 
+/*
+ * At the moment we always log all xattrs. This is to figure out at log replay
+ * time which xattrs must have their deletion replayed. If an xattr is missing
+ * in the log tree and exists in the fs/subvol tree, we delete it. This is
+ * because if an xattr is deleted, the inode is fsynced, and then a power
+ * failure happens, causing the log to be replayed the next time the fs is
+ * mounted, we want the xattr to no longer exist (the same behaviour as other
+ * filesystems with a journal: ext3/4, xfs, f2fs, etc).
+ */
+static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               struct inode *inode,
+                               struct btrfs_path *path,
+                               struct btrfs_path *dst_path)
+{
+       int ret;
+       struct btrfs_key key;
+       const u64 ino = btrfs_ino(inode);
+       int ins_nr = 0;
+       int start_slot = 0;
+
+       key.objectid = ino;
+       key.type = BTRFS_XATTR_ITEM_KEY;
+       key.offset = 0;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               return ret;
+
+       while (true) {
+               int slot = path->slots[0];
+               struct extent_buffer *leaf = path->nodes[0];
+               int nritems = btrfs_header_nritems(leaf);
+
+               if (slot >= nritems) {
+                       if (ins_nr > 0) {
+                               u64 last_extent = 0;
+
+                               ret = copy_items(trans, inode, dst_path, path,
+                                                &last_extent, start_slot,
+                                                ins_nr, 1, 0);
+                               /* can't be 1, extent items aren't processed */
+                               ASSERT(ret <= 0);
+                               if (ret < 0)
+                                       return ret;
+                               ins_nr = 0;
+                       }
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               return ret;
+                       else if (ret > 0)
+                               break;
+                       continue;
+               }
+
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+               if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY)
+                       break;
+
+               if (ins_nr == 0)
+                       start_slot = slot;
+               ins_nr++;
+               path->slots[0]++;
+               cond_resched();
+       }
+       if (ins_nr > 0) {
+               u64 last_extent = 0;
+
+               ret = copy_items(trans, inode, dst_path, path,
+                                &last_extent, start_slot,
+                                ins_nr, 1, 0);
+               /* can't be 1, extent items aren't processed */
+               ASSERT(ret <= 0);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return 0;
+}
+
+/*
+ * If the no holes feature is enabled we need to make sure any hole between the
+ * last extent and the i_size of our inode is explicitly marked in the log. This
+ * is to make sure that doing something like:
+ *
+ *      1) create file with 128Kb of data
+ *      2) truncate file to 64Kb
+ *      3) truncate file to 256Kb
+ *      4) fsync file
+ *      5) <crash/power failure>
+ *      6) mount fs and trigger log replay
+ *
+ * Will give us a file with a size of 256Kb, the first 64Kb of data match what
+ * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
+ * file correspond to a hole. The presence of explicit holes in a log tree is
+ * what guarantees that log replay will remove/adjust file extent items in the
+ * fs/subvol tree.
+ *
+ * Here we do not need to care about holes between extents, as that is already
+ * done by copy_items(). We also only need to do this in the full sync path,
+ * where we look up extents from the fs/subvol tree only. In the fast path
+ * case, we walk the list of modified extent maps and, if any represents a
+ * hole, we insert a corresponding extent representing a hole in the log tree.
+ */
+static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  struct inode *inode,
+                                  struct btrfs_path *path)
+{
+       int ret;
+       struct btrfs_key key;
+       u64 hole_start;
+       u64 hole_size;
+       struct extent_buffer *leaf;
+       struct btrfs_root *log = root->log_root;
+       const u64 ino = btrfs_ino(inode);
+       const u64 i_size = i_size_read(inode);
+
+       if (!btrfs_fs_incompat(root->fs_info, NO_HOLES))
+               return 0;
+
+       key.objectid = ino;
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = (u64)-1;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       ASSERT(ret != 0);
+       if (ret < 0)
+               return ret;
+
+       ASSERT(path->slots[0] > 0);
+       path->slots[0]--;
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+       if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
+               /* inode does not have any extents */
+               hole_start = 0;
+               hole_size = i_size;
+       } else {
+               struct btrfs_file_extent_item *extent;
+               u64 len;
+
+               /*
+                * If there's an extent beyond i_size, an explicit hole was
+                * already inserted by copy_items().
+                */
+               if (key.offset >= i_size)
+                       return 0;
+
+               extent = btrfs_item_ptr(leaf, path->slots[0],
+                                       struct btrfs_file_extent_item);
+
+               if (btrfs_file_extent_type(leaf, extent) ==
+                   BTRFS_FILE_EXTENT_INLINE) {
+                       len = btrfs_file_extent_inline_len(leaf,
+                                                          path->slots[0],
+                                                          extent);
+                       ASSERT(len == i_size);
+                       return 0;
+               }
+
+               len = btrfs_file_extent_num_bytes(leaf, extent);
+               /* Last extent goes beyond i_size, no need to log a hole. */
+               if (key.offset + len > i_size)
+                       return 0;
+               hole_start = key.offset + len;
+               hole_size = i_size - hole_start;
+       }
+       btrfs_release_path(path);
+
+       /* Last extent ends at i_size. */
+       if (hole_size == 0)
+               return 0;
+
+       hole_size = ALIGN(hole_size, root->sectorsize);
+       ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
+                                      hole_size, 0, hole_size, 0, 0, 0);
+       return ret;
+}
+
 /* log a single inode in the tree log.
  * At least one parent directory for this inode must exist in the tree
  * or be logged already.
@@ -4155,6 +4336,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        u64 ino = btrfs_ino(inode);
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        u64 logged_isize = 0;
+       bool need_log_inode_item = true;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -4263,11 +4445,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                } else {
                        if (inode_only == LOG_INODE_ALL)
                                fast_search = true;
-                       ret = log_inode_item(trans, log, dst_path, inode);
-                       if (ret) {
-                               err = ret;
-                               goto out_unlock;
-                       }
                        goto log_extents;
                }
 
@@ -4290,6 +4467,28 @@ again:
                if (min_key.type > max_key.type)
                        break;
 
+               if (min_key.type == BTRFS_INODE_ITEM_KEY)
+                       need_log_inode_item = false;
+
+               /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
+               if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
+                       if (ins_nr == 0)
+                               goto next_slot;
+                       ret = copy_items(trans, inode, dst_path, path,
+                                        &last_extent, ins_start_slot,
+                                        ins_nr, inode_only, logged_isize);
+                       if (ret < 0) {
+                               err = ret;
+                               goto out_unlock;
+                       }
+                       ins_nr = 0;
+                       if (ret) {
+                               btrfs_release_path(path);
+                               continue;
+                       }
+                       goto next_slot;
+               }
+
                src = path->nodes[0];
                if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
                        ins_nr++;
@@ -4357,9 +4556,26 @@ next_slot:
                ins_nr = 0;
        }
 
+       btrfs_release_path(path);
+       btrfs_release_path(dst_path);
+       err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
+       if (err)
+               goto out_unlock;
+       if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
+               btrfs_release_path(path);
+               btrfs_release_path(dst_path);
+               err = btrfs_log_trailing_hole(trans, root, inode, path);
+               if (err)
+                       goto out_unlock;
+       }
 log_extents:
        btrfs_release_path(path);
        btrfs_release_path(dst_path);
+       if (need_log_inode_item) {
+               err = log_inode_item(trans, log, dst_path, inode);
+               if (err)
+                       goto out_unlock;
+       }
        if (fast_search) {
                /*
                 * Some ordered extents started by fsync might have completed
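
btrfs_log_trailing_hole() above finds the last file extent item and, if it ends below i_size, logs an explicit hole covering the range from the extent's end up to i_size, rounded up to the sector size. The arithmetic in isolation, with the tree search replaced by plain parameters (a hedged standalone sketch, not the kernel function):

    #include <assert.h>
    #include <stdint.h>

    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

    /* Returns the hole size to log, or 0 if no hole needs logging. */
    static uint64_t trailing_hole(uint64_t last_extent_end, uint64_t i_size,
                                  uint64_t sectorsize)
    {
        if (last_extent_end >= i_size)  /* extent reaches or passes i_size */
            return 0;
        return ALIGN_UP(i_size - last_extent_end, sectorsize);
    }

    int main(void)
    {
        /* truncate 128K -> 64K -> 256K: a 192K trailing hole to log */
        assert(trailing_hole(64 * 1024, 256 * 1024, 4096) == 192 * 1024);
        /* last extent ends exactly at i_size: nothing to log */
        assert(trailing_hole(256 * 1024, 256 * 1024, 4096) == 0);
        return 0;
    }
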
index 4b438b4c8c9195b3aaf11519339719efbef7b21b..fbe7c104531c9e5eaf8b347f46d0b8a783f88322 100644 (file)
@@ -2766,6 +2766,20 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
        root = root->fs_info->chunk_root;
        extent_root = root->fs_info->extent_root;
 
+       /*
+        * Prevent races with automatic removal of unused block groups.
+        * After we relocate and before we remove the chunk with offset
+        * chunk_offset, automatic removal of the block group can kick in,
+        * resulting in a failure when calling btrfs_remove_chunk() below.
+        *
+        * Make sure to acquire this mutex before doing a tree search (dev
+        * or chunk trees) to find chunks. Otherwise the cleaner kthread might
+        * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after
+        * we release the path used to search the chunk/dev tree and before
+        * the current task acquires this mutex and calls us.
+        */
+       ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex));
+
        ret = btrfs_can_relocate(extent_root, chunk_offset);
        if (ret)
                return -ENOSPC;
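
The hunks below apply the rule stated in the comment above: each loop iteration takes delete_unused_bgs_mutex before searching the chunk/dev tree and drops it after btrfs_relocate_chunk(), or on any early exit. A compact sketch of that per-iteration lock scope, where every path out of the critical section must unlock (illustrative userspace code):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t delete_unused_bgs_mutex = PTHREAD_MUTEX_INITIALIZER;

    static int lookup_next_chunk(int i)  { return i < 3 ? i : -1; }
    static int relocate_chunk(int chunk) { printf("relocating %d\n", chunk); return 0; }

    int main(void)
    {
        int i = 0;

        while (1) {
            int chunk, ret;

            pthread_mutex_lock(&delete_unused_bgs_mutex);
            chunk = lookup_next_chunk(i++);   /* search under the mutex */
            if (chunk < 0) {
                pthread_mutex_unlock(&delete_unused_bgs_mutex);
                break;                        /* early exit must unlock */
            }
            ret = relocate_chunk(chunk);
            pthread_mutex_unlock(&delete_unused_bgs_mutex);
            if (ret)
                return 1;
        }
        return 0;
    }
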
@@ -2814,13 +2828,18 @@ again:
        key.type = BTRFS_CHUNK_ITEM_KEY;
 
        while (1) {
+               mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
                ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
-               if (ret < 0)
+               if (ret < 0) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        goto error;
+               }
                BUG_ON(ret == 0); /* Corruption */
 
                ret = btrfs_previous_item(chunk_root, path, key.objectid,
                                          key.type);
+               if (ret)
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                if (ret < 0)
                        goto error;
                if (ret > 0)
@@ -2843,6 +2862,7 @@ again:
                        else
                                BUG_ON(ret);
                }
+               mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
 
                if (found_key.offset == 0)
                        break;
@@ -3299,9 +3319,12 @@ again:
                        goto error;
                }
 
+               mutex_lock(&fs_info->delete_unused_bgs_mutex);
                ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
-               if (ret < 0)
+               if (ret < 0) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        goto error;
+               }
 
                /*
                 * this shouldn't happen, it means the last relocate
@@ -3313,6 +3336,7 @@ again:
                ret = btrfs_previous_item(chunk_root, path, 0,
                                          BTRFS_CHUNK_ITEM_KEY);
                if (ret) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        ret = 0;
                        break;
                }
@@ -3321,8 +3345,10 @@ again:
                slot = path->slots[0];
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
-               if (found_key.objectid != key.objectid)
+               if (found_key.objectid != key.objectid) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        break;
+               }
 
                chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
 
@@ -3335,10 +3361,13 @@ again:
                ret = should_balance_chunk(chunk_root, leaf, chunk,
                                           found_key.offset);
                btrfs_release_path(path);
-               if (!ret)
+               if (!ret) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        goto loop;
+               }
 
                if (counting) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                        spin_lock(&fs_info->balance_lock);
                        bctl->stat.expected++;
                        spin_unlock(&fs_info->balance_lock);
@@ -3348,6 +3377,7 @@ again:
                ret = btrfs_relocate_chunk(chunk_root,
                                           found_key.objectid,
                                           found_key.offset);
+               mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                if (ret && ret != -ENOSPC)
                        goto error;
                if (ret == -ENOSPC) {
@@ -4087,11 +4117,16 @@ again:
        key.type = BTRFS_DEV_EXTENT_KEY;
 
        do {
+               mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-               if (ret < 0)
+               if (ret < 0) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        goto done;
+               }
 
                ret = btrfs_previous_item(root, path, 0, key.type);
+               if (ret)
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                if (ret < 0)
                        goto done;
                if (ret) {
@@ -4105,6 +4140,7 @@ again:
                btrfs_item_key_to_cpu(l, &key, path->slots[0]);
 
                if (key.objectid != device->devid) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        btrfs_release_path(path);
                        break;
                }
@@ -4113,6 +4149,7 @@ again:
                length = btrfs_dev_extent_length(l, dev_extent);
 
                if (key.offset + length <= new_size) {
+                       mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                        btrfs_release_path(path);
                        break;
                }
@@ -4122,6 +4159,7 @@ again:
                btrfs_release_path(path);
 
                ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
+               mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
                if (ret && ret != -ENOSPC)
                        goto done;
                if (ret == -ENOSPC)
@@ -5715,7 +5753,6 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e
 static void btrfs_end_bio(struct bio *bio, int err)
 {
        struct btrfs_bio *bbio = bio->bi_private;
-       struct btrfs_device *dev = bbio->stripes[0].dev;
        int is_orig_bio = 0;
 
        if (err) {
@@ -5723,6 +5760,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
                if (err == -EIO || err == -EREMOTEIO) {
                        unsigned int stripe_index =
                                btrfs_io_bio(bio)->stripe_index;
+                       struct btrfs_device *dev;
 
                        BUG_ON(stripe_index >= bbio->num_stripes);
                        dev = bbio->stripes[stripe_index].dev;