Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
fs/btrfs/extent_io.c
index cdee391fc7bfd57c596204a8474142e7afe60335..e7e7afb4a87268211e8b0ef881a6eeac0068eefd 100644
 
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
+static struct bio_set *btrfs_bioset;
 
+#ifdef CONFIG_BTRFS_DEBUG
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
 
-#define LEAK_DEBUG 0
-#if LEAK_DEBUG
 static DEFINE_SPINLOCK(leak_lock);
+
+static inline
+void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&leak_lock, flags);
+       list_add(new, head);
+       spin_unlock_irqrestore(&leak_lock, flags);
+}
+
+static inline
+void btrfs_leak_debug_del(struct list_head *entry)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&leak_lock, flags);
+       list_del(entry);
+       spin_unlock_irqrestore(&leak_lock, flags);
+}
+
+static inline
+void btrfs_leak_debug_check(void)
+{
+       struct extent_state *state;
+       struct extent_buffer *eb;
+
+       while (!list_empty(&states)) {
+               state = list_entry(states.next, struct extent_state, leak_list);
+               printk(KERN_ERR "btrfs state leak: start %llu end %llu "
+                      "state %lu in tree %p refs %d\n",
+                      (unsigned long long)state->start,
+                      (unsigned long long)state->end,
+                      state->state, state->tree, atomic_read(&state->refs));
+               list_del(&state->leak_list);
+               kmem_cache_free(extent_state_cache, state);
+       }
+
+       while (!list_empty(&buffers)) {
+               eb = list_entry(buffers.next, struct extent_buffer, leak_list);
+               printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
+                      "refs %d\n", (unsigned long long)eb->start,
+                      eb->len, atomic_read(&eb->refs));
+               list_del(&eb->leak_list);
+               kmem_cache_free(extent_buffer_cache, eb);
+       }
+}
+#else
+#define btrfs_leak_debug_add(new, head)        do {} while (0)
+#define btrfs_leak_debug_del(entry)    do {} while (0)
+#define btrfs_leak_debug_check()       do {} while (0)
 #endif
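
When CONFIG_BTRFS_DEBUG is not set, the three leak-debug helpers compile down to empty statements.  The do { } while (0) bodies keep the stubs usable anywhere a single statement is expected; a generic illustration (cond and do_other_work() are made up, not taken from this file):

        if (cond)
                btrfs_leak_debug_del(&state->leak_list);  /* expands to do { } while (0) */
        else
                do_other_work();

A bare {} expansion would break the if/else pairing here, because the semicolon after the macro call would terminate the if.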
 
 #define BUFFER_LRU_MAX 64
@@ -75,38 +126,26 @@ int __init extent_io_init(void)
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!extent_buffer_cache)
                goto free_state_cache;
+
+       btrfs_bioset = bioset_create(BIO_POOL_SIZE,
+                                    offsetof(struct btrfs_io_bio, bio));
+       if (!btrfs_bioset)
+               goto free_buffer_cache;
        return 0;
 
+free_buffer_cache:
+       kmem_cache_destroy(extent_buffer_cache);
+       extent_buffer_cache = NULL;
+
 free_state_cache:
        kmem_cache_destroy(extent_state_cache);
+       extent_state_cache = NULL;
        return -ENOMEM;
 }
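
For context: bioset_create() takes a pool size and a front-padding size, and the padding used here is offsetof(struct btrfs_io_bio, bio).  Every bio handed out by btrfs_bioset therefore sits at the tail of a larger btrfs_io_bio allocation, with the btrfs-private fields (such as mirror_num, read later in end_bio_extent_readpage()) living in the padding in front of the bio.  A rough sketch of the assumed layout; the real definition lives in the btrfs headers and the remaining members are elided:

        struct btrfs_io_bio {
                unsigned long mirror_num;   /* consumed by end_bio_extent_readpage() */
                /* ... other btrfs-private members ... */
                struct bio bio;             /* must be the last member for this scheme */
        };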
 
 void extent_io_exit(void)
 {
-       struct extent_state *state;
-       struct extent_buffer *eb;
-
-       while (!list_empty(&states)) {
-               state = list_entry(states.next, struct extent_state, leak_list);
-               printk(KERN_ERR "btrfs state leak: start %llu end %llu "
-                      "state %lu in tree %p refs %d\n",
-                      (unsigned long long)state->start,
-                      (unsigned long long)state->end,
-                      state->state, state->tree, atomic_read(&state->refs));
-               list_del(&state->leak_list);
-               kmem_cache_free(extent_state_cache, state);
-
-       }
-
-       while (!list_empty(&buffers)) {
-               eb = list_entry(buffers.next, struct extent_buffer, leak_list);
-               printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
-                      "refs %d\n", (unsigned long long)eb->start,
-                      eb->len, atomic_read(&eb->refs));
-               list_del(&eb->leak_list);
-               kmem_cache_free(extent_buffer_cache, eb);
-       }
+       btrfs_leak_debug_check();
 
        /*
         * Make sure all delayed rcu free are flushed before we
@@ -117,6 +156,8 @@ void extent_io_exit(void)
                kmem_cache_destroy(extent_state_cache);
        if (extent_buffer_cache)
                kmem_cache_destroy(extent_buffer_cache);
+       if (btrfs_bioset)
+               bioset_free(btrfs_bioset);
 }
 
 void extent_io_tree_init(struct extent_io_tree *tree,
@@ -134,9 +175,6 @@ void extent_io_tree_init(struct extent_io_tree *tree,
 static struct extent_state *alloc_extent_state(gfp_t mask)
 {
        struct extent_state *state;
-#if LEAK_DEBUG
-       unsigned long flags;
-#endif
 
        state = kmem_cache_alloc(extent_state_cache, mask);
        if (!state)
@@ -144,11 +182,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
        state->state = 0;
        state->private = 0;
        state->tree = NULL;
-#if LEAK_DEBUG
-       spin_lock_irqsave(&leak_lock, flags);
-       list_add(&state->leak_list, &states);
-       spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+       btrfs_leak_debug_add(&state->leak_list, &states);
        atomic_set(&state->refs, 1);
        init_waitqueue_head(&state->wq);
        trace_alloc_extent_state(state, mask, _RET_IP_);
@@ -160,15 +194,8 @@ void free_extent_state(struct extent_state *state)
        if (!state)
                return;
        if (atomic_dec_and_test(&state->refs)) {
-#if LEAK_DEBUG
-               unsigned long flags;
-#endif
                WARN_ON(state->tree);
-#if LEAK_DEBUG
-               spin_lock_irqsave(&leak_lock, flags);
-               list_del(&state->leak_list);
-               spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+               btrfs_leak_debug_del(&state->leak_list);
                trace_free_extent_state(state, _RET_IP_);
                kmem_cache_free(extent_state_cache, state);
        }
@@ -308,21 +335,21 @@ static void merge_state(struct extent_io_tree *tree,
 }
 
 static void set_state_cb(struct extent_io_tree *tree,
-                        struct extent_state *state, int *bits)
+                        struct extent_state *state, unsigned long *bits)
 {
        if (tree->ops && tree->ops->set_bit_hook)
                tree->ops->set_bit_hook(tree->mapping->host, state, bits);
 }
 
 static void clear_state_cb(struct extent_io_tree *tree,
-                          struct extent_state *state, int *bits)
+                          struct extent_state *state, unsigned long *bits)
 {
        if (tree->ops && tree->ops->clear_bit_hook)
                tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
 }
 
 static void set_state_bits(struct extent_io_tree *tree,
-                          struct extent_state *state, int *bits);
+                          struct extent_state *state, unsigned long *bits);
 
 /*
  * insert an extent_state struct into the tree.  'bits' are set on the
@@ -336,7 +363,7 @@ static void set_state_bits(struct extent_io_tree *tree,
  */
 static int insert_state(struct extent_io_tree *tree,
                        struct extent_state *state, u64 start, u64 end,
-                       int *bits)
+                       unsigned long *bits)
 {
        struct rb_node *node;
 
@@ -424,10 +451,10 @@ static struct extent_state *next_state(struct extent_state *state)
  */
 static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
                                            struct extent_state *state,
-                                           int *bits, int wake)
+                                           unsigned long *bits, int wake)
 {
        struct extent_state *next;
-       int bits_to_clear = *bits & ~EXTENT_CTLBITS;
+       unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS;
 
        if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
                u64 range = state->end - state->start + 1;
@@ -463,7 +490,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
        return prealloc;
 }
 
-void extent_io_tree_panic(struct extent_io_tree *tree, int err)
+static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
 {
        btrfs_panic(tree_fs_info(tree), err, "Locking error: "
                    "Extent tree was modified by another "
@@ -483,7 +510,7 @@ void extent_io_tree_panic(struct extent_io_tree *tree, int err)
  * This takes the tree lock, and returns 0 on success and < 0 on error.
  */
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                    int bits, int wake, int delete,
+                    unsigned long bits, int wake, int delete,
                     struct extent_state **cached_state,
                     gfp_t mask)
 {
@@ -644,7 +671,8 @@ static void wait_on_state(struct extent_io_tree *tree,
  * The range [start, end] is inclusive.
  * The tree lock is taken by this function
  */
-void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+                           unsigned long bits)
 {
        struct extent_state *state;
        struct rb_node *node;
@@ -685,9 +713,9 @@ out:
 
 static void set_state_bits(struct extent_io_tree *tree,
                           struct extent_state *state,
-                          int *bits)
+                          unsigned long *bits)
 {
-       int bits_to_set = *bits & ~EXTENT_CTLBITS;
+       unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS;
 
        set_state_cb(tree, state, bits);
        if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
@@ -730,8 +758,9 @@ static void uncache_state(struct extent_state **cached_ptr)
 
 static int __must_check
 __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                int bits, int exclusive_bits, u64 *failed_start,
-                struct extent_state **cached_state, gfp_t mask)
+                unsigned long bits, unsigned long exclusive_bits,
+                u64 *failed_start, struct extent_state **cached_state,
+                gfp_t mask)
 {
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
@@ -923,9 +952,9 @@ search_again:
        goto again;
 }
 
-int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
-                  u64 *failed_start, struct extent_state **cached_state,
-                  gfp_t mask)
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+                  unsigned long bits, u64 *failed_start,
+                  struct extent_state **cached_state, gfp_t mask)
 {
        return __set_extent_bit(tree, start, end, bits, 0, failed_start,
                                cached_state, mask);
@@ -950,7 +979,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
  * boundary bits like LOCK.
  */
 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                      int bits, int clear_bits,
+                      unsigned long bits, unsigned long clear_bits,
                       struct extent_state **cached_state, gfp_t mask)
 {
        struct extent_state *state;
@@ -1143,14 +1172,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 }
 
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                   int bits, gfp_t mask)
+                   unsigned long bits, gfp_t mask)
 {
        return set_extent_bit(tree, start, end, bits, NULL,
                              NULL, mask);
 }
 
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                     int bits, gfp_t mask)
+                     unsigned long bits, gfp_t mask)
 {
        return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
 }
@@ -1189,7 +1218,7 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
                        struct extent_state **cached_state, gfp_t mask)
 {
-       return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
+       return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
                              cached_state, mask);
 }
 
@@ -1205,7 +1234,7 @@ int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
  * us if waiting is desired.
  */
 int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                    int bits, struct extent_state **cached_state)
+                    unsigned long bits, struct extent_state **cached_state)
 {
        int err;
        u64 failed_start;
@@ -1313,8 +1342,9 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
  * return it.  tree->lock must be held.  NULL will be returned if
  * nothing was found after 'start'
  */
-struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
-                                                u64 start, int bits)
+static struct extent_state *
+find_first_extent_bit_state(struct extent_io_tree *tree,
+                           u64 start, unsigned long bits)
 {
        struct rb_node *node;
        struct extent_state *state;
@@ -1348,7 +1378,7 @@ out:
  * If nothing was found, 1 is returned. If something was found, 0 is returned.
  */
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-                         u64 *start_ret, u64 *end_ret, int bits,
+                         u64 *start_ret, u64 *end_ret, unsigned long bits,
                          struct extent_state **cached_state)
 {
        struct extent_state *state;
@@ -1638,7 +1668,7 @@ int extent_clear_unlock_delalloc(struct inode *inode,
        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
        unsigned long nr_pages = end_index - index + 1;
        int i;
-       int clear_bits = 0;
+       unsigned long clear_bits = 0;
 
        if (op & EXTENT_CLEAR_UNLOCK)
                clear_bits |= EXTENT_LOCKED;
@@ -1777,6 +1807,64 @@ out:
        return ret;
 }
 
+void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
+                           int count)
+{
+       struct rb_node *node;
+       struct extent_state *state;
+
+       spin_lock(&tree->lock);
+       /*
+        * this search will find all the extents that end after
+        * our range starts.
+        */
+       node = tree_search(tree, start);
+       BUG_ON(!node);
+
+       state = rb_entry(node, struct extent_state, rb_node);
+       BUG_ON(state->start != start);
+
+       while (count) {
+               state->private = *csums++;
+               count--;
+               state = next_state(state);
+       }
+       spin_unlock(&tree->lock);
+}
+
+static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
+{
+       struct bio_vec *bvec = bio->bi_io_vec + bio_index;
+
+       return page_offset(bvec->bv_page) + bvec->bv_offset;
+}
+
+void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
+                       u32 csums[], int count)
+{
+       struct rb_node *node;
+       struct extent_state *state = NULL;
+       u64 start;
+
+       spin_lock(&tree->lock);
+       do {
+               start = __btrfs_get_bio_offset(bio, bio_index);
+               if (state == NULL || state->start != start) {
+                       node = tree_search(tree, start);
+                       BUG_ON(!node);
+
+                       state = rb_entry(node, struct extent_state, rb_node);
+                       BUG_ON(state->start != start);
+               }
+               state->private = *csums++;
+               count--;
+               bio_index++;
+
+               state = next_state(state);
+       } while (count);
+       spin_unlock(&tree->lock);
+}
+
 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
 {
        struct rb_node *node;
@@ -1811,7 +1899,7 @@ out:
  * range is found set.
  */
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                  int bits, int filled, struct extent_state *cached)
+                  unsigned long bits, int filled, struct extent_state *cached)
 {
        struct extent_state *state = NULL;
        struct rb_node *node;
@@ -1872,28 +1960,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
                SetPageUptodate(page);
 }
 
-/*
- * helper function to unlock a page if all the extents in the tree
- * for that page are unlocked
- */
-static void check_page_locked(struct extent_io_tree *tree, struct page *page)
-{
-       u64 start = page_offset(page);
-       u64 end = start + PAGE_CACHE_SIZE - 1;
-       if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
-               unlock_page(page);
-}
-
-/*
- * helper function to end page writeback if all the extents
- * in the tree for that page are done with writeback
- */
-static void check_page_writeback(struct extent_io_tree *tree,
-                                struct page *page)
-{
-       end_page_writeback(page);
-}
-
 /*
  * When IO fails, either with EIO or a csum verification failure, we
  * try other mirrors that might have a good copy of the data.  This
@@ -1971,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
        if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
                return 0;
 
-       bio = bio_alloc(GFP_NOFS, 1);
+       bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
        if (!bio)
                return -EIO;
        bio->bi_private = &compl;
@@ -2261,7 +2327,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
                return -EIO;
        }
 
-       bio = bio_alloc(GFP_NOFS, 1);
+       bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
        if (!bio) {
                free_io_failure(inode, failrec, 0);
                return -EIO;
@@ -2323,19 +2389,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
        struct extent_io_tree *tree;
        u64 start;
        u64 end;
-       int whole_page;
 
        do {
                struct page *page = bvec->bv_page;
                tree = &BTRFS_I(page->mapping->host)->io_tree;
 
-               start = page_offset(page) + bvec->bv_offset;
-               end = start + bvec->bv_len - 1;
+               /* We always issue full-page reads, but if some block
+                * in a page fails to read, blk_update_request() will
+                * advance bv_offset and adjust bv_len to compensate.
+                * Print a warning for nonzero offsets, and an error
+                * if they don't add up to a full page.  */
+               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+                       printk("%s page write in btrfs with offset %u and length %u\n",
+                              bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+                              ? KERN_ERR "partial" : KERN_INFO "incomplete",
+                              bvec->bv_offset, bvec->bv_len);
 
-               if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
-                       whole_page = 1;
-               else
-                       whole_page = 0;
+               start = page_offset(page);
+               end = start + bvec->bv_offset + bvec->bv_len - 1;
 
                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);
@@ -2343,10 +2414,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
                if (end_extent_writepage(page, err, start, end))
                        continue;
 
-               if (whole_page)
-                       end_page_writeback(page);
-               else
-                       check_page_writeback(tree, page);
+               end_page_writeback(page);
        } while (bvec >= bio->bi_io_vec);
 
        bio_put(bio);
@@ -2371,7 +2439,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
        struct extent_io_tree *tree;
        u64 start;
        u64 end;
-       int whole_page;
        int mirror;
        int ret;
 
@@ -2382,19 +2449,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                struct page *page = bvec->bv_page;
                struct extent_state *cached = NULL;
                struct extent_state *state;
+               struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 
                pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
-                        "mirror=%ld\n", (u64)bio->bi_sector, err,
-                        (long int)bio->bi_bdev);
+                        "mirror=%lu\n", (u64)bio->bi_sector, err,
+                        io_bio->mirror_num);
                tree = &BTRFS_I(page->mapping->host)->io_tree;
 
-               start = page_offset(page) + bvec->bv_offset;
-               end = start + bvec->bv_len - 1;
+               /* We always issue full-page reads, but if some block
+                * in a page fails to read, blk_update_request() will
+                * advance bv_offset and adjust bv_len to compensate.
+                * Print a warning for nonzero offsets, and an error
+                * if they don't add up to a full page.  */
+               if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+                       printk("%s page read in btrfs with offset %u and length %u\n",
+                              bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+                              ? KERN_ERR "partial" : KERN_INFO "incomplete",
+                              bvec->bv_offset, bvec->bv_len);
 
-               if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
-                       whole_page = 1;
-               else
-                       whole_page = 0;
+               start = page_offset(page);
+               end = start + bvec->bv_offset + bvec->bv_len - 1;
 
                if (++bvec <= bvec_end)
                        prefetchw(&bvec->bv_page->flags);
@@ -2410,7 +2484,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                }
                spin_unlock(&tree->lock);
 
-               mirror = (int)(unsigned long)bio->bi_bdev;
+               mirror = io_bio->mirror_num;
                if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
                        ret = tree->ops->readpage_end_io_hook(page, start, end,
                                                              state, mirror);
@@ -2453,39 +2527,35 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                }
                unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
-               if (whole_page) {
-                       if (uptodate) {
-                               SetPageUptodate(page);
-                       } else {
-                               ClearPageUptodate(page);
-                               SetPageError(page);
-                       }
-                       unlock_page(page);
+               if (uptodate) {
+                       SetPageUptodate(page);
                } else {
-                       if (uptodate) {
-                               check_page_uptodate(tree, page);
-                       } else {
-                               ClearPageUptodate(page);
-                               SetPageError(page);
-                       }
-                       check_page_locked(tree, page);
+                       ClearPageUptodate(page);
+                       SetPageError(page);
                }
+               unlock_page(page);
        } while (bvec <= bvec_end);
 
        bio_put(bio);
 }
 
+/*
+ * this allocates from the btrfs_bioset.  We're returning a bio right now
+ * but you can call btrfs_io_bio for the appropriate container_of magic
+ */
 struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
                gfp_t gfp_flags)
 {
        struct bio *bio;
 
-       bio = bio_alloc(gfp_flags, nr_vecs);
+       bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
 
        if (bio == NULL && (current->flags & PF_MEMALLOC)) {
-               while (!bio && (nr_vecs /= 2))
-                       bio = bio_alloc(gfp_flags, nr_vecs);
+               while (!bio && (nr_vecs /= 2)) {
+                       bio = bio_alloc_bioset(gfp_flags,
+                                              nr_vecs, btrfs_bioset);
+               }
        }
 
        if (bio) {
@@ -2496,6 +2566,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
        return bio;
 }
 
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+{
+       return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
+}
+
+
+/* this also allocates from the btrfs_bioset */
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+       return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+}
+
+
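The container_of magic mentioned in the comment above is presumably a small inline helper in the btrfs headers along these lines (a sketch, not necessarily the verbatim definition):

        static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
        {
                return container_of(bio, struct btrfs_io_bio, bio);
        }

Any bio that completes through end_bio_extent_readpage() must carry those extra fields in front of it, which is what the bio_alloc() -> btrfs_io_bio_alloc() conversions earlier in the patch ensure.
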
 static int __must_check submit_one_bio(int rw, struct bio *bio,
                                       int mirror_num, unsigned long bio_flags)
 {
@@ -2560,8 +2643,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
                if (old_compressed)
                        contig = bio->bi_sector == sector;
                else
-                       contig = bio->bi_sector + (bio->bi_size >> 9) ==
-                               sector;
+                       contig = bio_end_sector(bio) == sector;
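
bio_end_sector() is the block-layer helper for exactly the expression that was open-coded before; on kernels of this vintage it is defined roughly as follows (paraphrased from include/linux/bio.h):

        #define bio_sectors(bio)        ((bio)->bi_size >> 9)
        #define bio_end_sector(bio)     ((bio)->bi_sector + bio_sectors((bio)))

so the replacement is behaviour-preserving: it still compares the first sector past the end of the existing bio with the sector about to be appended.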
 
                if (prev_bio_flags != bio_flags || !contig ||
                    merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
@@ -2596,7 +2678,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
        return ret;
 }
 
-void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
+static void attach_extent_buffer_page(struct extent_buffer *eb,
+                                     struct page *page)
 {
        if (!PagePrivate(page)) {
                SetPagePrivate(page);
@@ -2626,7 +2709,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                                   struct page *page,
                                   get_extent_t *get_extent,
                                   struct bio **bio, int mirror_num,
-                                  unsigned long *bio_flags)
+                                  unsigned long *bio_flags, int rw)
 {
        struct inode *inode = page->mapping->host;
        u64 start = page_offset(page);
@@ -2772,7 +2855,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                }
 
                pnr -= page->index;
-               ret = submit_extent_page(READ, tree, page,
+               ret = submit_extent_page(rw, tree, page,
                                         sector, disk_io_size, pg_offset,
                                         bdev, bio, pnr,
                                         end_bio_extent_readpage, mirror_num,
@@ -2805,7 +2888,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
        int ret;
 
        ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
-                                     &bio_flags);
+                                     &bio_flags, READ);
        if (bio)
                ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
        return ret;
@@ -3104,7 +3187,7 @@ static int eb_wait(void *word)
        return 0;
 }
 
-static void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
+void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 {
        wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
                    TASK_UNINTERRUPTIBLE);
@@ -3229,7 +3312,7 @@ static int write_one_eb(struct extent_buffer *eb,
        u64 offset = eb->start;
        unsigned long i, num_pages;
        unsigned long bio_flags = 0;
-       int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
+       int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
        int ret = 0;
 
        clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
@@ -3666,14 +3749,14 @@ int extent_readpages(struct extent_io_tree *tree,
                        continue;
                for (i = 0; i < nr; i++) {
                        __extent_read_full_page(tree, pagepool[i], get_extent,
-                                       &bio, 0, &bio_flags);
+                                       &bio, 0, &bio_flags, READ);
                        page_cache_release(pagepool[i]);
                }
                nr = 0;
        }
        for (i = 0; i < nr; i++) {
                __extent_read_full_page(tree, pagepool[i], get_extent,
-                                       &bio, 0, &bio_flags);
+                                       &bio, 0, &bio_flags, READ);
                page_cache_release(pagepool[i]);
        }
 
@@ -3714,9 +3797,9 @@ int extent_invalidatepage(struct extent_io_tree *tree,
  * are locked or under IO and drops the related state bits if it is safe
  * to drop the page.
  */
-int try_release_extent_state(struct extent_map_tree *map,
-                            struct extent_io_tree *tree, struct page *page,
-                            gfp_t mask)
+static int try_release_extent_state(struct extent_map_tree *map,
+                                   struct extent_io_tree *tree,
+                                   struct page *page, gfp_t mask)
 {
        u64 start = page_offset(page);
        u64 end = start + PAGE_CACHE_SIZE - 1;
@@ -3913,7 +3996,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                last_for_get_extent = isize;
        }
 
-       lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
+       lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
                         &cached_state);
 
        em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -4000,19 +4083,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 out_free:
        free_extent_map(em);
 out:
-       unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
+       unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
                             &cached_state, GFP_NOFS);
        return ret;
 }
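
Both fiemap changes above are the same off-by-one fix: ranges in the extent_io tree are inclusive ([start, end], as the comments elsewhere in this file note), so covering len bytes from start means locking up to start + len - 1.  A quick illustration with made-up values:

        u64 start = 0, len = 4096;   /* hypothetical: first 4K of the file */
        lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0, &cached_state);
        /* locks [0, 4095]; the old start + len end also covered offset 4096 */

The matching unlock_extent_cached() must use the same end so that extra byte is not left locked.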
 
 static void __free_extent_buffer(struct extent_buffer *eb)
 {
-#if LEAK_DEBUG
-       unsigned long flags;
-       spin_lock_irqsave(&leak_lock, flags);
-       list_del(&eb->leak_list);
-       spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+       btrfs_leak_debug_del(&eb->leak_list);
        kmem_cache_free(extent_buffer_cache, eb);
 }
 
@@ -4022,9 +4100,6 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
                                                   gfp_t mask)
 {
        struct extent_buffer *eb = NULL;
-#if LEAK_DEBUG
-       unsigned long flags;
-#endif
 
        eb = kmem_cache_zalloc(extent_buffer_cache, mask);
        if (eb == NULL)
@@ -4044,11 +4119,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
        init_waitqueue_head(&eb->write_lock_wq);
        init_waitqueue_head(&eb->read_lock_wq);
 
-#if LEAK_DEBUG
-       spin_lock_irqsave(&leak_lock, flags);
-       list_add(&eb->leak_list, &buffers);
-       spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+       btrfs_leak_debug_add(&eb->leak_list, &buffers);
+
        spin_lock_init(&eb->refs_lock);
        atomic_set(&eb->refs, 1);
        atomic_set(&eb->io_pages, 0);
@@ -4386,7 +4458,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
 }
 
 /* Expects to have eb->eb_lock already held */
-static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
+static int release_extent_buffer(struct extent_buffer *eb)
 {
        WARN_ON(atomic_read(&eb->refs) == 0);
        if (atomic_dec_and_test(&eb->refs)) {
@@ -4444,7 +4516,7 @@ void free_extent_buffer(struct extent_buffer *eb)
         * I know this is terrible, but it's temporary until we stop tracking
         * the uptodate bits and such for the extent buffers.
         */
-       release_extent_buffer(eb, GFP_ATOMIC);
+       release_extent_buffer(eb);
 }
 
 void free_extent_buffer_stale(struct extent_buffer *eb)
@@ -4458,7 +4530,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
        if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
            test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
                atomic_dec(&eb->refs);
-       release_extent_buffer(eb, GFP_NOFS);
+       release_extent_buffer(eb);
 }
 
 void clear_extent_buffer_dirty(struct extent_buffer *eb)
@@ -4510,17 +4582,6 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
        return was_dirty;
 }
 
-static int range_straddles_pages(u64 start, u64 len)
-{
-       if (len < PAGE_CACHE_SIZE)
-               return 1;
-       if (start & (PAGE_CACHE_SIZE - 1))
-               return 1;
-       if ((start + len) & (PAGE_CACHE_SIZE - 1))
-               return 1;
-       return 0;
-}
-
 int clear_extent_buffer_uptodate(struct extent_buffer *eb)
 {
        unsigned long i;
@@ -4552,37 +4613,6 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb)
        return 0;
 }
 
-int extent_range_uptodate(struct extent_io_tree *tree,
-                         u64 start, u64 end)
-{
-       struct page *page;
-       int ret;
-       int pg_uptodate = 1;
-       int uptodate;
-       unsigned long index;
-
-       if (range_straddles_pages(start, end - start + 1)) {
-               ret = test_range_bit(tree, start, end,
-                                    EXTENT_UPTODATE, 1, NULL);
-               if (ret)
-                       return 1;
-       }
-       while (start <= end) {
-               index = start >> PAGE_CACHE_SHIFT;
-               page = find_get_page(tree->mapping, index);
-               if (!page)
-                       return 1;
-               uptodate = PageUptodate(page);
-               page_cache_release(page);
-               if (!uptodate) {
-                       pg_uptodate = 0;
-                       break;
-               }
-               start += PAGE_CACHE_SIZE;
-       }
-       return pg_uptodate;
-}
-
 int extent_buffer_uptodate(struct extent_buffer *eb)
 {
        return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -4645,7 +4675,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
                        ClearPageError(page);
                        err = __extent_read_full_page(tree, page,
                                                      get_extent, &bio,
-                                                     mirror_num, &bio_flags);
+                                                     mirror_num, &bio_flags,
+                                                     READ | REQ_META);
                        if (err)
                                ret = err;
                } else {
@@ -4654,7 +4685,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        }
 
        if (bio) {
-               err = submit_one_bio(READ, bio, mirror_num, bio_flags);
+               err = submit_one_bio(READ | REQ_META, bio, mirror_num,
+                                    bio_flags);
                if (err)
                        return err;
        }
@@ -5018,7 +5050,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
        }
 }
 
-int try_release_extent_buffer(struct page *page, gfp_t mask)
+int try_release_extent_buffer(struct page *page)
 {
        struct extent_buffer *eb;
 
@@ -5048,9 +5080,6 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
        }
        spin_unlock(&page->mapping->private_lock);
 
-       if ((mask & GFP_NOFS) == GFP_NOFS)
-               mask = GFP_NOFS;
-
        /*
         * If tree ref isn't set then we know the ref on this eb is a real ref,
         * so just return; this page will likely be freed soon anyway.
@@ -5060,5 +5089,5 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
                return 0;
        }
 
-       return release_extent_buffer(eb, mask);
+       return release_extent_buffer(eb);
 }