Btrfs: more efficient btrfs_drop_extent_cache
authorFilipe Manana <fdmanana@gmail.com>
Tue, 25 Feb 2014 14:15:13 +0000 (14:15 +0000)
committerJosef Bacik <jbacik@fb.com>
Mon, 10 Mar 2014 19:16:57 +0000 (15:16 -0400)
While droping extent map structures from the extent cache that cover our
target range, we would remove each extent map structure from the red black
tree and then add either 1 or 2 new extent map structures if the former
extent map covered sections outside our target range.

This change simply attempts to replace the existing extent map structure
with a new one that covers the subsection we're not interested in, instead
of doing a red black remove operation followed by an insertion operation.

The number of elements in an inode's extent map tree can get very high for large
files under random writes. For example, while running the following test:

    sysbench --test=fileio --file-num=1 --file-total-size=10G \
        --file-test-mode=rndrw --num-threads=32 --file-block-size=32768 \
        --max-requests=500000 --file-rw-ratio=2 [prepare|run]

I captured the following histogram capturing the number of extent_map items
in the red black tree while that test was running:

    Count: 122462
    Range:  1.000 - 172231.000; Mean: 96415.831; Median: 101855.000; Stddev: 49700.981
    Percentiles:  90th: 160120.000; 95th: 166335.000; 99th: 171070.000
       1.000 -    5.231:   452 |
       5.231 -  187.392:    87 |
     187.392 -  585.911:   206 |
     585.911 - 1827.438:   623 |
    1827.438 - 5695.245:  1962 #
    5695.245 - 17744.861:  6204 ####
   17744.861 - 55283.764: 21115 ############
   55283.764 - 172231.000: 91813 #####################################################

Benchmark:

    sysbench --test=fileio --file-num=1 --file-total-size=10G --file-test-mode=rndwr \
        --num-threads=64 --file-block-size=32768 --max-requests=0 --max-time=60 \
        --file-io-mode=sync --file-fsync-freq=0 [prepare|run]

Before this change: 122.1Mb/sec
After this change:  125.07Mb/sec
(averages of 5 test runs)

Test machine: quad core intel i5-3570K, 32Gb of ram, SSD

Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
fs/btrfs/extent_map.c
fs/btrfs/extent_map.h
fs/btrfs/file.c

index 64d08f94485dc84c957ebd1eb81cc40c8f20ef0f..1874aee69c86391e6b8a70eb3931a8a824e3da65 100644 (file)
@@ -318,6 +318,20 @@ void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
                try_merge_map(tree, em);
 }
 
+static inline void setup_extent_mapping(struct extent_map_tree *tree,
+                                       struct extent_map *em,
+                                       int modified)
+{
+       atomic_inc(&em->refs);
+       em->mod_start = em->start;
+       em->mod_len = em->len;
+
+       if (modified)
+               list_move(&em->list, &tree->modified_extents);
+       else
+               try_merge_map(tree, em);
+}
+
 /**
  * add_extent_mapping - add new extent map to the extent tree
  * @tree:      tree to insert new map in
@@ -337,15 +351,7 @@ int add_extent_mapping(struct extent_map_tree *tree,
        if (ret)
                goto out;
 
-       atomic_inc(&em->refs);
-
-       em->mod_start = em->start;
-       em->mod_len = em->len;
-
-       if (modified)
-               list_move(&em->list, &tree->modified_extents);
-       else
-               try_merge_map(tree, em);
+       setup_extent_mapping(tree, em, modified);
 out:
        return ret;
 }
@@ -432,3 +438,18 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
        RB_CLEAR_NODE(&em->rb_node);
        return ret;
 }
+
+void replace_extent_mapping(struct extent_map_tree *tree,
+                           struct extent_map *cur,
+                           struct extent_map *new,
+                           int modified)
+{
+       WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
+       ASSERT(extent_map_in_tree(cur));
+       if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
+               list_del_init(&cur->list);
+       rb_replace_node(&cur->rb_node, &new->rb_node, &tree->map);
+       RB_CLEAR_NODE(&cur->rb_node);
+
+       setup_extent_mapping(tree, new, modified);
+}
index f0a645a14d6e83a5e2b04dcf8ba71d69725ecc32..e7fd8a56a140f89e521d7901a02c8448e1b929d5 100644 (file)
@@ -68,6 +68,10 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
 int add_extent_mapping(struct extent_map_tree *tree,
                       struct extent_map *em, int modified);
 int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
+void replace_extent_mapping(struct extent_map_tree *tree,
+                           struct extent_map *cur,
+                           struct extent_map *new,
+                           int modified);
 
 struct extent_map *alloc_extent_map(void);
 void free_extent_map(struct extent_map *em);
index 762ca32bd988b413389f6ee5590b31c92a616bc7..31e48b9470607a834818eb9fbaa83e4ed013ac14 100644 (file)
@@ -591,7 +591,6 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                clear_bit(EXTENT_FLAG_PINNED, &em->flags);
                clear_bit(EXTENT_FLAG_LOGGING, &flags);
                modified = !list_empty(&em->list);
-               remove_extent_mapping(em_tree, em);
                if (no_splits)
                        goto next;
 
@@ -622,8 +621,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                        split->bdev = em->bdev;
                        split->flags = flags;
                        split->compress_type = em->compress_type;
-                       ret = add_extent_mapping(em_tree, split, modified);
-                       BUG_ON(ret); /* Logic error */
+                       replace_extent_mapping(em_tree, em, split, modified);
                        free_extent_map(split);
                        split = split2;
                        split2 = NULL;
@@ -661,12 +659,20 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                                split->orig_block_len = 0;
                        }
 
-                       ret = add_extent_mapping(em_tree, split, modified);
-                       BUG_ON(ret); /* Logic error */
+                       if (extent_map_in_tree(em)) {
+                               replace_extent_mapping(em_tree, em, split,
+                                                      modified);
+                       } else {
+                               ret = add_extent_mapping(em_tree, split,
+                                                        modified);
+                               ASSERT(ret == 0); /* Logic error */
+                       }
                        free_extent_map(split);
                        split = NULL;
                }
 next:
+               if (extent_map_in_tree(em))
+                       remove_extent_mapping(em_tree, em);
                write_unlock(&em_tree->lock);
 
                /* once for us */