Merge tag 'ep93xx-fixes-for-3.6' of git://github.com/RyanMallon/linux-ep93xx into...
[linux.git] / fs / btrfs / delayed-inode.c
1 /*
2  * Copyright (C) 2011 Fujitsu.  All rights reserved.
3  * Written by Miao Xie <miaox@cn.fujitsu.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public
7  * License v2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this program; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 021110-1307, USA.
18  */
19
20 #include <linux/slab.h>
21 #include "delayed-inode.h"
22 #include "disk-io.h"
23 #include "transaction.h"
24
/*
 * Thresholds on the global number of pending delayed items.
 *
 * BTRFS_DELAYED_BACKGROUND: __btrfs_remove_delayed_item() wakes up the tasks
 * sleeping on delayed_root->wait once the item count is below this value.
 * BTRFS_DELAYED_WRITEBACK: not referenced in this part of the file;
 * presumably the level above which producers get throttled - TODO confirm
 * against its users.
 */
#define BTRFS_DELAYED_WRITEBACK         400
#define BTRFS_DELAYED_BACKGROUND        100

/* Slab cache every struct btrfs_delayed_node is allocated from. */
static struct kmem_cache *delayed_node_cache;
29
30 int __init btrfs_delayed_inode_init(void)
31 {
32         delayed_node_cache = kmem_cache_create("delayed_node",
33                                         sizeof(struct btrfs_delayed_node),
34                                         0,
35                                         SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
36                                         NULL);
37         if (!delayed_node_cache)
38                 return -ENOMEM;
39         return 0;
40 }
41
42 void btrfs_delayed_inode_exit(void)
43 {
44         if (delayed_node_cache)
45                 kmem_cache_destroy(delayed_node_cache);
46 }
47
48 static inline void btrfs_init_delayed_node(
49                                 struct btrfs_delayed_node *delayed_node,
50                                 struct btrfs_root *root, u64 inode_id)
51 {
52         delayed_node->root = root;
53         delayed_node->inode_id = inode_id;
54         atomic_set(&delayed_node->refs, 0);
55         delayed_node->count = 0;
56         delayed_node->in_list = 0;
57         delayed_node->inode_dirty = 0;
58         delayed_node->ins_root = RB_ROOT;
59         delayed_node->del_root = RB_ROOT;
60         mutex_init(&delayed_node->mutex);
61         delayed_node->index_cnt = 0;
62         INIT_LIST_HEAD(&delayed_node->n_list);
63         INIT_LIST_HEAD(&delayed_node->p_list);
64         delayed_node->bytes_reserved = 0;
65 }
66
67 static inline int btrfs_is_continuous_delayed_item(
68                                         struct btrfs_delayed_item *item1,
69                                         struct btrfs_delayed_item *item2)
70 {
71         if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
72             item1->key.objectid == item2->key.objectid &&
73             item1->key.type == item2->key.type &&
74             item1->key.offset + 1 == item2->key.offset)
75                 return 1;
76         return 0;
77 }
78
79 static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
80                                                         struct btrfs_root *root)
81 {
82         return root->fs_info->delayed_root;
83 }
84
85 static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
86 {
87         struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
88         struct btrfs_root *root = btrfs_inode->root;
89         u64 ino = btrfs_ino(inode);
90         struct btrfs_delayed_node *node;
91
92         node = ACCESS_ONCE(btrfs_inode->delayed_node);
93         if (node) {
94                 atomic_inc(&node->refs);
95                 return node;
96         }
97
98         spin_lock(&root->inode_lock);
99         node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
100         if (node) {
101                 if (btrfs_inode->delayed_node) {
102                         atomic_inc(&node->refs);        /* can be accessed */
103                         BUG_ON(btrfs_inode->delayed_node != node);
104                         spin_unlock(&root->inode_lock);
105                         return node;
106                 }
107                 btrfs_inode->delayed_node = node;
108                 atomic_inc(&node->refs);        /* can be accessed */
109                 atomic_inc(&node->refs);        /* cached in the inode */
110                 spin_unlock(&root->inode_lock);
111                 return node;
112         }
113         spin_unlock(&root->inode_lock);
114
115         return NULL;
116 }
117
118 /* Will return either the node or PTR_ERR(-ENOMEM) */
119 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
120                                                         struct inode *inode)
121 {
122         struct btrfs_delayed_node *node;
123         struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
124         struct btrfs_root *root = btrfs_inode->root;
125         u64 ino = btrfs_ino(inode);
126         int ret;
127
128 again:
129         node = btrfs_get_delayed_node(inode);
130         if (node)
131                 return node;
132
133         node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
134         if (!node)
135                 return ERR_PTR(-ENOMEM);
136         btrfs_init_delayed_node(node, root, ino);
137
138         atomic_inc(&node->refs);        /* cached in the btrfs inode */
139         atomic_inc(&node->refs);        /* can be accessed */
140
141         ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
142         if (ret) {
143                 kmem_cache_free(delayed_node_cache, node);
144                 return ERR_PTR(ret);
145         }
146
147         spin_lock(&root->inode_lock);
148         ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
149         if (ret == -EEXIST) {
150                 kmem_cache_free(delayed_node_cache, node);
151                 spin_unlock(&root->inode_lock);
152                 radix_tree_preload_end();
153                 goto again;
154         }
155         btrfs_inode->delayed_node = node;
156         spin_unlock(&root->inode_lock);
157         radix_tree_preload_end();
158
159         return node;
160 }
161
162 /*
163  * Call it when holding delayed_node->mutex
164  *
165  * If mod = 1, add this node into the prepared list.
166  */
167 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
168                                      struct btrfs_delayed_node *node,
169                                      int mod)
170 {
171         spin_lock(&root->lock);
172         if (node->in_list) {
173                 if (!list_empty(&node->p_list))
174                         list_move_tail(&node->p_list, &root->prepare_list);
175                 else if (mod)
176                         list_add_tail(&node->p_list, &root->prepare_list);
177         } else {
178                 list_add_tail(&node->n_list, &root->node_list);
179                 list_add_tail(&node->p_list, &root->prepare_list);
180                 atomic_inc(&node->refs);        /* inserted into list */
181                 root->nodes++;
182                 node->in_list = 1;
183         }
184         spin_unlock(&root->lock);
185 }
186
187 /* Call it when holding delayed_node->mutex */
188 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
189                                        struct btrfs_delayed_node *node)
190 {
191         spin_lock(&root->lock);
192         if (node->in_list) {
193                 root->nodes--;
194                 atomic_dec(&node->refs);        /* not in the list */
195                 list_del_init(&node->n_list);
196                 if (!list_empty(&node->p_list))
197                         list_del_init(&node->p_list);
198                 node->in_list = 0;
199         }
200         spin_unlock(&root->lock);
201 }
202
203 struct btrfs_delayed_node *btrfs_first_delayed_node(
204                         struct btrfs_delayed_root *delayed_root)
205 {
206         struct list_head *p;
207         struct btrfs_delayed_node *node = NULL;
208
209         spin_lock(&delayed_root->lock);
210         if (list_empty(&delayed_root->node_list))
211                 goto out;
212
213         p = delayed_root->node_list.next;
214         node = list_entry(p, struct btrfs_delayed_node, n_list);
215         atomic_inc(&node->refs);
216 out:
217         spin_unlock(&delayed_root->lock);
218
219         return node;
220 }
221
222 struct btrfs_delayed_node *btrfs_next_delayed_node(
223                                                 struct btrfs_delayed_node *node)
224 {
225         struct btrfs_delayed_root *delayed_root;
226         struct list_head *p;
227         struct btrfs_delayed_node *next = NULL;
228
229         delayed_root = node->root->fs_info->delayed_root;
230         spin_lock(&delayed_root->lock);
231         if (!node->in_list) {   /* not in the list */
232                 if (list_empty(&delayed_root->node_list))
233                         goto out;
234                 p = delayed_root->node_list.next;
235         } else if (list_is_last(&node->n_list, &delayed_root->node_list))
236                 goto out;
237         else
238                 p = node->n_list.next;
239
240         next = list_entry(p, struct btrfs_delayed_node, n_list);
241         atomic_inc(&next->refs);
242 out:
243         spin_unlock(&delayed_root->lock);
244
245         return next;
246 }
247
248 static void __btrfs_release_delayed_node(
249                                 struct btrfs_delayed_node *delayed_node,
250                                 int mod)
251 {
252         struct btrfs_delayed_root *delayed_root;
253
254         if (!delayed_node)
255                 return;
256
257         delayed_root = delayed_node->root->fs_info->delayed_root;
258
259         mutex_lock(&delayed_node->mutex);
260         if (delayed_node->count)
261                 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
262         else
263                 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
264         mutex_unlock(&delayed_node->mutex);
265
266         if (atomic_dec_and_test(&delayed_node->refs)) {
267                 struct btrfs_root *root = delayed_node->root;
268                 spin_lock(&root->inode_lock);
269                 if (atomic_read(&delayed_node->refs) == 0) {
270                         radix_tree_delete(&root->delayed_nodes_tree,
271                                           delayed_node->inode_id);
272                         kmem_cache_free(delayed_node_cache, delayed_node);
273                 }
274                 spin_unlock(&root->inode_lock);
275         }
276 }
277
/*
 * Drop a reference on @node without forcing it onto the prepared list
 * (mod == 0).
 */
static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
{
	const int mod = 0;

	__btrfs_release_delayed_node(node, mod);
}
282
283 struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
284                                         struct btrfs_delayed_root *delayed_root)
285 {
286         struct list_head *p;
287         struct btrfs_delayed_node *node = NULL;
288
289         spin_lock(&delayed_root->lock);
290         if (list_empty(&delayed_root->prepare_list))
291                 goto out;
292
293         p = delayed_root->prepare_list.next;
294         list_del_init(p);
295         node = list_entry(p, struct btrfs_delayed_node, p_list);
296         atomic_inc(&node->refs);
297 out:
298         spin_unlock(&delayed_root->lock);
299
300         return node;
301 }
302
/*
 * Drop a reference on @node; if it still has pending items it is put back
 * on the prepared list (mod == 1).
 */
static inline void btrfs_release_prepared_delayed_node(
					struct btrfs_delayed_node *node)
{
	const int mod = 1;

	__btrfs_release_delayed_node(node, mod);
}
308
309 struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
310 {
311         struct btrfs_delayed_item *item;
312         item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
313         if (item) {
314                 item->data_len = data_len;
315                 item->ins_or_del = 0;
316                 item->bytes_reserved = 0;
317                 item->delayed_node = NULL;
318                 atomic_set(&item->refs, 1);
319         }
320         return item;
321 }
322
323 /*
324  * __btrfs_lookup_delayed_item - look up the delayed item by key
325  * @delayed_node: pointer to the delayed node
326  * @key:          the key to look up
327  * @prev:         used to store the prev item if the right item isn't found
328  * @next:         used to store the next item if the right item isn't found
329  *
330  * Note: if we don't find the right item, we will return the prev item and
331  * the next item.
332  */
333 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
334                                 struct rb_root *root,
335                                 struct btrfs_key *key,
336                                 struct btrfs_delayed_item **prev,
337                                 struct btrfs_delayed_item **next)
338 {
339         struct rb_node *node, *prev_node = NULL;
340         struct btrfs_delayed_item *delayed_item = NULL;
341         int ret = 0;
342
343         node = root->rb_node;
344
345         while (node) {
346                 delayed_item = rb_entry(node, struct btrfs_delayed_item,
347                                         rb_node);
348                 prev_node = node;
349                 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
350                 if (ret < 0)
351                         node = node->rb_right;
352                 else if (ret > 0)
353                         node = node->rb_left;
354                 else
355                         return delayed_item;
356         }
357
358         if (prev) {
359                 if (!prev_node)
360                         *prev = NULL;
361                 else if (ret < 0)
362                         *prev = delayed_item;
363                 else if ((node = rb_prev(prev_node)) != NULL) {
364                         *prev = rb_entry(node, struct btrfs_delayed_item,
365                                          rb_node);
366                 } else
367                         *prev = NULL;
368         }
369
370         if (next) {
371                 if (!prev_node)
372                         *next = NULL;
373                 else if (ret > 0)
374                         *next = delayed_item;
375                 else if ((node = rb_next(prev_node)) != NULL) {
376                         *next = rb_entry(node, struct btrfs_delayed_item,
377                                          rb_node);
378                 } else
379                         *next = NULL;
380         }
381         return NULL;
382 }
383
384 struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
385                                         struct btrfs_delayed_node *delayed_node,
386                                         struct btrfs_key *key)
387 {
388         struct btrfs_delayed_item *item;
389
390         item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
391                                            NULL, NULL);
392         return item;
393 }
394
395 struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
396                                         struct btrfs_delayed_node *delayed_node,
397                                         struct btrfs_key *key)
398 {
399         struct btrfs_delayed_item *item;
400
401         item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
402                                            NULL, NULL);
403         return item;
404 }
405
406 struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
407                                         struct btrfs_delayed_node *delayed_node,
408                                         struct btrfs_key *key)
409 {
410         struct btrfs_delayed_item *item, *next;
411
412         item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
413                                            NULL, &next);
414         if (!item)
415                 item = next;
416
417         return item;
418 }
419
420 struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
421                                         struct btrfs_delayed_node *delayed_node,
422                                         struct btrfs_key *key)
423 {
424         struct btrfs_delayed_item *item, *next;
425
426         item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
427                                            NULL, &next);
428         if (!item)
429                 item = next;
430
431         return item;
432 }
433
434 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
435                                     struct btrfs_delayed_item *ins,
436                                     int action)
437 {
438         struct rb_node **p, *node;
439         struct rb_node *parent_node = NULL;
440         struct rb_root *root;
441         struct btrfs_delayed_item *item;
442         int cmp;
443
444         if (action == BTRFS_DELAYED_INSERTION_ITEM)
445                 root = &delayed_node->ins_root;
446         else if (action == BTRFS_DELAYED_DELETION_ITEM)
447                 root = &delayed_node->del_root;
448         else
449                 BUG();
450         p = &root->rb_node;
451         node = &ins->rb_node;
452
453         while (*p) {
454                 parent_node = *p;
455                 item = rb_entry(parent_node, struct btrfs_delayed_item,
456                                  rb_node);
457
458                 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
459                 if (cmp < 0)
460                         p = &(*p)->rb_right;
461                 else if (cmp > 0)
462                         p = &(*p)->rb_left;
463                 else
464                         return -EEXIST;
465         }
466
467         rb_link_node(node, parent_node, p);
468         rb_insert_color(node, root);
469         ins->delayed_node = delayed_node;
470         ins->ins_or_del = action;
471
472         if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
473             action == BTRFS_DELAYED_INSERTION_ITEM &&
474             ins->key.offset >= delayed_node->index_cnt)
475                         delayed_node->index_cnt = ins->key.offset + 1;
476
477         delayed_node->count++;
478         atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
479         return 0;
480 }
481
482 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
483                                               struct btrfs_delayed_item *item)
484 {
485         return __btrfs_add_delayed_item(node, item,
486                                         BTRFS_DELAYED_INSERTION_ITEM);
487 }
488
489 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
490                                              struct btrfs_delayed_item *item)
491 {
492         return __btrfs_add_delayed_item(node, item,
493                                         BTRFS_DELAYED_DELETION_ITEM);
494 }
495
496 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
497 {
498         struct rb_root *root;
499         struct btrfs_delayed_root *delayed_root;
500
501         delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
502
503         BUG_ON(!delayed_root);
504         BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
505                delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
506
507         if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
508                 root = &delayed_item->delayed_node->ins_root;
509         else
510                 root = &delayed_item->delayed_node->del_root;
511
512         rb_erase(&delayed_item->rb_node, root);
513         delayed_item->delayed_node->count--;
514         atomic_dec(&delayed_root->items);
515         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
516             waitqueue_active(&delayed_root->wait))
517                 wake_up(&delayed_root->wait);
518 }
519
520 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
521 {
522         if (item) {
523                 __btrfs_remove_delayed_item(item);
524                 if (atomic_dec_and_test(&item->refs))
525                         kfree(item);
526         }
527 }
528
529 struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
530                                         struct btrfs_delayed_node *delayed_node)
531 {
532         struct rb_node *p;
533         struct btrfs_delayed_item *item = NULL;
534
535         p = rb_first(&delayed_node->ins_root);
536         if (p)
537                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
538
539         return item;
540 }
541
542 struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
543                                         struct btrfs_delayed_node *delayed_node)
544 {
545         struct rb_node *p;
546         struct btrfs_delayed_item *item = NULL;
547
548         p = rb_first(&delayed_node->del_root);
549         if (p)
550                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
551
552         return item;
553 }
554
555 struct btrfs_delayed_item *__btrfs_next_delayed_item(
556                                                 struct btrfs_delayed_item *item)
557 {
558         struct rb_node *p;
559         struct btrfs_delayed_item *next = NULL;
560
561         p = rb_next(&item->rb_node);
562         if (p)
563                 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
564
565         return next;
566 }
567
568 static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
569                                                    u64 root_id)
570 {
571         struct btrfs_key root_key;
572
573         if (root->objectid == root_id)
574                 return root;
575
576         root_key.objectid = root_id;
577         root_key.type = BTRFS_ROOT_ITEM_KEY;
578         root_key.offset = (u64)-1;
579         return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
580 }
581
582 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
583                                                struct btrfs_root *root,
584                                                struct btrfs_delayed_item *item)
585 {
586         struct btrfs_block_rsv *src_rsv;
587         struct btrfs_block_rsv *dst_rsv;
588         u64 num_bytes;
589         int ret;
590
591         if (!trans->bytes_reserved)
592                 return 0;
593
594         src_rsv = trans->block_rsv;
595         dst_rsv = &root->fs_info->delayed_block_rsv;
596
597         num_bytes = btrfs_calc_trans_metadata_size(root, 1);
598         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
599         if (!ret) {
600                 trace_btrfs_space_reservation(root->fs_info, "delayed_item",
601                                               item->key.objectid,
602                                               num_bytes, 1);
603                 item->bytes_reserved = num_bytes;
604         }
605
606         return ret;
607 }
608
609 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
610                                                 struct btrfs_delayed_item *item)
611 {
612         struct btrfs_block_rsv *rsv;
613
614         if (!item->bytes_reserved)
615                 return;
616
617         rsv = &root->fs_info->delayed_block_rsv;
618         trace_btrfs_space_reservation(root->fs_info, "delayed_item",
619                                       item->key.objectid, item->bytes_reserved,
620                                       0);
621         btrfs_block_rsv_release(root, rsv,
622                                 item->bytes_reserved);
623 }
624
625 static int btrfs_delayed_inode_reserve_metadata(
626                                         struct btrfs_trans_handle *trans,
627                                         struct btrfs_root *root,
628                                         struct inode *inode,
629                                         struct btrfs_delayed_node *node)
630 {
631         struct btrfs_block_rsv *src_rsv;
632         struct btrfs_block_rsv *dst_rsv;
633         u64 num_bytes;
634         int ret;
635         bool release = false;
636
637         src_rsv = trans->block_rsv;
638         dst_rsv = &root->fs_info->delayed_block_rsv;
639
640         num_bytes = btrfs_calc_trans_metadata_size(root, 1);
641
642         /*
643          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
644          * which doesn't reserve space for speed.  This is a problem since we
645          * still need to reserve space for this update, so try to reserve the
646          * space.
647          *
648          * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
649          * we're accounted for.
650          */
651         if (!src_rsv || (!trans->bytes_reserved &&
652             src_rsv != &root->fs_info->delalloc_block_rsv)) {
653                 ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
654                 /*
655                  * Since we're under a transaction reserve_metadata_bytes could
656                  * try to commit the transaction which will make it return
657                  * EAGAIN to make us stop the transaction we have, so return
658                  * ENOSPC instead so that btrfs_dirty_inode knows what to do.
659                  */
660                 if (ret == -EAGAIN)
661                         ret = -ENOSPC;
662                 if (!ret) {
663                         node->bytes_reserved = num_bytes;
664                         trace_btrfs_space_reservation(root->fs_info,
665                                                       "delayed_inode",
666                                                       btrfs_ino(inode),
667                                                       num_bytes, 1);
668                 }
669                 return ret;
670         } else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
671                 spin_lock(&BTRFS_I(inode)->lock);
672                 if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
673                                        &BTRFS_I(inode)->runtime_flags)) {
674                         spin_unlock(&BTRFS_I(inode)->lock);
675                         release = true;
676                         goto migrate;
677                 }
678                 spin_unlock(&BTRFS_I(inode)->lock);
679
680                 /* Ok we didn't have space pre-reserved.  This shouldn't happen
681                  * too often but it can happen if we do delalloc to an existing
682                  * inode which gets dirtied because of the time update, and then
683                  * isn't touched again until after the transaction commits and
684                  * then we try to write out the data.  First try to be nice and
685                  * reserve something strictly for us.  If not be a pain and try
686                  * to steal from the delalloc block rsv.
687                  */
688                 ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
689                 if (!ret)
690                         goto out;
691
692                 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
693                 if (!ret)
694                         goto out;
695
696                 /*
697                  * Ok this is a problem, let's just steal from the global rsv
698                  * since this really shouldn't happen that often.
699                  */
700                 WARN_ON(1);
701                 ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
702                                               dst_rsv, num_bytes);
703                 goto out;
704         }
705
706 migrate:
707         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
708
709 out:
710         /*
711          * Migrate only takes a reservation, it doesn't touch the size of the
712          * block_rsv.  This is to simplify people who don't normally have things
713          * migrated from their block rsv.  If they go to release their
714          * reservation, that will decrease the size as well, so if migrate
715          * reduced size we'd end up with a negative size.  But for the
716          * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
717          * but we could in fact do this reserve/migrate dance several times
718          * between the time we did the original reservation and we'd clean it
719          * up.  So to take care of this, release the space for the meta
720          * reservation here.  I think it may be time for a documentation page on
721          * how block rsvs. work.
722          */
723         if (!ret) {
724                 trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
725                                               btrfs_ino(inode), num_bytes, 1);
726                 node->bytes_reserved = num_bytes;
727         }
728
729         if (release) {
730                 trace_btrfs_space_reservation(root->fs_info, "delalloc",
731                                               btrfs_ino(inode), num_bytes, 0);
732                 btrfs_block_rsv_release(root, src_rsv, num_bytes);
733         }
734
735         return ret;
736 }
737
738 static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
739                                                 struct btrfs_delayed_node *node)
740 {
741         struct btrfs_block_rsv *rsv;
742
743         if (!node->bytes_reserved)
744                 return;
745
746         rsv = &root->fs_info->delayed_block_rsv;
747         trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
748                                       node->inode_id, node->bytes_reserved, 0);
749         btrfs_block_rsv_release(root, rsv,
750                                 node->bytes_reserved);
751         node->bytes_reserved = 0;
752 }
753
754 /*
755  * This helper will insert some continuous items into the same leaf according
756  * to the free space of the leaf.
757  */
758 static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
759                                 struct btrfs_root *root,
760                                 struct btrfs_path *path,
761                                 struct btrfs_delayed_item *item)
762 {
763         struct btrfs_delayed_item *curr, *next;
764         int free_space;
765         int total_data_size = 0, total_size = 0;
766         struct extent_buffer *leaf;
767         char *data_ptr;
768         struct btrfs_key *keys;
769         u32 *data_size;
770         struct list_head head;
771         int slot;
772         int nitems;
773         int i;
774         int ret = 0;
775
776         BUG_ON(!path->nodes[0]);
777
778         leaf = path->nodes[0];
779         free_space = btrfs_leaf_free_space(root, leaf);
780         INIT_LIST_HEAD(&head);
781
782         next = item;
783         nitems = 0;
784
785         /*
786          * count the number of the continuous items that we can insert in batch
787          */
788         while (total_size + next->data_len + sizeof(struct btrfs_item) <=
789                free_space) {
790                 total_data_size += next->data_len;
791                 total_size += next->data_len + sizeof(struct btrfs_item);
792                 list_add_tail(&next->tree_list, &head);
793                 nitems++;
794
795                 curr = next;
796                 next = __btrfs_next_delayed_item(curr);
797                 if (!next)
798                         break;
799
800                 if (!btrfs_is_continuous_delayed_item(curr, next))
801                         break;
802         }
803
804         if (!nitems) {
805                 ret = 0;
806                 goto out;
807         }
808
809         /*
810          * we need allocate some memory space, but it might cause the task
811          * to sleep, so we set all locked nodes in the path to blocking locks
812          * first.
813          */
814         btrfs_set_path_blocking(path);
815
816         keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
817         if (!keys) {
818                 ret = -ENOMEM;
819                 goto out;
820         }
821
822         data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
823         if (!data_size) {
824                 ret = -ENOMEM;
825                 goto error;
826         }
827
828         /* get keys of all the delayed items */
829         i = 0;
830         list_for_each_entry(next, &head, tree_list) {
831                 keys[i] = next->key;
832                 data_size[i] = next->data_len;
833                 i++;
834         }
835
836         /* reset all the locked nodes in the patch to spinning locks. */
837         btrfs_clear_path_blocking(path, NULL, 0);
838
839         /* insert the keys of the items */
840         setup_items_for_insert(trans, root, path, keys, data_size,
841                                total_data_size, total_size, nitems);
842
843         /* insert the dir index items */
844         slot = path->slots[0];
845         list_for_each_entry_safe(curr, next, &head, tree_list) {
846                 data_ptr = btrfs_item_ptr(leaf, slot, char);
847                 write_extent_buffer(leaf, &curr->data,
848                                     (unsigned long)data_ptr,
849                                     curr->data_len);
850                 slot++;
851
852                 btrfs_delayed_item_release_metadata(root, curr);
853
854                 list_del(&curr->tree_list);
855                 btrfs_release_delayed_item(curr);
856         }
857
858 error:
859         kfree(data_size);
860         kfree(keys);
861 out:
862         return ret;
863 }
864
865 /*
866  * This helper can just do simple insertion that needn't extend item for new
867  * data, such as directory name index insertion, inode insertion.
868  */
869 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
870                                      struct btrfs_root *root,
871                                      struct btrfs_path *path,
872                                      struct btrfs_delayed_item *delayed_item)
873 {
874         struct extent_buffer *leaf;
875         struct btrfs_item *item;
876         char *ptr;
877         int ret;
878
879         ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
880                                       delayed_item->data_len);
881         if (ret < 0 && ret != -EEXIST)
882                 return ret;
883
884         leaf = path->nodes[0];
885
886         item = btrfs_item_nr(leaf, path->slots[0]);
887         ptr = btrfs_item_ptr(leaf, path->slots[0], char);
888
889         write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
890                             delayed_item->data_len);
891         btrfs_mark_buffer_dirty(leaf);
892
893         btrfs_delayed_item_release_metadata(root, delayed_item);
894         return 0;
895 }
896
897 /*
898  * we insert an item first, then if there are some continuous items, we try
899  * to insert those items into the same leaf.
900  */
901 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
902                                       struct btrfs_path *path,
903                                       struct btrfs_root *root,
904                                       struct btrfs_delayed_node *node)
905 {
906         struct btrfs_delayed_item *curr, *prev;
907         int ret = 0;
908
909 do_again:
910         mutex_lock(&node->mutex);
911         curr = __btrfs_first_delayed_insertion_item(node);
912         if (!curr)
913                 goto insert_end;
914
915         ret = btrfs_insert_delayed_item(trans, root, path, curr);
916         if (ret < 0) {
917                 btrfs_release_path(path);
918                 goto insert_end;
919         }
920
921         prev = curr;
922         curr = __btrfs_next_delayed_item(prev);
923         if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
924                 /* insert the continuous items into the same leaf */
925                 path->slots[0]++;
926                 btrfs_batch_insert_items(trans, root, path, curr);
927         }
928         btrfs_release_delayed_item(prev);
929         btrfs_mark_buffer_dirty(path->nodes[0]);
930
931         btrfs_release_path(path);
932         mutex_unlock(&node->mutex);
933         goto do_again;
934
935 insert_end:
936         mutex_unlock(&node->mutex);
937         return ret;
938 }
939
940 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
941                                     struct btrfs_root *root,
942                                     struct btrfs_path *path,
943                                     struct btrfs_delayed_item *item)
944 {
945         struct btrfs_delayed_item *curr, *next;
946         struct extent_buffer *leaf;
947         struct btrfs_key key;
948         struct list_head head;
949         int nitems, i, last_item;
950         int ret = 0;
951
952         BUG_ON(!path->nodes[0]);
953
954         leaf = path->nodes[0];
955
956         i = path->slots[0];
957         last_item = btrfs_header_nritems(leaf) - 1;
958         if (i > last_item)
959                 return -ENOENT; /* FIXME: Is errno suitable? */
960
961         next = item;
962         INIT_LIST_HEAD(&head);
963         btrfs_item_key_to_cpu(leaf, &key, i);
964         nitems = 0;
965         /*
966          * count the number of the dir index items that we can delete in batch
967          */
968         while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
969                 list_add_tail(&next->tree_list, &head);
970                 nitems++;
971
972                 curr = next;
973                 next = __btrfs_next_delayed_item(curr);
974                 if (!next)
975                         break;
976
977                 if (!btrfs_is_continuous_delayed_item(curr, next))
978                         break;
979
980                 i++;
981                 if (i > last_item)
982                         break;
983                 btrfs_item_key_to_cpu(leaf, &key, i);
984         }
985
986         if (!nitems)
987                 return 0;
988
989         ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
990         if (ret)
991                 goto out;
992
993         list_for_each_entry_safe(curr, next, &head, tree_list) {
994                 btrfs_delayed_item_release_metadata(root, curr);
995                 list_del(&curr->tree_list);
996                 btrfs_release_delayed_item(curr);
997         }
998
999 out:
1000         return ret;
1001 }
1002
1003 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
1004                                       struct btrfs_path *path,
1005                                       struct btrfs_root *root,
1006                                       struct btrfs_delayed_node *node)
1007 {
1008         struct btrfs_delayed_item *curr, *prev;
1009         int ret = 0;
1010
1011 do_again:
1012         mutex_lock(&node->mutex);
1013         curr = __btrfs_first_delayed_deletion_item(node);
1014         if (!curr)
1015                 goto delete_fail;
1016
1017         ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
1018         if (ret < 0)
1019                 goto delete_fail;
1020         else if (ret > 0) {
1021                 /*
1022                  * can't find the item which the node points to, so this node
1023                  * is invalid, just drop it.
1024                  */
1025                 prev = curr;
1026                 curr = __btrfs_next_delayed_item(prev);
1027                 btrfs_release_delayed_item(prev);
1028                 ret = 0;
1029                 btrfs_release_path(path);
1030                 if (curr)
1031                         goto do_again;
1032                 else
1033                         goto delete_fail;
1034         }
1035
1036         btrfs_batch_delete_items(trans, root, path, curr);
1037         btrfs_release_path(path);
1038         mutex_unlock(&node->mutex);
1039         goto do_again;
1040
1041 delete_fail:
1042         btrfs_release_path(path);
1043         mutex_unlock(&node->mutex);
1044         return ret;
1045 }
1046
1047 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1048 {
1049         struct btrfs_delayed_root *delayed_root;
1050
1051         if (delayed_node && delayed_node->inode_dirty) {
1052                 BUG_ON(!delayed_node->root);
1053                 delayed_node->inode_dirty = 0;
1054                 delayed_node->count--;
1055
1056                 delayed_root = delayed_node->root->fs_info->delayed_root;
1057                 atomic_dec(&delayed_root->items);
1058                 if (atomic_read(&delayed_root->items) <
1059                     BTRFS_DELAYED_BACKGROUND &&
1060                     waitqueue_active(&delayed_root->wait))
1061                         wake_up(&delayed_root->wait);
1062         }
1063 }
1064
1065 static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1066                                       struct btrfs_root *root,
1067                                       struct btrfs_path *path,
1068                                       struct btrfs_delayed_node *node)
1069 {
1070         struct btrfs_key key;
1071         struct btrfs_inode_item *inode_item;
1072         struct extent_buffer *leaf;
1073         int ret;
1074
1075         mutex_lock(&node->mutex);
1076         if (!node->inode_dirty) {
1077                 mutex_unlock(&node->mutex);
1078                 return 0;
1079         }
1080
1081         key.objectid = node->inode_id;
1082         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1083         key.offset = 0;
1084         ret = btrfs_lookup_inode(trans, root, path, &key, 1);
1085         if (ret > 0) {
1086                 btrfs_release_path(path);
1087                 mutex_unlock(&node->mutex);
1088                 return -ENOENT;
1089         } else if (ret < 0) {
1090                 mutex_unlock(&node->mutex);
1091                 return ret;
1092         }
1093
1094         btrfs_unlock_up_safe(path, 1);
1095         leaf = path->nodes[0];
1096         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1097                                     struct btrfs_inode_item);
1098         write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1099                             sizeof(struct btrfs_inode_item));
1100         btrfs_mark_buffer_dirty(leaf);
1101         btrfs_release_path(path);
1102
1103         btrfs_delayed_inode_release_metadata(root, node);
1104         btrfs_release_delayed_inode(node);
1105         mutex_unlock(&node->mutex);
1106
1107         return 0;
1108 }
1109
1110 /*
1111  * Called when committing the transaction.
1112  * Returns 0 on success.
1113  * Returns < 0 on error and returns with an aborted transaction with any
1114  * outstanding delayed items cleaned up.
1115  */
1116 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1117                             struct btrfs_root *root)
1118 {
1119         struct btrfs_root *curr_root = root;
1120         struct btrfs_delayed_root *delayed_root;
1121         struct btrfs_delayed_node *curr_node, *prev_node;
1122         struct btrfs_path *path;
1123         struct btrfs_block_rsv *block_rsv;
1124         int ret = 0;
1125
1126         if (trans->aborted)
1127                 return -EIO;
1128
1129         path = btrfs_alloc_path();
1130         if (!path)
1131                 return -ENOMEM;
1132         path->leave_spinning = 1;
1133
1134         block_rsv = trans->block_rsv;
1135         trans->block_rsv = &root->fs_info->delayed_block_rsv;
1136
1137         delayed_root = btrfs_get_delayed_root(root);
1138
1139         curr_node = btrfs_first_delayed_node(delayed_root);
1140         while (curr_node) {
1141                 curr_root = curr_node->root;
1142                 ret = btrfs_insert_delayed_items(trans, path, curr_root,
1143                                                  curr_node);
1144                 if (!ret)
1145                         ret = btrfs_delete_delayed_items(trans, path,
1146                                                 curr_root, curr_node);
1147                 if (!ret)
1148                         ret = btrfs_update_delayed_inode(trans, curr_root,
1149                                                 path, curr_node);
1150                 if (ret) {
1151                         btrfs_release_delayed_node(curr_node);
1152                         btrfs_abort_transaction(trans, root, ret);
1153                         break;
1154                 }
1155
1156                 prev_node = curr_node;
1157                 curr_node = btrfs_next_delayed_node(curr_node);
1158                 btrfs_release_delayed_node(prev_node);
1159         }
1160
1161         btrfs_free_path(path);
1162         trans->block_rsv = block_rsv;
1163
1164         return ret;
1165 }
1166
1167 static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1168                                               struct btrfs_delayed_node *node)
1169 {
1170         struct btrfs_path *path;
1171         struct btrfs_block_rsv *block_rsv;
1172         int ret;
1173
1174         path = btrfs_alloc_path();
1175         if (!path)
1176                 return -ENOMEM;
1177         path->leave_spinning = 1;
1178
1179         block_rsv = trans->block_rsv;
1180         trans->block_rsv = &node->root->fs_info->delayed_block_rsv;
1181
1182         ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1183         if (!ret)
1184                 ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1185         if (!ret)
1186                 ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1187         btrfs_free_path(path);
1188
1189         trans->block_rsv = block_rsv;
1190         return ret;
1191 }
1192
1193 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1194                                      struct inode *inode)
1195 {
1196         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1197         int ret;
1198
1199         if (!delayed_node)
1200                 return 0;
1201
1202         mutex_lock(&delayed_node->mutex);
1203         if (!delayed_node->count) {
1204                 mutex_unlock(&delayed_node->mutex);
1205                 btrfs_release_delayed_node(delayed_node);
1206                 return 0;
1207         }
1208         mutex_unlock(&delayed_node->mutex);
1209
1210         ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
1211         btrfs_release_delayed_node(delayed_node);
1212         return ret;
1213 }
1214
1215 void btrfs_remove_delayed_node(struct inode *inode)
1216 {
1217         struct btrfs_delayed_node *delayed_node;
1218
1219         delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
1220         if (!delayed_node)
1221                 return;
1222
1223         BTRFS_I(inode)->delayed_node = NULL;
1224         btrfs_release_delayed_node(delayed_node);
1225 }
1226
1227 struct btrfs_async_delayed_node {
1228         struct btrfs_root *root;
1229         struct btrfs_delayed_node *delayed_node;
1230         struct btrfs_work work;
1231 };
1232
1233 static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
1234 {
1235         struct btrfs_async_delayed_node *async_node;
1236         struct btrfs_trans_handle *trans;
1237         struct btrfs_path *path;
1238         struct btrfs_delayed_node *delayed_node = NULL;
1239         struct btrfs_root *root;
1240         struct btrfs_block_rsv *block_rsv;
1241         unsigned long nr = 0;
1242         int need_requeue = 0;
1243         int ret;
1244
1245         async_node = container_of(work, struct btrfs_async_delayed_node, work);
1246
1247         path = btrfs_alloc_path();
1248         if (!path)
1249                 goto out;
1250         path->leave_spinning = 1;
1251
1252         delayed_node = async_node->delayed_node;
1253         root = delayed_node->root;
1254
1255         trans = btrfs_join_transaction(root);
1256         if (IS_ERR(trans))
1257                 goto free_path;
1258
1259         block_rsv = trans->block_rsv;
1260         trans->block_rsv = &root->fs_info->delayed_block_rsv;
1261
1262         ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
1263         if (!ret)
1264                 ret = btrfs_delete_delayed_items(trans, path, root,
1265                                                  delayed_node);
1266
1267         if (!ret)
1268                 btrfs_update_delayed_inode(trans, root, path, delayed_node);
1269
1270         /*
1271          * Maybe new delayed items have been inserted, so we need requeue
1272          * the work. Besides that, we must dequeue the empty delayed nodes
1273          * to avoid the race between delayed items balance and the worker.
1274          * The race like this:
1275          *      Task1                           Worker thread
1276          *                                      count == 0, needn't requeue
1277          *                                        also needn't insert the
1278          *                                        delayed node into prepare
1279          *                                        list again.
1280          *      add lots of delayed items
1281          *      queue the delayed node
1282          *        already in the list,
1283          *        and not in the prepare
1284          *        list, it means the delayed
1285          *        node is being dealt with
1286          *        by the worker.
1287          *      do delayed items balance
1288          *        the delayed node is being
1289          *        dealt with by the worker
1290          *        now, just wait.
1291          *                                      the worker goto idle.
1292          * Task1 will sleep until the transaction is commited.
1293          */
1294         mutex_lock(&delayed_node->mutex);
1295         if (delayed_node->count)
1296                 need_requeue = 1;
1297         else
1298                 btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
1299                                            delayed_node);
1300         mutex_unlock(&delayed_node->mutex);
1301
1302         nr = trans->blocks_used;
1303
1304         trans->block_rsv = block_rsv;
1305         btrfs_end_transaction_dmeta(trans, root);
1306         __btrfs_btree_balance_dirty(root, nr);
1307 free_path:
1308         btrfs_free_path(path);
1309 out:
1310         if (need_requeue)
1311                 btrfs_requeue_work(&async_node->work);
1312         else {
1313                 btrfs_release_prepared_delayed_node(delayed_node);
1314                 kfree(async_node);
1315         }
1316 }
1317
1318 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1319                                      struct btrfs_root *root, int all)
1320 {
1321         struct btrfs_async_delayed_node *async_node;
1322         struct btrfs_delayed_node *curr;
1323         int count = 0;
1324
1325 again:
1326         curr = btrfs_first_prepared_delayed_node(delayed_root);
1327         if (!curr)
1328                 return 0;
1329
1330         async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
1331         if (!async_node) {
1332                 btrfs_release_prepared_delayed_node(curr);
1333                 return -ENOMEM;
1334         }
1335
1336         async_node->root = root;
1337         async_node->delayed_node = curr;
1338
1339         async_node->work.func = btrfs_async_run_delayed_node_done;
1340         async_node->work.flags = 0;
1341
1342         btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
1343         count++;
1344
1345         if (all || count < 4)
1346                 goto again;
1347
1348         return 0;
1349 }
1350
/* Warn if the delayed root of @root still has a delayed node on it. */
void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
{
	struct btrfs_delayed_root *delayed_root = btrfs_get_delayed_root(root);

	WARN_ON(btrfs_first_delayed_node(delayed_root));
}
1357
1358 void btrfs_balance_delayed_items(struct btrfs_root *root)
1359 {
1360         struct btrfs_delayed_root *delayed_root;
1361
1362         delayed_root = btrfs_get_delayed_root(root);
1363
1364         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1365                 return;
1366
1367         if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1368                 int ret;
1369                 ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
1370                 if (ret)
1371                         return;
1372
1373                 wait_event_interruptible_timeout(
1374                                 delayed_root->wait,
1375                                 (atomic_read(&delayed_root->items) <
1376                                  BTRFS_DELAYED_BACKGROUND),
1377                                 HZ);
1378                 return;
1379         }
1380
1381         btrfs_wq_run_delayed_node(delayed_root, root, 0);
1382 }
1383
1384 /* Will return 0 or -ENOMEM */
1385 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1386                                    struct btrfs_root *root, const char *name,
1387                                    int name_len, struct inode *dir,
1388                                    struct btrfs_disk_key *disk_key, u8 type,
1389                                    u64 index)
1390 {
1391         struct btrfs_delayed_node *delayed_node;
1392         struct btrfs_delayed_item *delayed_item;
1393         struct btrfs_dir_item *dir_item;
1394         int ret;
1395
1396         delayed_node = btrfs_get_or_create_delayed_node(dir);
1397         if (IS_ERR(delayed_node))
1398                 return PTR_ERR(delayed_node);
1399
1400         delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1401         if (!delayed_item) {
1402                 ret = -ENOMEM;
1403                 goto release_node;
1404         }
1405
1406         delayed_item->key.objectid = btrfs_ino(dir);
1407         btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
1408         delayed_item->key.offset = index;
1409
1410         dir_item = (struct btrfs_dir_item *)delayed_item->data;
1411         dir_item->location = *disk_key;
1412         dir_item->transid = cpu_to_le64(trans->transid);
1413         dir_item->data_len = 0;
1414         dir_item->name_len = cpu_to_le16(name_len);
1415         dir_item->type = type;
1416         memcpy((char *)(dir_item + 1), name, name_len);
1417
1418         ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
1419         /*
1420          * we have reserved enough space when we start a new transaction,
1421          * so reserving metadata failure is impossible
1422          */
1423         BUG_ON(ret);
1424
1425
1426         mutex_lock(&delayed_node->mutex);
1427         ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1428         if (unlikely(ret)) {
1429                 printk(KERN_ERR "err add delayed dir index item(name: %s) into "
1430                                 "the insertion tree of the delayed node"
1431                                 "(root id: %llu, inode id: %llu, errno: %d)\n",
1432                                 name,
1433                                 (unsigned long long)delayed_node->root->objectid,
1434                                 (unsigned long long)delayed_node->inode_id,
1435                                 ret);
1436                 BUG();
1437         }
1438         mutex_unlock(&delayed_node->mutex);
1439
1440 release_node:
1441         btrfs_release_delayed_node(delayed_node);
1442         return ret;
1443 }
1444
1445 static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1446                                                struct btrfs_delayed_node *node,
1447                                                struct btrfs_key *key)
1448 {
1449         struct btrfs_delayed_item *item;
1450
1451         mutex_lock(&node->mutex);
1452         item = __btrfs_lookup_delayed_insertion_item(node, key);
1453         if (!item) {
1454                 mutex_unlock(&node->mutex);
1455                 return 1;
1456         }
1457
1458         btrfs_delayed_item_release_metadata(root, item);
1459         btrfs_release_delayed_item(item);
1460         mutex_unlock(&node->mutex);
1461         return 0;
1462 }
1463
1464 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1465                                    struct btrfs_root *root, struct inode *dir,
1466                                    u64 index)
1467 {
1468         struct btrfs_delayed_node *node;
1469         struct btrfs_delayed_item *item;
1470         struct btrfs_key item_key;
1471         int ret;
1472
1473         node = btrfs_get_or_create_delayed_node(dir);
1474         if (IS_ERR(node))
1475                 return PTR_ERR(node);
1476
1477         item_key.objectid = btrfs_ino(dir);
1478         btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
1479         item_key.offset = index;
1480
1481         ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
1482         if (!ret)
1483                 goto end;
1484
1485         item = btrfs_alloc_delayed_item(0);
1486         if (!item) {
1487                 ret = -ENOMEM;
1488                 goto end;
1489         }
1490
1491         item->key = item_key;
1492
1493         ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
1494         /*
1495          * we have reserved enough space when we start a new transaction,
1496          * so reserving metadata failure is impossible.
1497          */
1498         BUG_ON(ret);
1499
1500         mutex_lock(&node->mutex);
1501         ret = __btrfs_add_delayed_deletion_item(node, item);
1502         if (unlikely(ret)) {
1503                 printk(KERN_ERR "err add delayed dir index item(index: %llu) "
1504                                 "into the deletion tree of the delayed node"
1505                                 "(root id: %llu, inode id: %llu, errno: %d)\n",
1506                                 (unsigned long long)index,
1507                                 (unsigned long long)node->root->objectid,
1508                                 (unsigned long long)node->inode_id,
1509                                 ret);
1510                 BUG();
1511         }
1512         mutex_unlock(&node->mutex);
1513 end:
1514         btrfs_release_delayed_node(node);
1515         return ret;
1516 }
1517
1518 int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1519 {
1520         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1521
1522         if (!delayed_node)
1523                 return -ENOENT;
1524
1525         /*
1526          * Since we have held i_mutex of this directory, it is impossible that
1527          * a new directory index is added into the delayed node and index_cnt
1528          * is updated now. So we needn't lock the delayed node.
1529          */
1530         if (!delayed_node->index_cnt) {
1531                 btrfs_release_delayed_node(delayed_node);
1532                 return -EINVAL;
1533         }
1534
1535         BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1536         btrfs_release_delayed_node(delayed_node);
1537         return 0;
1538 }
1539
1540 void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1541                              struct list_head *del_list)
1542 {
1543         struct btrfs_delayed_node *delayed_node;
1544         struct btrfs_delayed_item *item;
1545
1546         delayed_node = btrfs_get_delayed_node(inode);
1547         if (!delayed_node)
1548                 return;
1549
1550         mutex_lock(&delayed_node->mutex);
1551         item = __btrfs_first_delayed_insertion_item(delayed_node);
1552         while (item) {
1553                 atomic_inc(&item->refs);
1554                 list_add_tail(&item->readdir_list, ins_list);
1555                 item = __btrfs_next_delayed_item(item);
1556         }
1557
1558         item = __btrfs_first_delayed_deletion_item(delayed_node);
1559         while (item) {
1560                 atomic_inc(&item->refs);
1561                 list_add_tail(&item->readdir_list, del_list);
1562                 item = __btrfs_next_delayed_item(item);
1563         }
1564         mutex_unlock(&delayed_node->mutex);
1565         /*
1566          * This delayed node is still cached in the btrfs inode, so refs
1567          * must be > 1 now, and we needn't check it is going to be freed
1568          * or not.
1569          *
1570          * Besides that, this function is used to read dir, we do not
1571          * insert/delete delayed items in this period. So we also needn't
1572          * requeue or dequeue this delayed node.
1573          */
1574         atomic_dec(&delayed_node->refs);
1575 }
1576
1577 void btrfs_put_delayed_items(struct list_head *ins_list,
1578                              struct list_head *del_list)
1579 {
1580         struct btrfs_delayed_item *curr, *next;
1581
1582         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1583                 list_del(&curr->readdir_list);
1584                 if (atomic_dec_and_test(&curr->refs))
1585                         kfree(curr);
1586         }
1587
1588         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1589                 list_del(&curr->readdir_list);
1590                 if (atomic_dec_and_test(&curr->refs))
1591                         kfree(curr);
1592         }
1593 }
1594
1595 int btrfs_should_delete_dir_index(struct list_head *del_list,
1596                                   u64 index)
1597 {
1598         struct btrfs_delayed_item *curr, *next;
1599         int ret;
1600
1601         if (list_empty(del_list))
1602                 return 0;
1603
1604         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1605                 if (curr->key.offset > index)
1606                         break;
1607
1608                 list_del(&curr->readdir_list);
1609                 ret = (curr->key.offset == index);
1610
1611                 if (atomic_dec_and_test(&curr->refs))
1612                         kfree(curr);
1613
1614                 if (ret)
1615                         return 1;
1616                 else
1617                         continue;
1618         }
1619         return 0;
1620 }
1621
1622 /*
1623  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1624  *
1625  */
1626 int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
1627                                     filldir_t filldir,
1628                                     struct list_head *ins_list)
1629 {
1630         struct btrfs_dir_item *di;
1631         struct btrfs_delayed_item *curr, *next;
1632         struct btrfs_key location;
1633         char *name;
1634         int name_len;
1635         int over = 0;
1636         unsigned char d_type;
1637
1638         if (list_empty(ins_list))
1639                 return 0;
1640
1641         /*
1642          * Changing the data of the delayed item is impossible. So
1643          * we needn't lock them. And we have held i_mutex of the
1644          * directory, nobody can delete any directory indexes now.
1645          */
1646         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1647                 list_del(&curr->readdir_list);
1648
1649                 if (curr->key.offset < filp->f_pos) {
1650                         if (atomic_dec_and_test(&curr->refs))
1651                                 kfree(curr);
1652                         continue;
1653                 }
1654
1655                 filp->f_pos = curr->key.offset;
1656
1657                 di = (struct btrfs_dir_item *)curr->data;
1658                 name = (char *)(di + 1);
1659                 name_len = le16_to_cpu(di->name_len);
1660
1661                 d_type = btrfs_filetype_table[di->type];
1662                 btrfs_disk_key_to_cpu(&location, &di->location);
1663
1664                 over = filldir(dirent, name, name_len, curr->key.offset,
1665                                location.objectid, d_type);
1666
1667                 if (atomic_dec_and_test(&curr->refs))
1668                         kfree(curr);
1669
1670                 if (over)
1671                         return 1;
1672         }
1673         return 0;
1674 }
1675
/*
 * Accessors for individual fields of struct btrfs_inode_item and
 * struct btrfs_timespec.  Each invocation provides the
 * btrfs_<name>() / btrfs_set_<name>() pair that fill_stack_inode_item() and
 * btrfs_fill_inode() below use, e.g. btrfs_stack_inode_uid() and
 * btrfs_set_stack_inode_uid().
 *
 * NOTE(review): the macro is defined outside this file; the last argument is
 * presumably the field width in bits -- confirm against its definition.
 */
1676 BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
1677                          generation, 64);
1678 BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
1679                          sequence, 64);
1680 BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
1681                          transid, 64);
1682 BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
1683 BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
1684                          nbytes, 64);
1685 BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
1686                          block_group, 64);
1687 BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
1688 BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
1689 BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
1690 BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
1691 BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
1692 BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
1693
/* Accessors for the seconds / nanoseconds members of struct btrfs_timespec. */
1694 BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
1695 BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
1696
1697 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1698                                   struct btrfs_inode_item *inode_item,
1699                                   struct inode *inode)
1700 {
1701         btrfs_set_stack_inode_uid(inode_item, inode->i_uid);
1702         btrfs_set_stack_inode_gid(inode_item, inode->i_gid);
1703         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1704         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1705         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1706         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1707         btrfs_set_stack_inode_generation(inode_item,
1708                                          BTRFS_I(inode)->generation);
1709         btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1710         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1711         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1712         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1713         btrfs_set_stack_inode_block_group(inode_item, 0);
1714
1715         btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1716                                      inode->i_atime.tv_sec);
1717         btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
1718                                       inode->i_atime.tv_nsec);
1719
1720         btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
1721                                      inode->i_mtime.tv_sec);
1722         btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
1723                                       inode->i_mtime.tv_nsec);
1724
1725         btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
1726                                      inode->i_ctime.tv_sec);
1727         btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
1728                                       inode->i_ctime.tv_nsec);
1729 }
1730
1731 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1732 {
1733         struct btrfs_delayed_node *delayed_node;
1734         struct btrfs_inode_item *inode_item;
1735         struct btrfs_timespec *tspec;
1736
1737         delayed_node = btrfs_get_delayed_node(inode);
1738         if (!delayed_node)
1739                 return -ENOENT;
1740
1741         mutex_lock(&delayed_node->mutex);
1742         if (!delayed_node->inode_dirty) {
1743                 mutex_unlock(&delayed_node->mutex);
1744                 btrfs_release_delayed_node(delayed_node);
1745                 return -ENOENT;
1746         }
1747
1748         inode_item = &delayed_node->inode_item;
1749
1750         inode->i_uid = btrfs_stack_inode_uid(inode_item);
1751         inode->i_gid = btrfs_stack_inode_gid(inode_item);
1752         btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1753         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1754         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1755         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1756         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1757         inode->i_version = btrfs_stack_inode_sequence(inode_item);
1758         inode->i_rdev = 0;
1759         *rdev = btrfs_stack_inode_rdev(inode_item);
1760         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1761
1762         tspec = btrfs_inode_atime(inode_item);
1763         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
1764         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1765
1766         tspec = btrfs_inode_mtime(inode_item);
1767         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
1768         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1769
1770         tspec = btrfs_inode_ctime(inode_item);
1771         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
1772         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1773
1774         inode->i_generation = BTRFS_I(inode)->generation;
1775         BTRFS_I(inode)->index_cnt = (u64)-1;
1776
1777         mutex_unlock(&delayed_node->mutex);
1778         btrfs_release_delayed_node(delayed_node);
1779         return 0;
1780 }
1781
1782 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1783                                struct btrfs_root *root, struct inode *inode)
1784 {
1785         struct btrfs_delayed_node *delayed_node;
1786         int ret = 0;
1787
1788         delayed_node = btrfs_get_or_create_delayed_node(inode);
1789         if (IS_ERR(delayed_node))
1790                 return PTR_ERR(delayed_node);
1791
1792         mutex_lock(&delayed_node->mutex);
1793         if (delayed_node->inode_dirty) {
1794                 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1795                 goto release_node;
1796         }
1797
1798         ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
1799                                                    delayed_node);
1800         if (ret)
1801                 goto release_node;
1802
1803         fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1804         delayed_node->inode_dirty = 1;
1805         delayed_node->count++;
1806         atomic_inc(&root->fs_info->delayed_root->items);
1807 release_node:
1808         mutex_unlock(&delayed_node->mutex);
1809         btrfs_release_delayed_node(delayed_node);
1810         return ret;
1811 }
1812
1813 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1814 {
1815         struct btrfs_root *root = delayed_node->root;
1816         struct btrfs_delayed_item *curr_item, *prev_item;
1817
1818         mutex_lock(&delayed_node->mutex);
1819         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1820         while (curr_item) {
1821                 btrfs_delayed_item_release_metadata(root, curr_item);
1822                 prev_item = curr_item;
1823                 curr_item = __btrfs_next_delayed_item(prev_item);
1824                 btrfs_release_delayed_item(prev_item);
1825         }
1826
1827         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1828         while (curr_item) {
1829                 btrfs_delayed_item_release_metadata(root, curr_item);
1830                 prev_item = curr_item;
1831                 curr_item = __btrfs_next_delayed_item(prev_item);
1832                 btrfs_release_delayed_item(prev_item);
1833         }
1834
1835         if (delayed_node->inode_dirty) {
1836                 btrfs_delayed_inode_release_metadata(root, delayed_node);
1837                 btrfs_release_delayed_inode(delayed_node);
1838         }
1839         mutex_unlock(&delayed_node->mutex);
1840 }
1841
/*
 * btrfs_kill_delayed_inode_items - discard the delayed items of one inode
 * @inode: inode whose delayed node should be emptied
 *
 * Does nothing when the inode has no delayed node.
 */
void btrfs_kill_delayed_inode_items(struct inode *inode)
{
	struct btrfs_delayed_node *node = btrfs_get_delayed_node(inode);

	if (node) {
		__btrfs_kill_delayed_node(node);
		btrfs_release_delayed_node(node);
	}
}
1853
1854 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1855 {
1856         u64 inode_id = 0;
1857         struct btrfs_delayed_node *delayed_nodes[8];
1858         int i, n;
1859
1860         while (1) {
1861                 spin_lock(&root->inode_lock);
1862                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1863                                            (void **)delayed_nodes, inode_id,
1864                                            ARRAY_SIZE(delayed_nodes));
1865                 if (!n) {
1866                         spin_unlock(&root->inode_lock);
1867                         break;
1868                 }
1869
1870                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1871
1872                 for (i = 0; i < n; i++)
1873                         atomic_inc(&delayed_nodes[i]->refs);
1874                 spin_unlock(&root->inode_lock);
1875
1876                 for (i = 0; i < n; i++) {
1877                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1878                         btrfs_release_delayed_node(delayed_nodes[i]);
1879                 }
1880         }
1881 }
1882
/*
 * btrfs_destroy_delayed_inodes - discard every delayed node of a delayed root
 * @root: root used to look up the delayed root
 *
 * Iterates the nodes returned by btrfs_first_delayed_node() /
 * btrfs_next_delayed_node(), emptying each one and then releasing it.
 */
void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
{
	struct btrfs_delayed_root *delayed_root = btrfs_get_delayed_root(root);
	struct btrfs_delayed_node *node, *next;

	for (node = btrfs_first_delayed_node(delayed_root); node;
	     node = next) {
		__btrfs_kill_delayed_node(node);

		/* Advance before the current node is released. */
		next = btrfs_next_delayed_node(node);
		btrfs_release_delayed_node(node);
	}
}
1899