Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux.git] / fs / reiserfs / do_balan.c
1 /*
2  * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3  */
4
5 /* Now we have all buffers that must be used in balancing of the tree   */
6 /* Further calculations can not cause schedule(), and thus the buffer   */
7 /* tree will be stable until the balancing will be finished             */
8 /* balance the tree according to the analysis made before,              */
9 /* and using buffers obtained after all above.                          */
10
11 /**
12  ** balance_leaf_when_delete
13  ** balance_leaf
14  ** do_balance
15  **
16  **/
17
18 #include <asm/uaccess.h>
19 #include <linux/time.h>
20 #include "reiserfs.h"
21 #include <linux/buffer_head.h>
22 #include <linux/kernel.h>
23
24 static inline void buffer_info_init_left(struct tree_balance *tb,
25                                          struct buffer_info *bi)
26 {
27         bi->tb          = tb;
28         bi->bi_bh       = tb->L[0];
29         bi->bi_parent   = tb->FL[0];
30         bi->bi_position = get_left_neighbor_position(tb, 0);
31 }
32
33 static inline void buffer_info_init_right(struct tree_balance *tb,
34                                           struct buffer_info *bi)
35 {
36         bi->tb          = tb;
37         bi->bi_bh       = tb->R[0];
38         bi->bi_parent   = tb->FR[0];
39         bi->bi_position = get_right_neighbor_position(tb, 0);
40 }
41
42 static inline void buffer_info_init_tbS0(struct tree_balance *tb,
43                                          struct buffer_info *bi)
44 {
45         bi->tb          = tb;
46         bi->bi_bh        = PATH_PLAST_BUFFER(tb->tb_path);
47         bi->bi_parent   = PATH_H_PPARENT(tb->tb_path, 0);
48         bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
49 }
50
51 static inline void buffer_info_init_bh(struct tree_balance *tb,
52                                        struct buffer_info *bi,
53                                        struct buffer_head *bh)
54 {
55         bi->tb          = tb;
56         bi->bi_bh       = bh;
57         bi->bi_parent   = NULL;
58         bi->bi_position = 0;
59 }
60
61 inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
62                                        struct buffer_head *bh, int flag)
63 {
64         journal_mark_dirty(tb->transaction_handle,
65                            tb->transaction_handle->t_super, bh);
66 }
67
68 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
69 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
70
71 /* summary:
72  if deleting something ( tb->insert_size[0] < 0 )
73    return(balance_leaf_when_delete()); (flag d handled here)
74  else
75    if lnum is larger than 0 we put items into the left node
76    if rnum is larger than 0 we put items into the right node
77    if snum1 is larger than 0 we put items into the new node s1
78    if snum2 is larger than 0 we put items into the new node s2
79 Note that all *num* count new items being created.
80
81 It would be easier to read balance_leaf() if each of these summary
82 lines was a separate procedure rather than being inlined.  I think
83 that there are many passages here and in balance_leaf_when_delete() in
84 which two calls to one procedure can replace two passages, and it
85 might save cache space and improve software maintenance costs to do so.
86
87 Vladimir made the perceptive comment that we should offload most of
88 the decision making in this function into fix_nodes/check_balance, and
89 then create some sort of structure in tb that says what actions should
90 be performed by do_balance.
91
92 -Hans */
93
94 /* Balance leaf node in case of delete or cut: insert_size[0] < 0
95  *
96  * lnum, rnum can have values >= -1
97  *      -1 means that the neighbor must be joined with S
98  *       0 means that nothing should be done with the neighbor
99  *      >0 means to shift entirely or partly the specified number of items to the neighbor
100  */
101 static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
102 {
103         struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
104         int item_pos = PATH_LAST_POSITION(tb->tb_path);
105         int pos_in_item = tb->tb_path->pos_in_item;
106         struct buffer_info bi;
107         int n;
108         struct item_head *ih;
109
110         RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
111                "vs- 12000: level: wrong FR %z", tb->FR[0]);
112         RFALSE(tb->blknum[0] > 1,
113                "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
114         RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
115                "PAP-12010: tree can not be empty");
116
117         ih = B_N_PITEM_HEAD(tbS0, item_pos);
118         buffer_info_init_tbS0(tb, &bi);
119
120         /* Delete or truncate the item */
121
122         switch (flag) {
123         case M_DELETE:          /* delete item in S[0] */
124
125                 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
126                        "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
127                        -tb->insert_size[0], ih);
128
129                 leaf_delete_items(&bi, 0, item_pos, 1, -1);
130
131                 if (!item_pos && tb->CFL[0]) {
132                         if (B_NR_ITEMS(tbS0)) {
133                                 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
134                                             0);
135                         } else {
136                                 if (!PATH_H_POSITION(tb->tb_path, 1))
137                                         replace_key(tb, tb->CFL[0], tb->lkey[0],
138                                                     PATH_H_PPARENT(tb->tb_path,
139                                                                    0), 0);
140                         }
141                 }
142
143                 RFALSE(!item_pos && !tb->CFL[0],
144                        "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
145                        tb->L[0]);
146
147                 break;
148
149         case M_CUT:{            /* cut item in S[0] */
150                         if (is_direntry_le_ih(ih)) {
151
152                                 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
153                                 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
154                                 tb->insert_size[0] = -1;
155                                 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
156                                                      -tb->insert_size[0]);
157
158                                 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
159                                        "PAP-12030: can not change delimiting key. CFL[0]=%p",
160                                        tb->CFL[0]);
161
162                                 if (!item_pos && !pos_in_item && tb->CFL[0]) {
163                                         replace_key(tb, tb->CFL[0], tb->lkey[0],
164                                                     tbS0, 0);
165                                 }
166                         } else {
167                                 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
168                                                      -tb->insert_size[0]);
169
170                                 RFALSE(!ih_item_len(ih),
171                                        "PAP-12035: cut must leave non-zero dynamic length of item");
172                         }
173                         break;
174                 }
175
176         default:
177                 print_cur_tb("12040");
178                 reiserfs_panic(tb->tb_sb, "PAP-12040",
179                                "unexpected mode: %s(%d)",
180                                (flag ==
181                                 M_PASTE) ? "PASTE" : ((flag ==
182                                                        M_INSERT) ? "INSERT" :
183                                                       "UNKNOWN"), flag);
184         }
185
186         /* the rule is that no shifting occurs unless by shifting a node can be freed */
187         n = B_NR_ITEMS(tbS0);
188         if (tb->lnum[0]) {      /* L[0] takes part in balancing */
189                 if (tb->lnum[0] == -1) {        /* L[0] must be joined with S[0] */
190                         if (tb->rnum[0] == -1) {        /* R[0] must be also joined with S[0] */
191                                 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
192                                         /* all contents of all the 3 buffers will be in L[0] */
193                                         if (PATH_H_POSITION(tb->tb_path, 1) == 0
194                                             && 1 < B_NR_ITEMS(tb->FR[0]))
195                                                 replace_key(tb, tb->CFL[0],
196                                                             tb->lkey[0],
197                                                             tb->FR[0], 1);
198
199                                         leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
200                                                         -1, NULL);
201                                         leaf_move_items(LEAF_FROM_R_TO_L, tb,
202                                                         B_NR_ITEMS(tb->R[0]),
203                                                         -1, NULL);
204
205                                         reiserfs_invalidate_buffer(tb, tbS0);
206                                         reiserfs_invalidate_buffer(tb,
207                                                                    tb->R[0]);
208
209                                         return 0;
210                                 }
211                                 /* all contents of all the 3 buffers will be in R[0] */
212                                 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
213                                                 NULL);
214                                 leaf_move_items(LEAF_FROM_L_TO_R, tb,
215                                                 B_NR_ITEMS(tb->L[0]), -1, NULL);
216
217                                 /* right_delimiting_key is correct in R[0] */
218                                 replace_key(tb, tb->CFR[0], tb->rkey[0],
219                                             tb->R[0], 0);
220
221                                 reiserfs_invalidate_buffer(tb, tbS0);
222                                 reiserfs_invalidate_buffer(tb, tb->L[0]);
223
224                                 return -1;
225                         }
226
227                         RFALSE(tb->rnum[0] != 0,
228                                "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
229                         /* all contents of L[0] and S[0] will be in L[0] */
230                         leaf_shift_left(tb, n, -1);
231
232                         reiserfs_invalidate_buffer(tb, tbS0);
233
234                         return 0;
235                 }
236                 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
237
238                 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
239                        (tb->lnum[0] + tb->rnum[0] > n + 1),
240                        "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
241                        tb->rnum[0], tb->lnum[0], n);
242                 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
243                        (tb->lbytes != -1 || tb->rbytes != -1),
244                        "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
245                        tb->rbytes, tb->lbytes);
246                 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
247                        (tb->lbytes < 1 || tb->rbytes != -1),
248                        "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
249                        tb->rbytes, tb->lbytes);
250
251                 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
252                 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
253
254                 reiserfs_invalidate_buffer(tb, tbS0);
255
256                 return 0;
257         }
258
259         if (tb->rnum[0] == -1) {
260                 /* all contents of R[0] and S[0] will be in R[0] */
261                 leaf_shift_right(tb, n, -1);
262                 reiserfs_invalidate_buffer(tb, tbS0);
263                 return 0;
264         }
265
266         RFALSE(tb->rnum[0],
267                "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
268         return 0;
269 }
270
271 static int balance_leaf(struct tree_balance *tb, struct item_head *ih,  /* item header of inserted item (this is on little endian) */
272                         const char *body,       /* body  of inserted item or bytes to paste */
273                         int flag,       /* i - insert, d - delete, c - cut, p - paste
274                                            (see comment to do_balance) */
275                         struct item_head *insert_key,   /* in our processing of one level we sometimes determine what
276                                                            must be inserted into the next higher level.  This insertion
277                                                            consists of a key or two keys and their corresponding
278                                                            pointers */
279                         struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
280     )
281 {
282         struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
283         int item_pos = PATH_LAST_POSITION(tb->tb_path); /*  index into the array of item headers in S[0]
284                                                            of the affected item */
285         struct buffer_info bi;
286         struct buffer_head *S_new[2];   /* new nodes allocated to hold what could not fit into S */
287         int snum[2];            /* number of items that will be placed
288                                    into S_new (includes partially shifted
289                                    items) */
290         int sbytes[2];          /* if an item is partially shifted into S_new then
291                                    if it is a directory item
292                                    it is the number of entries from the item that are shifted into S_new
293                                    else
294                                    it is the number of bytes from the item that are shifted into S_new
295                                  */
296         int n, i;
297         int ret_val;
298         int pos_in_item;
299         int zeros_num;
300
301         PROC_INFO_INC(tb->tb_sb, balance_at[0]);
302
303         /* Make balance in case insert_size[0] < 0 */
304         if (tb->insert_size[0] < 0)
305                 return balance_leaf_when_delete(tb, flag);
306
307         zeros_num = 0;
308         if (flag == M_INSERT && !body)
309                 zeros_num = ih_item_len(ih);
310
311         pos_in_item = tb->tb_path->pos_in_item;
312         /* for indirect item pos_in_item is measured in unformatted node
313            pointers. Recalculate to bytes */
314         if (flag != M_INSERT
315             && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
316                 pos_in_item *= UNFM_P_SIZE;
317
318         if (tb->lnum[0] > 0) {
319                 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
320                 if (item_pos < tb->lnum[0]) {
321                         /* new item or it part falls to L[0], shift it too */
322                         n = B_NR_ITEMS(tb->L[0]);
323
324                         switch (flag) {
325                         case M_INSERT:  /* insert item into L[0] */
326
327                                 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
328                                         /* part of new item falls into L[0] */
329                                         int new_item_len;
330                                         int version;
331
332                                         ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, -1);
333
334                                         /* Calculate item length to insert to S[0] */
335                                         new_item_len = ih_item_len(ih) - tb->lbytes;
336                                         /* Calculate and check item length to insert to L[0] */
337                                         put_ih_item_len(ih, ih_item_len(ih) - new_item_len);
338
339                                         RFALSE(ih_item_len(ih) <= 0,
340                                                "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
341                                                ih_item_len(ih));
342
343                                         /* Insert new item into L[0] */
344                                         buffer_info_init_left(tb, &bi);
345                                         leaf_insert_into_buf(&bi,
346                                                         n + item_pos - ret_val, ih, body,
347                                                         zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
348
349                                         version = ih_version(ih);
350
351                                         /* Calculate key component, item length and body to insert into S[0] */
352                                         set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
353                                                         (tb-> lbytes << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
354
355                                         put_ih_item_len(ih, new_item_len);
356                                         if (tb->lbytes > zeros_num) {
357                                                 body += (tb->lbytes - zeros_num);
358                                                 zeros_num = 0;
359                                         } else
360                                                 zeros_num -= tb->lbytes;
361
362                                         RFALSE(ih_item_len(ih) <= 0,
363                                                "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
364                                                ih_item_len(ih));
365                                 } else {
366                                         /* new item in whole falls into L[0] */
367                                         /* Shift lnum[0]-1 items to L[0] */
368                                         ret_val = leaf_shift_left(tb, tb->lnum[0] - 1, tb->lbytes);
369                                         /* Insert new item into L[0] */
370                                         buffer_info_init_left(tb, &bi);
371                                         leaf_insert_into_buf(&bi, n + item_pos - ret_val, ih, body, zeros_num);
372                                         tb->insert_size[0] = 0;
373                                         zeros_num = 0;
374                                 }
375                                 break;
376
377                         case M_PASTE:   /* append item in L[0] */
378
379                                 if (item_pos == tb->lnum[0] - 1 && tb->lbytes != -1) {
380                                         /* we must shift the part of the appended item */
381                                         if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {
382
383                                                 RFALSE(zeros_num,
384                                                        "PAP-12090: invalid parameter in case of a directory");
385                                                 /* directory item */
386                                                 if (tb->lbytes > pos_in_item) {
387                                                         /* new directory entry falls into L[0] */
388                                                         struct item_head *pasted;
389                                                         int l_pos_in_item = pos_in_item;
390
391                                                         /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
392                                                         ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes-1);
393                                                         if (ret_val && !item_pos) {
394                                                                 pasted = B_N_PITEM_HEAD(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1);
395                                                                 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes -1);
396                                                         }
397
398                                                         /* Append given directory entry to directory item */
399                                                         buffer_info_init_left(tb, &bi);
400                                                         leaf_paste_in_buffer(&bi, n + item_pos - ret_val, l_pos_in_item, tb->insert_size[0], body, zeros_num);
401
402                                                         /* previous string prepared space for pasting new entry, following string pastes this entry */
403
404                                                         /* when we have merge directory item, pos_in_item has been changed too */
405
406                                                         /* paste new directory entry. 1 is entry number */
407                                                         leaf_paste_entries(&bi, n + item_pos - ret_val, l_pos_in_item,
408                                                                            1, (struct reiserfs_de_head *) body,
409                                                                            body + DEH_SIZE, tb->insert_size[0]);
410                                                         tb->insert_size[0] = 0;
411                                                 } else {
412                                                         /* new directory item doesn't fall into L[0] */
413                                                         /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
414                                                         leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
415                                                 }
416                                                 /* Calculate new position to append in item body */
417                                                 pos_in_item -= tb->lbytes;
418                                         } else {
419                                                 /* regular object */
420                                                 RFALSE(tb->lbytes <= 0, "PAP-12095: there is nothing to shift to L[0]. lbytes=%d", tb->lbytes);
421                                                 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
422                                                        "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
423                                                        ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),pos_in_item);
424
425                                                 if (tb->lbytes >= pos_in_item) {
426                                                         /* appended item will be in L[0] in whole */
427                                                         int l_n;
428
429                                                         /* this bytes number must be appended to the last item of L[h] */
430                                                         l_n = tb->lbytes - pos_in_item;
431
432                                                         /* Calculate new insert_size[0] */
433                                                         tb->insert_size[0] -= l_n;
434
435                                                         RFALSE(tb->insert_size[0] <= 0,
436                                                                "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
437                                                                tb->insert_size[0]);
438                                                         ret_val = leaf_shift_left(tb, tb->lnum[0], ih_item_len
439                                                                             (B_N_PITEM_HEAD(tbS0, item_pos)));
440                                                         /* Append to body of item in L[0] */
441                                                         buffer_info_init_left(tb, &bi);
442                                                         leaf_paste_in_buffer
443                                                             (&bi, n + item_pos - ret_val, ih_item_len
444                                                              (B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val)),
445                                                              l_n, body,
446                                                              zeros_num > l_n ? l_n : zeros_num);
447                                                         /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
448                                                         {
449                                                                 int version;
450                                                                 int temp_l = l_n;
451
452                                                                 RFALSE(ih_item_len(B_N_PITEM_HEAD(tbS0, 0)),
453                                                                      "PAP-12106: item length must be 0");
454                                                                 RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY
455                                                                       (tb->L[0], n + item_pos - ret_val)),
456                                                                      "PAP-12107: items must be of the same file");
457                                                                 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
458                                                                         temp_l = l_n << (tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT);
459                                                                 }
460                                                                 /* update key of first item in S0 */
461                                                                 version = ih_version(B_N_PITEM_HEAD(tbS0, 0));
462                                                                 set_le_key_k_offset(version, B_N_PKEY(tbS0, 0),
463                                                                      le_key_k_offset(version,B_N_PKEY(tbS0, 0)) + temp_l);
464                                                                 /* update left delimiting key */
465                                                                 set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
466                                                                      le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0])) + temp_l);
467                                                         }
468
469                                                         /* Calculate new body, position in item and insert_size[0] */
470                                                         if (l_n > zeros_num) {
471                                                                 body += (l_n - zeros_num);
472                                                                 zeros_num = 0;
473                                                         } else
474                                                                 zeros_num -= l_n;
475                                                         pos_in_item = 0;
476
477                                                         RFALSE(comp_short_le_keys(B_N_PKEY(tbS0, 0), B_N_PKEY(tb->L[0], B_NR_ITEMS(tb->L[0]) - 1))
478                                                              || !op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)
479                                                              || !op_is_left_mergeable(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]), tbS0->b_size),
480                                                              "PAP-12120: item must be merge-able with left neighboring item");
481                                                 } else {        /* only part of the appended item will be in L[0] */
482
483                                                         /* Calculate position in item for append in S[0] */
484                                                         pos_in_item -= tb->lbytes;
485
486                                                         RFALSE(pos_in_item <= 0, "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
487
488                                                         /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
489                                                         leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
490                                                 }
491                                         }
492                                 } else {        /* appended item will be in L[0] in whole */
493
494                                         struct item_head *pasted;
495
496                                         if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) {       /* if we paste into first item of S[0] and it is left mergable */
497                                                 /* then increment pos_in_item by the size of the last item in L[0] */
498                                                 pasted = B_N_PITEM_HEAD(tb->L[0], n - 1);
499                                                 if (is_direntry_le_ih(pasted))
500                                                         pos_in_item += ih_entry_count(pasted);
501                                                 else
502                                                         pos_in_item += ih_item_len(pasted);
503                                         }
504
505                                         /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
506                                         ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
507                                         /* Append to body of item in L[0] */
508                                         buffer_info_init_left(tb, &bi);
509                                         leaf_paste_in_buffer(&bi, n + item_pos - ret_val,
510                                                              pos_in_item,
511                                                              tb->insert_size[0],
512                                                              body, zeros_num);
513
514                                         /* if appended item is directory, paste entry */
515                                         pasted = B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val);
516                                         if (is_direntry_le_ih(pasted))
517                                                 leaf_paste_entries(&bi, n + item_pos - ret_val,
518                                                                    pos_in_item, 1,
519                                                                    (struct reiserfs_de_head *) body,
520                                                                    body + DEH_SIZE,
521                                                                    tb->insert_size[0]);
522                                         /* if appended item is indirect item, put unformatted node into un list */
523                                         if (is_indirect_le_ih(pasted))
524                                                 set_ih_free_space(pasted, 0);
525                                         tb->insert_size[0] = 0;
526                                         zeros_num = 0;
527                                 }
528                                 break;
529                         default:        /* cases d and t */
530                                 reiserfs_panic(tb->tb_sb, "PAP-12130",
531                                                "lnum > 0: unexpected mode: "
532                                                " %s(%d)",
533                                                (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
534                         }
535                 } else {
536                         /* new item doesn't fall into L[0] */
537                         leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
538                 }
539         }
540
541         /* tb->lnum[0] > 0 */
542         /* Calculate new item position */
543         item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
544
545         if (tb->rnum[0] > 0) {
546                 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
547                 n = B_NR_ITEMS(tbS0);
548                 switch (flag) {
549
550                 case M_INSERT:  /* insert item */
551                         if (n - tb->rnum[0] < item_pos) {       /* new item or its part falls to R[0] */
552                                 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) {      /* part of new item falls into R[0] */
553                                         loff_t old_key_comp, old_len, r_zeros_number;
554                                         const char *r_body;
555                                         int version;
556                                         loff_t offset;
557
558                                         leaf_shift_right(tb, tb->rnum[0] - 1, -1);
559
560                                         version = ih_version(ih);
561                                         /* Remember key component and item length */
562                                         old_key_comp = le_ih_k_offset(ih);
563                                         old_len = ih_item_len(ih);
564
565                                         /* Calculate key component and item length to insert into R[0] */
566                                         offset = le_ih_k_offset(ih) + ((old_len - tb->rbytes) << (is_indirect_le_ih(ih) ? tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT : 0));
567                                         set_le_ih_k_offset(ih, offset);
568                                         put_ih_item_len(ih, tb->rbytes);
569                                         /* Insert part of the item into R[0] */
570                                         buffer_info_init_right(tb, &bi);
571                                         if ((old_len - tb->rbytes) > zeros_num) {
572                                                 r_zeros_number = 0;
573                                                 r_body = body + (old_len - tb->rbytes) - zeros_num;
574                                         } else {
575                                                 r_body = body;
576                                                 r_zeros_number = zeros_num - (old_len - tb->rbytes);
577                                                 zeros_num -= r_zeros_number;
578                                         }
579
580                                         leaf_insert_into_buf(&bi, 0, ih, r_body,
581                                                              r_zeros_number);
582
583                                         /* Replace right delimiting key by first key in R[0] */
584                                         replace_key(tb, tb->CFR[0], tb->rkey[0],
585                                                     tb->R[0], 0);
586
587                                         /* Calculate key component and item length to insert into S[0] */
588                                         set_le_ih_k_offset(ih, old_key_comp);
589                                         put_ih_item_len(ih, old_len - tb->rbytes);
590
591                                         tb->insert_size[0] -= tb->rbytes;
592
593                                 } else {        /* whole new item falls into R[0] */
594
595                                         /* Shift rnum[0]-1 items to R[0] */
596                                         ret_val = leaf_shift_right(tb, tb->rnum[0] - 1, tb->rbytes);
597                                         /* Insert new item into R[0] */
598                                         buffer_info_init_right(tb, &bi);
599                                         leaf_insert_into_buf(&bi, item_pos - n + tb->rnum[0] - 1,
600                                                              ih, body, zeros_num);
601
602                                         if (item_pos - n + tb->rnum[0] - 1 == 0) {
603                                                 replace_key(tb, tb->CFR[0],
604                                                             tb->rkey[0],
605                                                             tb->R[0], 0);
606
607                                         }
608                                         zeros_num = tb->insert_size[0] = 0;
609                                 }
610                         } else {        /* new item or part of it doesn't fall into R[0] */
611
612                                 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
613                         }
614                         break;
615
616                 case M_PASTE:   /* append item */
617
618                         if (n - tb->rnum[0] <= item_pos) {      /* pasted item or part of it falls to R[0] */
619                                 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) {  /* we must shift the part of the appended item */
620                                         if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) {        /* we append to directory item */
621                                                 int entry_count;
622
623                                                 RFALSE(zeros_num,
624                                                        "PAP-12145: invalid parameter in case of a directory");
625                                                 entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD
626                                                                   (tbS0, item_pos));
627                                                 if (entry_count - tb->rbytes <
628                                                     pos_in_item)
629                                                         /* new directory entry falls into R[0] */
630                                                 {
631                                                         int paste_entry_position;
632
633                                                         RFALSE(tb->rbytes - 1 >= entry_count || !tb-> insert_size[0],
634                                                                "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
635                                                                tb->rbytes, entry_count);
636                                                         /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
637                                                         leaf_shift_right(tb, tb->rnum[0], tb->rbytes - 1);
638                                                         /* Paste given directory entry to directory item */
639                                                         paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
640                                                         buffer_info_init_right(tb, &bi);
641                                                         leaf_paste_in_buffer(&bi, 0, paste_entry_position, tb->insert_size[0], body, zeros_num);
642                                                         /* paste entry */
643                                                         leaf_paste_entries(&bi, 0, paste_entry_position, 1,
644                                                                            (struct reiserfs_de_head *) body,
645                                                                            body + DEH_SIZE, tb->insert_size[0]);
646
647                                                         if (paste_entry_position == 0) {
648                                                                 /* change delimiting keys */
649                                                                 replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0],0);
650                                                         }
651
652                                                         tb->insert_size[0] = 0;
653                                                         pos_in_item++;
654                                                 } else {        /* new directory entry doesn't fall into R[0] */
655
656                                                         leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
657                                                 }
658                                         } else {        /* regular object */
659
660                                                 int n_shift, n_rem, r_zeros_number;
661                                                 const char *r_body;
662
663                                                 /* Calculate number of bytes which must be shifted from appended item */
664                                                 if ((n_shift = tb->rbytes - tb->insert_size[0]) < 0)
665                                                         n_shift = 0;
666
667                                                 RFALSE(pos_in_item != ih_item_len
668                                                        (B_N_PITEM_HEAD(tbS0, item_pos)),
669                                                        "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
670                                                        pos_in_item, ih_item_len
671                                                        (B_N_PITEM_HEAD(tbS0, item_pos)));
672
673                                                 leaf_shift_right(tb, tb->rnum[0], n_shift);
674                                                 /* Calculate number of bytes which must remain in body after appending to R[0] */
675                                                 if ((n_rem = tb->insert_size[0] - tb->rbytes) < 0)
676                                                         n_rem = 0;
677
678                                                 {
679                                                         int version;
680                                                         unsigned long temp_rem = n_rem;
681
682                                                         version = ih_version(B_N_PITEM_HEAD(tb->R[0], 0));
683                                                         if (is_indirect_le_key(version, B_N_PKEY(tb->R[0], 0))) {
684                                                                 temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
685                                                         }
686                                                         set_le_key_k_offset(version, B_N_PKEY(tb->R[0], 0),
687                                                              le_key_k_offset(version, B_N_PKEY(tb->R[0], 0)) + temp_rem);
688                                                         set_le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]),
689                                                              le_key_k_offset(version, B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0])) + temp_rem);
690                                                 }
691 /*                k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
692                   k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
693                                                 do_balance_mark_internal_dirty(tb, tb->CFR[0], 0);
694
695                                                 /* Append part of body into R[0] */
696                                                 buffer_info_init_right(tb, &bi);
697                                                 if (n_rem > zeros_num) {
698                                                         r_zeros_number = 0;
699                                                         r_body = body + n_rem - zeros_num;
700                                                 } else {
701                                                         r_body = body;
702                                                         r_zeros_number = zeros_num - n_rem;
703                                                         zeros_num -= r_zeros_number;
704                                                 }
705
706                                                 leaf_paste_in_buffer(&bi, 0, n_shift,
707                                                                      tb->insert_size[0] - n_rem,
708                                                                      r_body, r_zeros_number);
709
710                                                 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->R[0], 0))) {
711 #if 0
712                                                         RFALSE(n_rem,
713                                                                "PAP-12160: paste more than one unformatted node pointer");
714 #endif
715                                                         set_ih_free_space(B_N_PITEM_HEAD(tb->R[0], 0), 0);
716                                                 }
717                                                 tb->insert_size[0] = n_rem;
718                                                 if (!n_rem)
719                                                         pos_in_item++;
720                                         }
721                                 } else {        /* pasted item in whole falls into R[0] */
722
723                                         struct item_head *pasted;
724
725                                         ret_val = leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
726                                         /* append item in R[0] */
727                                         if (pos_in_item >= 0) {
728                                                 buffer_info_init_right(tb, &bi);
729                                                 leaf_paste_in_buffer(&bi, item_pos - n + tb->rnum[0], pos_in_item,
730                                                                      tb->insert_size[0], body, zeros_num);
731                                         }
732
733                                         /* paste new entry, if item is directory item */
734                                         pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
735                                         if (is_direntry_le_ih(pasted) && pos_in_item >= 0) {
736                                                 leaf_paste_entries(&bi, item_pos - n + tb->rnum[0],
737                                                                    pos_in_item, 1,
738                                                                    (struct reiserfs_de_head *) body,
739                                                                    body + DEH_SIZE, tb->insert_size[0]);
740                                                 if (!pos_in_item) {
741
742                                                         RFALSE(item_pos - n + tb->rnum[0],
743                                                                "PAP-12165: directory item must be first item of node when pasting is in 0th position");
744
745                                                         /* update delimiting keys */
746                                                         replace_key(tb, tb->CFR[0], tb->rkey[0], tb->R[0], 0);
747                                                 }
748                                         }
749
750                                         if (is_indirect_le_ih(pasted))
751                                                 set_ih_free_space(pasted, 0);
752                                         zeros_num = tb->insert_size[0] = 0;
753                                 }
754                         } else {        /* new item doesn't fall into R[0] */
755
756                                 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
757                         }
758                         break;
759                 default:        /* cases d and t */
760                         reiserfs_panic(tb->tb_sb, "PAP-12175",
761                                        "rnum > 0: unexpected mode: %s(%d)",
762                                        (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
763                 }
764
765         }
766
767         /* tb->rnum[0] > 0 */
768         RFALSE(tb->blknum[0] > 3,
769                "PAP-12180: blknum can not be %d. It must be <= 3", tb->blknum[0]);
770         RFALSE(tb->blknum[0] < 0,
771                "PAP-12185: blknum can not be %d. It must be >= 0", tb->blknum[0]);
772
773         /* if while adding to a node we discover that it is possible to split
774            it in two, and merge the left part into the left neighbor and the
775            right part into the right neighbor, eliminating the node */
776         if (tb->blknum[0] == 0) {       /* node S[0] is empty now */
777
778                 RFALSE(!tb->lnum[0] || !tb->rnum[0],
779                        "PAP-12190: lnum and rnum must not be zero");
780                 /* if insertion was done before 0-th position in R[0], right
781                    delimiting key of the tb->L[0]'s and left delimiting key are
782                    not set correctly */
783                 if (tb->CFL[0]) {
784                         if (!tb->CFR[0])
785                                 reiserfs_panic(tb->tb_sb, "vs-12195",
786                                                "CFR not initialized");
787                         copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
788                                  B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
789                         do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
790                 }
791
792                 reiserfs_invalidate_buffer(tb, tbS0);
793                 return 0;
794         }
795
796         /* Fill new nodes that appear in place of S[0] */
797
798         /* I am told that this copying is because we need an array to enable
799            the looping code. -Hans */
800         snum[0] = tb->s1num, snum[1] = tb->s2num;
801         sbytes[0] = tb->s1bytes;
802         sbytes[1] = tb->s2bytes;
803         for (i = tb->blknum[0] - 2; i >= 0; i--) {
804
805                 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
806                        snum[i]);
807
808                 /* here we shift from S to S_new nodes */
809
810                 S_new[i] = get_FEB(tb);
811
812                 /* initialized block type and tree level */
813                 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
814
815                 n = B_NR_ITEMS(tbS0);
816
817                 switch (flag) {
818                 case M_INSERT:  /* insert item */
819
820                         if (n - snum[i] < item_pos) {   /* new item or it's part falls to first new node S_new[i] */
821                                 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) {   /* part of new item falls into S_new[i] */
822                                         int old_key_comp, old_len, r_zeros_number;
823                                         const char *r_body;
824                                         int version;
825
826                                         /* Move snum[i]-1 items from S[0] to S_new[i] */
827                                         leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
828                                                         snum[i] - 1, -1,
829                                                         S_new[i]);
830                                         /* Remember key component and item length */
831                                         version = ih_version(ih);
832                                         old_key_comp = le_ih_k_offset(ih);
833                                         old_len = ih_item_len(ih);
834
835                                         /* Calculate key component and item length to insert into S_new[i] */
836                                         set_le_ih_k_offset(ih, le_ih_k_offset(ih) +
837                                                            ((old_len - sbytes[i]) << (is_indirect_le_ih(ih) ? tb->tb_sb-> s_blocksize_bits - UNFM_P_SHIFT : 0)));
838
839                                         put_ih_item_len(ih, sbytes[i]);
840
841                                         /* Insert part of the item into S_new[i] before 0-th item */
842                                         buffer_info_init_bh(tb, &bi, S_new[i]);
843
844                                         if ((old_len - sbytes[i]) > zeros_num) {
845                                                 r_zeros_number = 0;
846                                                 r_body = body + (old_len - sbytes[i]) - zeros_num;
847                                         } else {
848                                                 r_body = body;
849                                                 r_zeros_number = zeros_num - (old_len - sbytes[i]);
850                                                 zeros_num -= r_zeros_number;
851                                         }
852
853                                         leaf_insert_into_buf(&bi, 0, ih, r_body, r_zeros_number);
854
855                                         /* Calculate key component and item length to insert into S[i] */
856                                         set_le_ih_k_offset(ih, old_key_comp);
857                                         put_ih_item_len(ih, old_len - sbytes[i]);
858                                         tb->insert_size[0] -= sbytes[i];
859                                 } else {        /* whole new item falls into S_new[i] */
860
861                                         /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
862                                         leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
863                                                         snum[i] - 1, sbytes[i], S_new[i]);
864
865                                         /* Insert new item into S_new[i] */
866                                         buffer_info_init_bh(tb, &bi, S_new[i]);
867                                         leaf_insert_into_buf(&bi, item_pos - n + snum[i] - 1,
868                                                              ih, body, zeros_num);
869
870                                         zeros_num = tb->insert_size[0] = 0;
871                                 }
872                         }
873
874                         else {  /* new item or it part don't falls into S_new[i] */
875
876                                 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
877                                                 snum[i], sbytes[i], S_new[i]);
878                         }
879                         break;
880
881                 case M_PASTE:   /* append item */
882
883                         if (n - snum[i] <= item_pos) {  /* pasted item or part if it falls to S_new[i] */
884                                 if (item_pos == n - snum[i] && sbytes[i] != -1) {       /* we must shift part of the appended item */
885                                         struct item_head *aux_ih;
886
887                                         RFALSE(ih, "PAP-12210: ih must be 0");
888
889                                         aux_ih = B_N_PITEM_HEAD(tbS0, item_pos);
890                                         if (is_direntry_le_ih(aux_ih)) {
891                                                 /* we append to directory item */
892
893                                                 int entry_count;
894
895                                                 entry_count = ih_entry_count(aux_ih);
896
897                                                 if (entry_count - sbytes[i] < pos_in_item && pos_in_item <= entry_count) {
898                                                         /* new directory entry falls into S_new[i] */
899
900                                                         RFALSE(!tb->insert_size[0], "PAP-12215: insert_size is already 0");
901                                                         RFALSE(sbytes[i] - 1 >= entry_count,
902                                                                "PAP-12220: there are no so much entries (%d), only %d",
903                                                                sbytes[i] - 1, entry_count);
904
905                                                         /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
906                                                         leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i] - 1, S_new[i]);
907                                                         /* Paste given directory entry to directory item */
908                                                         buffer_info_init_bh(tb, &bi, S_new[i]);
909                                                         leaf_paste_in_buffer(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
910                                                              tb->insert_size[0], body, zeros_num);
911                                                         /* paste new directory entry */
912                                                         leaf_paste_entries(&bi, 0, pos_in_item - entry_count + sbytes[i] - 1, 1,
913                                                                            (struct reiserfs_de_head *) body,
914                                                                            body + DEH_SIZE, tb->insert_size[0]);
915                                                         tb->insert_size[0] = 0;
916                                                         pos_in_item++;
917                                                 } else {        /* new directory entry doesn't fall into S_new[i] */
918                                                         leaf_move_items(LEAF_FROM_S_TO_SNEW,tb, snum[i], sbytes[i], S_new[i]);
919                                                 }
920                                         } else {        /* regular object */
921
922                                                 int n_shift, n_rem, r_zeros_number;
923                                                 const char *r_body;
924
925                                                 RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)) || tb->insert_size[0] <= 0,
926                                                        "PAP-12225: item too short or insert_size <= 0");
927
928                                                 /* Calculate number of bytes which must be shifted from appended item */
929                                                 n_shift = sbytes[i] - tb->insert_size[0];
930                                                 if (n_shift < 0)
931                                                         n_shift = 0;
932                                                 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
933
934                                                 /* Calculate number of bytes which must remain in body after append to S_new[i] */
935                                                 n_rem = tb->insert_size[0] - sbytes[i];
936                                                 if (n_rem < 0)
937                                                         n_rem = 0;
938                                                 /* Append part of body into S_new[0] */
939                                                 buffer_info_init_bh(tb, &bi, S_new[i]);
940                                                 if (n_rem > zeros_num) {
941                                                         r_zeros_number = 0;
942                                                         r_body = body + n_rem - zeros_num;
943                                                 } else {
944                                                         r_body = body;
945                                                         r_zeros_number = zeros_num - n_rem;
946                                                         zeros_num -= r_zeros_number;
947                                                 }
948
949                                                 leaf_paste_in_buffer(&bi, 0, n_shift,
950                                                                      tb->insert_size[0] - n_rem,
951                                                                      r_body, r_zeros_number);
952                                                 {
953                                                         struct item_head *tmp;
954
955                                                         tmp = B_N_PITEM_HEAD(S_new[i], 0);
956                                                         if (is_indirect_le_ih
957                                                             (tmp)) {
958                                                                 set_ih_free_space(tmp, 0);
959                                                                 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + (n_rem << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT)));
960                                                         } else {
961                                                                 set_le_ih_k_offset(tmp, le_ih_k_offset(tmp) + n_rem);
962                                                         }
963                                                 }
964
965                                                 tb->insert_size[0] = n_rem;
966                                                 if (!n_rem)
967                                                         pos_in_item++;
968                                         }
969                                 } else
970                                         /* item falls wholly into S_new[i] */
971                                 {
972                                         int leaf_mi;
973                                         struct item_head *pasted;
974
975 #ifdef CONFIG_REISERFS_CHECK
976                                         struct item_head *ih_check = B_N_PITEM_HEAD(tbS0, item_pos);
977
978                                         if (!is_direntry_le_ih(ih_check)
979                                             && (pos_in_item != ih_item_len(ih_check)
980                                                 || tb->insert_size[0] <= 0))
981                                                 reiserfs_panic(tb->tb_sb,
982                                                              "PAP-12235",
983                                                              "pos_in_item "
984                                                              "must be equal "
985                                                              "to ih_item_len");
986 #endif                          /* CONFIG_REISERFS_CHECK */
987
988                                         leaf_mi = leaf_move_items(LEAF_FROM_S_TO_SNEW,
989                                                             tb, snum[i],
990                                                             sbytes[i],
991                                                             S_new[i]);
992
993                                         RFALSE(leaf_mi,
994                                                "PAP-12240: unexpected value returned by leaf_move_items (%d)",
995                                                leaf_mi);
996
997                                         /* paste into item */
998                                         buffer_info_init_bh(tb, &bi, S_new[i]);
999                                         leaf_paste_in_buffer(&bi,
1000                                                              item_pos - n + snum[i],
1001                                                              pos_in_item,
1002                                                              tb->insert_size[0],
1003                                                              body, zeros_num);
1004
1005                                         pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
1006                                         if (is_direntry_le_ih(pasted)) {
1007                                                 leaf_paste_entries(&bi,
1008                                                                    item_pos - n + snum[i],
1009                                                                    pos_in_item, 1,
1010                                                                    (struct reiserfs_de_head *)body,
1011                                                                    body + DEH_SIZE,
1012                                                                    tb->insert_size[0]
1013                                                     );
1014                                         }
1015
1016                                         /* if we paste to indirect item update ih_free_space */
1017                                         if (is_indirect_le_ih(pasted))
1018                                                 set_ih_free_space(pasted, 0);
1019                                         zeros_num = tb->insert_size[0] = 0;
1020                                 }
1021                         }
1022
1023                         else {  /* pasted item doesn't fall into S_new[i] */
1024
1025                                 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1026                                                 snum[i], sbytes[i], S_new[i]);
1027                         }
1028                         break;
1029                 default:        /* cases d and t */
1030                         reiserfs_panic(tb->tb_sb, "PAP-12245",
1031                                        "blknum > 2: unexpected mode: %s(%d)",
1032                                        (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
1033                 }
1034
1035                 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1036                 insert_ptr[i] = S_new[i];
1037
1038                 RFALSE(!buffer_journaled(S_new[i])
1039                        || buffer_journal_dirty(S_new[i])
1040                        || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1041                        i, S_new[i]);
1042         }
1043
1044         /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1045            affected item which remains in S */
1046         if (0 <= item_pos && item_pos < tb->s0num) {    /* if we must insert or append into buffer S[0] */
1047
1048                 switch (flag) {
1049                 case M_INSERT:  /* insert item into S[0] */
1050                         buffer_info_init_tbS0(tb, &bi);
1051                         leaf_insert_into_buf(&bi, item_pos, ih, body,
1052                                              zeros_num);
1053
1054                         /* If we insert the first key change the delimiting key */
1055                         if (item_pos == 0) {
1056                                 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1057                                         replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1058                         }
1059                         break;
1060
1061                 case M_PASTE:{  /* append item in S[0] */
1062                                 struct item_head *pasted;
1063
1064                                 pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1065                                 /* when directory, may be new entry already pasted */
1066                                 if (is_direntry_le_ih(pasted)) {
1067                                         if (pos_in_item >= 0 && pos_in_item <= ih_entry_count(pasted)) {
1068
1069                                                 RFALSE(!tb->insert_size[0],
1070                                                        "PAP-12260: insert_size is 0 already");
1071
1072                                                 /* prepare space */
1073                                                 buffer_info_init_tbS0(tb, &bi);
1074                                                 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1075                                                                      tb->insert_size[0], body,
1076                                                                      zeros_num);
1077
1078                                                 /* paste entry */
1079                                                 leaf_paste_entries(&bi, item_pos, pos_in_item, 1,
1080                                                                    (struct reiserfs_de_head *)body,
1081                                                                    body + DEH_SIZE,
1082                                                                    tb->insert_size[0]);
1083                                                 if (!item_pos && !pos_in_item) {
1084                                                         RFALSE(!tb->CFL[0] || !tb->L[0],
1085                                                                "PAP-12270: CFL[0]/L[0] must be specified");
1086                                                         if (tb->CFL[0])
1087                                                                 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0, 0);
1088                                                 }
1089                                                 tb->insert_size[0] = 0;
1090                                         }
1091                                 } else {        /* regular object */
1092                                         if (pos_in_item == ih_item_len(pasted)) {
1093
1094                                                 RFALSE(tb->insert_size[0] <= 0,
1095                                                        "PAP-12275: insert size must not be %d",
1096                                                        tb->insert_size[0]);
1097                                                 buffer_info_init_tbS0(tb, &bi);
1098                                                 leaf_paste_in_buffer(&bi, item_pos, pos_in_item,
1099                                                                      tb->insert_size[0], body, zeros_num);
1100
1101                                                 if (is_indirect_le_ih(pasted)) {
1102 #if 0
1103                                                         RFALSE(tb->
1104                                                                insert_size[0] !=
1105                                                                UNFM_P_SIZE,
1106                                                                "PAP-12280: insert_size for indirect item must be %d, not %d",
1107                                                                UNFM_P_SIZE,
1108                                                                tb->
1109                                                                insert_size[0]);
1110 #endif
1111                                                         set_ih_free_space(pasted, 0);
1112                                                 }
1113                                                 tb->insert_size[0] = 0;
1114                                         }
1115 #ifdef CONFIG_REISERFS_CHECK
1116                                         else {
1117                                                 if (tb->insert_size[0]) {
1118                                                         print_cur_tb("12285");
1119                                                         reiserfs_panic(tb->tb_sb,
1120                                                             "PAP-12285",
1121                                                             "insert_size "
1122                                                             "must be 0 "
1123                                                             "(%d)",
1124                                                             tb->insert_size[0]);
1125                                                 }
1126                                         }
1127 #endif                          /* CONFIG_REISERFS_CHECK */
1128
1129                                 }
1130                         }       /* case M_PASTE: */
1131                 }
1132         }
1133 #ifdef CONFIG_REISERFS_CHECK
1134         if (flag == M_PASTE && tb->insert_size[0]) {
1135                 print_cur_tb("12290");
1136                 reiserfs_panic(tb->tb_sb,
1137                                "PAP-12290", "insert_size is still not 0 (%d)",
1138                                tb->insert_size[0]);
1139         }
1140 #endif                          /* CONFIG_REISERFS_CHECK */
1141         return 0;
1142 }                               /* Leaf level of the tree is balanced (end of balance_leaf) */
1143
1144 /* Make empty node */
1145 void make_empty_node(struct buffer_info *bi)
1146 {
1147         struct block_head *blkh;
1148
1149         RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
1150
1151         blkh = B_BLK_HEAD(bi->bi_bh);
1152         set_blkh_nr_item(blkh, 0);
1153         set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
1154
1155         if (bi->bi_parent)
1156                 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
1157 }
1158
1159 /* Get first empty buffer */
1160 struct buffer_head *get_FEB(struct tree_balance *tb)
1161 {
1162         int i;
1163         struct buffer_info bi;
1164
1165         for (i = 0; i < MAX_FEB_SIZE; i++)
1166                 if (tb->FEB[i] != NULL)
1167                         break;
1168
1169         if (i == MAX_FEB_SIZE)
1170                 reiserfs_panic(tb->tb_sb, "vs-12300", "FEB list is empty");
1171
1172         buffer_info_init_bh(tb, &bi, tb->FEB[i]);
1173         make_empty_node(&bi);
1174         set_buffer_uptodate(tb->FEB[i]);
1175         tb->used[i] = tb->FEB[i];
1176         tb->FEB[i] = NULL;
1177
1178         return tb->used[i];
1179 }
1180
1181 /* This is now used because reiserfs_free_block has to be able to
1182 ** schedule.
1183 */
1184 static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
1185 {
1186         int i;
1187
1188         if (buffer_dirty(bh))
1189                 reiserfs_warning(tb->tb_sb, "reiserfs-12320",
1190                                  "called with dirty buffer");
1191         for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
1192                 if (!tb->thrown[i]) {
1193                         tb->thrown[i] = bh;
1194                         get_bh(bh);     /* free_thrown puts this */
1195                         return;
1196                 }
1197         reiserfs_warning(tb->tb_sb, "reiserfs-12321",
1198                          "too many thrown buffers");
1199 }
1200
1201 static void free_thrown(struct tree_balance *tb)
1202 {
1203         int i;
1204         b_blocknr_t blocknr;
1205         for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
1206                 if (tb->thrown[i]) {
1207                         blocknr = tb->thrown[i]->b_blocknr;
1208                         if (buffer_dirty(tb->thrown[i]))
1209                                 reiserfs_warning(tb->tb_sb, "reiserfs-12322",
1210                                                  "called with dirty buffer %d",
1211                                                  blocknr);
1212                         brelse(tb->thrown[i]);  /* incremented in store_thrown */
1213                         reiserfs_free_block(tb->transaction_handle, NULL,
1214                                             blocknr, 0);
1215                 }
1216         }
1217 }
1218
1219 void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
1220 {
1221         struct block_head *blkh;
1222         blkh = B_BLK_HEAD(bh);
1223         set_blkh_level(blkh, FREE_LEVEL);
1224         set_blkh_nr_item(blkh, 0);
1225
1226         clear_buffer_dirty(bh);
1227         store_thrown(tb, bh);
1228 }
1229
1230 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
1231 void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1232                  struct buffer_head *src, int n_src)
1233 {
1234
1235         RFALSE(dest == NULL || src == NULL,
1236                "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1237                src, dest);
1238         RFALSE(!B_IS_KEYS_LEVEL(dest),
1239                "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1240                dest);
1241         RFALSE(n_dest < 0 || n_src < 0,
1242                "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1243         RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1244                "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1245                n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
1246
1247         if (B_IS_ITEMS_LEVEL(src))
1248                 /* source buffer contains leaf node */
1249                 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1250                        KEY_SIZE);
1251         else
1252                 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1253                        KEY_SIZE);
1254
1255         do_balance_mark_internal_dirty(tb, dest, 0);
1256 }
1257
1258 int get_left_neighbor_position(struct tree_balance *tb, int h)
1259 {
1260         int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1261
1262         RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
1263                "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1264                h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
1265
1266         if (Sh_position == 0)
1267                 return B_NR_ITEMS(tb->FL[h]);
1268         else
1269                 return Sh_position - 1;
1270 }
1271
1272 int get_right_neighbor_position(struct tree_balance *tb, int h)
1273 {
1274         int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
1275
1276         RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
1277                "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1278                h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
1279
1280         if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1281                 return 0;
1282         else
1283                 return Sh_position + 1;
1284 }
1285
1286 #ifdef CONFIG_REISERFS_CHECK
1287
1288 int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1289 static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1290                                 char *mes)
1291 {
1292         struct disk_child *dc;
1293         int i;
1294
1295         RFALSE(!bh, "PAP-12336: bh == 0");
1296
1297         if (!bh || !B_IS_IN_TREE(bh))
1298                 return;
1299
1300         RFALSE(!buffer_dirty(bh) &&
1301                !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1302                "PAP-12337: buffer (%b) must be dirty", bh);
1303         dc = B_N_CHILD(bh, 0);
1304
1305         for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1306                 if (!is_reusable(s, dc_block_number(dc), 1)) {
1307                         print_cur_tb(mes);
1308                         reiserfs_panic(s, "PAP-12338",
1309                                        "invalid child pointer %y in %b",
1310                                        dc, bh);
1311                 }
1312         }
1313 }
1314
1315 static int locked_or_not_in_tree(struct tree_balance *tb,
1316                                   struct buffer_head *bh, char *which)
1317 {
1318         if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1319             !B_IS_IN_TREE(bh)) {
1320                 reiserfs_warning(tb->tb_sb, "vs-12339", "%s (%b)", which, bh);
1321                 return 1;
1322         }
1323         return 0;
1324 }
1325
1326 static int check_before_balancing(struct tree_balance *tb)
1327 {
1328         int retval = 0;
1329
1330         if (REISERFS_SB(tb->tb_sb)->cur_tb) {
1331                 reiserfs_panic(tb->tb_sb, "vs-12335", "suspect that schedule "
1332                                "occurred based on cur_tb not being null at "
1333                                "this point in code. do_balance cannot properly "
1334                                "handle concurrent tree accesses on a same "
1335                                "mount point.");
1336         }
1337
1338         /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1339            prepped all of these for us). */
1340         if (tb->lnum[0]) {
1341                 retval |= locked_or_not_in_tree(tb, tb->L[0], "L[0]");
1342                 retval |= locked_or_not_in_tree(tb, tb->FL[0], "FL[0]");
1343                 retval |= locked_or_not_in_tree(tb, tb->CFL[0], "CFL[0]");
1344                 check_leaf(tb->L[0]);
1345         }
1346         if (tb->rnum[0]) {
1347                 retval |= locked_or_not_in_tree(tb, tb->R[0], "R[0]");
1348                 retval |= locked_or_not_in_tree(tb, tb->FR[0], "FR[0]");
1349                 retval |= locked_or_not_in_tree(tb, tb->CFR[0], "CFR[0]");
1350                 check_leaf(tb->R[0]);
1351         }
1352         retval |= locked_or_not_in_tree(tb, PATH_PLAST_BUFFER(tb->tb_path),
1353                                         "S[0]");
1354         check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1355
1356         return retval;
1357 }
1358
1359 static void check_after_balance_leaf(struct tree_balance *tb)
1360 {
1361         if (tb->lnum[0]) {
1362                 if (B_FREE_SPACE(tb->L[0]) !=
1363                     MAX_CHILD_SIZE(tb->L[0]) -
1364                     dc_size(B_N_CHILD
1365                             (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1366                         print_cur_tb("12221");
1367                         reiserfs_panic(tb->tb_sb, "PAP-12355",
1368                                        "shift to left was incorrect");
1369                 }
1370         }
1371         if (tb->rnum[0]) {
1372                 if (B_FREE_SPACE(tb->R[0]) !=
1373                     MAX_CHILD_SIZE(tb->R[0]) -
1374                     dc_size(B_N_CHILD
1375                             (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1376                         print_cur_tb("12222");
1377                         reiserfs_panic(tb->tb_sb, "PAP-12360",
1378                                        "shift to right was incorrect");
1379                 }
1380         }
1381         if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1382             (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1383              (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1384               dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1385                                 PATH_H_POSITION(tb->tb_path, 1)))))) {
1386                 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1387                 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1388                              dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1389                                                PATH_H_POSITION(tb->tb_path,
1390                                                                1))));
1391                 print_cur_tb("12223");
1392                 reiserfs_warning(tb->tb_sb, "reiserfs-12363",
1393                                  "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1394                                  "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1395                                  left,
1396                                  MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1397                                  PATH_H_PBUFFER(tb->tb_path, 1),
1398                                  PATH_H_POSITION(tb->tb_path, 1),
1399                                  dc_size(B_N_CHILD
1400                                          (PATH_H_PBUFFER(tb->tb_path, 1),
1401                                           PATH_H_POSITION(tb->tb_path, 1))),
1402                                  right);
1403                 reiserfs_panic(tb->tb_sb, "PAP-12365", "S is incorrect");
1404         }
1405 }
1406
1407 static void check_leaf_level(struct tree_balance *tb)
1408 {
1409         check_leaf(tb->L[0]);
1410         check_leaf(tb->R[0]);
1411         check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1412 }
1413
1414 static void check_internal_levels(struct tree_balance *tb)
1415 {
1416         int h;
1417
1418         /* check all internal nodes */
1419         for (h = 1; tb->insert_size[h]; h++) {
1420                 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1421                                     "BAD BUFFER ON PATH");
1422                 if (tb->lnum[h])
1423                         check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1424                 if (tb->rnum[h])
1425                         check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1426         }
1427
1428 }
1429
1430 #endif
1431
1432 /* Now we have all of the buffers that must be used in balancing of
1433    the tree.  We rely on the assumption that schedule() will not occur
1434    while do_balance works. ( Only interrupt handlers are acceptable.)
1435    We balance the tree according to the analysis made before this,
1436    using buffers already obtained.  For SMP support it will someday be
1437    necessary to add ordered locking of tb. */
1438
1439 /* Some interesting rules of balancing:
1440
1441    we delete a maximum of two nodes per level per balancing: we never
1442    delete R, when we delete two of three nodes L, S, R then we move
1443    them into R.
1444
1445    we only delete L if we are deleting two nodes, if we delete only
1446    one node we delete S
1447
1448    if we shift leaves then we shift as much as we can: this is a
1449    deliberate policy of extremism in node packing which results in
1450    higher average utilization after repeated random balance operations
1451    at the cost of more memory copies and more balancing as a result of
1452    small insertions to full nodes.
1453
1454    if we shift internal nodes we try to evenly balance the node
1455    utilization, with consequent less balancing at the cost of lower
1456    utilization.
1457
1458    one could argue that the policy for directories in leaves should be
1459    that of internal nodes, but we will wait until another day to
1460    evaluate this....  It would be nice to someday measure and prove
1461    these assumptions as to what is optimal....
1462
1463 */
1464
1465 static inline void do_balance_starts(struct tree_balance *tb)
1466 {
1467         /* use print_cur_tb() to see initial state of struct
1468            tree_balance */
1469
1470         /* store_print_tb (tb); */
1471
1472         /* do not delete, just comment it out */
1473 /*    print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
1474              "check");*/
1475         RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
1476 #ifdef CONFIG_REISERFS_CHECK
1477         REISERFS_SB(tb->tb_sb)->cur_tb = tb;
1478 #endif
1479 }
1480
1481 static inline void do_balance_completed(struct tree_balance *tb)
1482 {
1483
1484 #ifdef CONFIG_REISERFS_CHECK
1485         check_leaf_level(tb);
1486         check_internal_levels(tb);
1487         REISERFS_SB(tb->tb_sb)->cur_tb = NULL;
1488 #endif
1489
1490         /* reiserfs_free_block is no longer schedule safe.  So, we need to
1491          ** put the buffers we want freed on the thrown list during do_balance,
1492          ** and then free them now
1493          */
1494
1495         REISERFS_SB(tb->tb_sb)->s_do_balance++;
1496
1497         /* release all nodes hold to perform the balancing */
1498         unfix_nodes(tb);
1499
1500         free_thrown(tb);
1501 }
1502
1503 void do_balance(struct tree_balance *tb,        /* tree_balance structure */
1504                 struct item_head *ih,   /* item header of inserted item */
1505                 const char *body,       /* body  of inserted item or bytes to paste */
1506                 int flag)
1507 {                               /* i - insert, d - delete
1508                                    c - cut, p - paste
1509
1510                                    Cut means delete part of an item
1511                                    (includes removing an entry from a
1512                                    directory).
1513
1514                                    Delete means delete whole item.
1515
1516                                    Insert means add a new item into the
1517                                    tree.
1518
1519                                    Paste means to append to the end of an
1520                                    existing file or to insert a directory
1521                                    entry.  */
1522         int child_pos,          /* position of a child node in its parent */
1523          h;                     /* level of the tree being processed */
1524         struct item_head insert_key[2]; /* in our processing of one level
1525                                            we sometimes determine what
1526                                            must be inserted into the next
1527                                            higher level.  This insertion
1528                                            consists of a key or two keys
1529                                            and their corresponding
1530                                            pointers */
1531         struct buffer_head *insert_ptr[2];      /* inserted node-ptrs for the next
1532                                                    level */
1533
1534         tb->tb_mode = flag;
1535         tb->need_balance_dirty = 0;
1536
1537         if (FILESYSTEM_CHANGED_TB(tb)) {
1538                 reiserfs_panic(tb->tb_sb, "clm-6000", "fs generation has "
1539                                "changed");
1540         }
1541         /* if we have no real work to do  */
1542         if (!tb->insert_size[0]) {
1543                 reiserfs_warning(tb->tb_sb, "PAP-12350",
1544                                  "insert_size == 0, mode == %c", flag);
1545                 unfix_nodes(tb);
1546                 return;
1547         }
1548
1549         atomic_inc(&(fs_generation(tb->tb_sb)));
1550         do_balance_starts(tb);
1551
1552         /* balance leaf returns 0 except if combining L R and S into
1553            one node.  see balance_internal() for explanation of this
1554            line of code. */
1555         child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
1556             balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
1557
1558 #ifdef CONFIG_REISERFS_CHECK
1559         check_after_balance_leaf(tb);
1560 #endif
1561
1562         /* Balance internal level of the tree. */
1563         for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
1564                 child_pos =
1565                     balance_internal(tb, h, child_pos, insert_key, insert_ptr);
1566
1567         do_balance_completed(tb);
1568
1569 }