blob: 99f80538c4bf90a00ef499ae6af8a28615f0c38d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
3 */
4
5/* Now we have all buffers that must be used in balancing of the tree */
6/* Further calculations can not cause schedule(), and thus the buffer */
7/* tree will be stable until the balancing will be finished */
8/* balance the tree according to the analysis made before, */
9/* and using buffers obtained after all above. */
10
Linus Torvalds1da177e2005-04-16 15:20:36 -070011/**
12 ** balance_leaf_when_delete
13 ** balance_leaf
14 ** do_balance
15 **
16 **/
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <linux/time.h>
20#include <linux/reiserfs_fs.h>
21#include <linux/buffer_head.h>
Ahmed S. Darwish79a81ae2007-02-12 00:52:09 -080022#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023
24#ifdef CONFIG_REISERFS_CHECK
25
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070026struct tree_balance *cur_tb = NULL; /* detects whether more than one
27 copy of tb exists as a means
28 of checking whether schedule
29 is interrupting do_balance */
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#endif
31
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070032inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
33 struct buffer_head *bh, int flag)
Linus Torvalds1da177e2005-04-16 15:20:36 -070034{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070035 journal_mark_dirty(tb->transaction_handle,
36 tb->transaction_handle->t_super, bh);
Linus Torvalds1da177e2005-04-16 15:20:36 -070037}
38
39#define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
40#define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
41
Linus Torvalds1da177e2005-04-16 15:20:36 -070042/* summary:
43 if deleting something ( tb->insert_size[0] < 0 )
44 return(balance_leaf_when_delete()); (flag d handled here)
45 else
46 if lnum is larger than 0 we put items into the left node
47 if rnum is larger than 0 we put items into the right node
48 if snum1 is larger than 0 we put items into the new node s1
49 if snum2 is larger than 0 we put items into the new node s2
50Note that all *num* count new items being created.
51
52It would be easier to read balance_leaf() if each of these summary
53lines was a separate procedure rather than being inlined. I think
54that there are many passages here and in balance_leaf_when_delete() in
55which two calls to one procedure can replace two passages, and it
56might save cache space and improve software maintenance costs to do so.
57
58Vladimir made the perceptive comment that we should offload most of
59the decision making in this function into fix_nodes/check_balance, and
60then create some sort of structure in tb that says what actions should
61be performed by do_balance.
62
63-Hans */
64
Linus Torvalds1da177e2005-04-16 15:20:36 -070065/* Balance leaf node in case of delete or cut: insert_size[0] < 0
66 *
67 * lnum, rnum can have values >= -1
68 * -1 means that the neighbor must be joined with S
69 * 0 means that nothing should be done with the neighbor
70 * >0 means to shift entirely or partly the specified number of items to the neighbor
71 */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070072static int balance_leaf_when_delete(struct tree_balance *tb, int flag)
Linus Torvalds1da177e2005-04-16 15:20:36 -070073{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070074 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
75 int item_pos = PATH_LAST_POSITION(tb->tb_path);
76 int pos_in_item = tb->tb_path->pos_in_item;
77 struct buffer_info bi;
78 int n;
79 struct item_head *ih;
Linus Torvalds1da177e2005-04-16 15:20:36 -070080
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070081 RFALSE(tb->FR[0] && B_LEVEL(tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
82 "vs- 12000: level: wrong FR %z", tb->FR[0]);
83 RFALSE(tb->blknum[0] > 1,
84 "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
85 RFALSE(!tb->blknum[0] && !PATH_H_PPARENT(tb->tb_path, 0),
86 "PAP-12010: tree can not be empty");
Linus Torvalds1da177e2005-04-16 15:20:36 -070087
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070088 ih = B_N_PITEM_HEAD(tbS0, item_pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -070089
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070090 /* Delete or truncate the item */
Linus Torvalds1da177e2005-04-16 15:20:36 -070091
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070092 switch (flag) {
93 case M_DELETE: /* delete item in S[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -070094
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070095 RFALSE(ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
96 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
97 -tb->insert_size[0], ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -070098
Linus Torvaldsbd4c6252005-07-12 20:21:28 -070099 bi.tb = tb;
100 bi.bi_bh = tbS0;
101 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
102 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
103 leaf_delete_items(&bi, 0, item_pos, 1, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700105 if (!item_pos && tb->CFL[0]) {
106 if (B_NR_ITEMS(tbS0)) {
107 replace_key(tb, tb->CFL[0], tb->lkey[0], tbS0,
108 0);
109 } else {
110 if (!PATH_H_POSITION(tb->tb_path, 1))
111 replace_key(tb, tb->CFL[0], tb->lkey[0],
112 PATH_H_PPARENT(tb->tb_path,
113 0), 0);
114 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700117 RFALSE(!item_pos && !tb->CFL[0],
118 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0],
119 tb->L[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700121 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700123 case M_CUT:{ /* cut item in S[0] */
124 bi.tb = tb;
125 bi.bi_bh = tbS0;
126 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
127 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
128 if (is_direntry_le_ih(ih)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700130 /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
131 /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
132 tb->insert_size[0] = -1;
133 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
134 -tb->insert_size[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700136 RFALSE(!item_pos && !pos_in_item && !tb->CFL[0],
137 "PAP-12030: can not change delimiting key. CFL[0]=%p",
138 tb->CFL[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700140 if (!item_pos && !pos_in_item && tb->CFL[0]) {
141 replace_key(tb, tb->CFL[0], tb->lkey[0],
142 tbS0, 0);
143 }
144 } else {
145 leaf_cut_from_buffer(&bi, item_pos, pos_in_item,
146 -tb->insert_size[0]);
147
148 RFALSE(!ih_item_len(ih),
149 "PAP-12035: cut must leave non-zero dynamic length of item");
150 }
151 break;
152 }
153
154 default:
155 print_cur_tb("12040");
156 reiserfs_panic(tb->tb_sb,
157 "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
158 (flag ==
159 M_PASTE) ? "PASTE" : ((flag ==
160 M_INSERT) ? "INSERT" :
161 "UNKNOWN"), flag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700164 /* the rule is that no shifting occurs unless by shifting a node can be freed */
165 n = B_NR_ITEMS(tbS0);
166 if (tb->lnum[0]) { /* L[0] takes part in balancing */
167 if (tb->lnum[0] == -1) { /* L[0] must be joined with S[0] */
168 if (tb->rnum[0] == -1) { /* R[0] must be also joined with S[0] */
169 if (tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0)) {
170 /* all contents of all the 3 buffers will be in L[0] */
171 if (PATH_H_POSITION(tb->tb_path, 1) == 0
172 && 1 < B_NR_ITEMS(tb->FR[0]))
173 replace_key(tb, tb->CFL[0],
174 tb->lkey[0],
175 tb->FR[0], 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700177 leaf_move_items(LEAF_FROM_S_TO_L, tb, n,
178 -1, NULL);
179 leaf_move_items(LEAF_FROM_R_TO_L, tb,
180 B_NR_ITEMS(tb->R[0]),
181 -1, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700183 reiserfs_invalidate_buffer(tb, tbS0);
184 reiserfs_invalidate_buffer(tb,
185 tb->R[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700187 return 0;
188 }
189 /* all contents of all the 3 buffers will be in R[0] */
190 leaf_move_items(LEAF_FROM_S_TO_R, tb, n, -1,
191 NULL);
192 leaf_move_items(LEAF_FROM_L_TO_R, tb,
193 B_NR_ITEMS(tb->L[0]), -1, NULL);
194
195 /* right_delimiting_key is correct in R[0] */
196 replace_key(tb, tb->CFR[0], tb->rkey[0],
197 tb->R[0], 0);
198
199 reiserfs_invalidate_buffer(tb, tbS0);
200 reiserfs_invalidate_buffer(tb, tb->L[0]);
201
202 return -1;
203 }
204
205 RFALSE(tb->rnum[0] != 0,
206 "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
207 /* all contents of L[0] and S[0] will be in L[0] */
208 leaf_shift_left(tb, n, -1);
209
210 reiserfs_invalidate_buffer(tb, tbS0);
211
212 return 0;
213 }
214 /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
215
216 RFALSE((tb->lnum[0] + tb->rnum[0] < n) ||
217 (tb->lnum[0] + tb->rnum[0] > n + 1),
218 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
219 tb->rnum[0], tb->lnum[0], n);
220 RFALSE((tb->lnum[0] + tb->rnum[0] == n) &&
221 (tb->lbytes != -1 || tb->rbytes != -1),
222 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split",
223 tb->rbytes, tb->lbytes);
224 RFALSE((tb->lnum[0] + tb->rnum[0] == n + 1) &&
225 (tb->lbytes < 1 || tb->rbytes != -1),
226 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split",
227 tb->rbytes, tb->lbytes);
228
229 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
230 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
231
232 reiserfs_invalidate_buffer(tb, tbS0);
233
234 return 0;
235 }
236
237 if (tb->rnum[0] == -1) {
238 /* all contents of R[0] and S[0] will be in R[0] */
239 leaf_shift_right(tb, n, -1);
240 reiserfs_invalidate_buffer(tb, tbS0);
241 return 0;
242 }
243
244 RFALSE(tb->rnum[0],
245 "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247}
248
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700249static int balance_leaf(struct tree_balance *tb, struct item_head *ih, /* item header of inserted item (this is on little endian) */
250 const char *body, /* body of inserted item or bytes to paste */
251 int flag, /* i - insert, d - delete, c - cut, p - paste
252 (see comment to do_balance) */
253 struct item_head *insert_key, /* in our processing of one level we sometimes determine what
254 must be inserted into the next higher level. This insertion
255 consists of a key or two keys and their corresponding
256 pointers */
257 struct buffer_head **insert_ptr /* inserted node-ptrs for the next level */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 )
259{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700260 struct buffer_head *tbS0 = PATH_PLAST_BUFFER(tb->tb_path);
261 int item_pos = PATH_LAST_POSITION(tb->tb_path); /* index into the array of item headers in S[0]
262 of the affected item */
263 struct buffer_info bi;
264 struct buffer_head *S_new[2]; /* new nodes allocated to hold what could not fit into S */
265 int snum[2]; /* number of items that will be placed
266 into S_new (includes partially shifted
267 items) */
268 int sbytes[2]; /* if an item is partially shifted into S_new then
269 if it is a directory item
270 it is the number of entries from the item that are shifted into S_new
271 else
272 it is the number of bytes from the item that are shifted into S_new
273 */
274 int n, i;
275 int ret_val;
276 int pos_in_item;
277 int zeros_num;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700279 PROC_INFO_INC(tb->tb_sb, balance_at[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700281 /* Make balance in case insert_size[0] < 0 */
282 if (tb->insert_size[0] < 0)
283 return balance_leaf_when_delete(tb, flag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700285 zeros_num = 0;
Al Viro9dce07f2008-03-29 03:07:28 +0000286 if (flag == M_INSERT && !body)
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700287 zeros_num = ih_item_len(ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700289 pos_in_item = tb->tb_path->pos_in_item;
290 /* for indirect item pos_in_item is measured in unformatted node
291 pointers. Recalculate to bytes */
292 if (flag != M_INSERT
293 && is_indirect_le_ih(B_N_PITEM_HEAD(tbS0, item_pos)))
294 pos_in_item *= UNFM_P_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700296 if (tb->lnum[0] > 0) {
297 /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
298 if (item_pos < tb->lnum[0]) {
299 /* new item or it part falls to L[0], shift it too */
300 n = B_NR_ITEMS(tb->L[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700302 switch (flag) {
303 case M_INSERT: /* insert item into L[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700305 if (item_pos == tb->lnum[0] - 1
306 && tb->lbytes != -1) {
307 /* part of new item falls into L[0] */
308 int new_item_len;
309 int version;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700311 ret_val =
312 leaf_shift_left(tb, tb->lnum[0] - 1,
313 -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700315 /* Calculate item length to insert to S[0] */
316 new_item_len =
317 ih_item_len(ih) - tb->lbytes;
318 /* Calculate and check item length to insert to L[0] */
319 put_ih_item_len(ih,
320 ih_item_len(ih) -
321 new_item_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700323 RFALSE(ih_item_len(ih) <= 0,
324 "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
325 ih_item_len(ih));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700327 /* Insert new item into L[0] */
328 bi.tb = tb;
329 bi.bi_bh = tb->L[0];
330 bi.bi_parent = tb->FL[0];
331 bi.bi_position =
332 get_left_neighbor_position(tb, 0);
333 leaf_insert_into_buf(&bi,
334 n + item_pos -
335 ret_val, ih, body,
336 zeros_num >
337 ih_item_len(ih) ?
338 ih_item_len(ih) :
339 zeros_num);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700341 version = ih_version(ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700343 /* Calculate key component, item length and body to insert into S[0] */
344 set_le_ih_k_offset(ih,
345 le_ih_k_offset(ih) +
346 (tb->
347 lbytes <<
348 (is_indirect_le_ih
349 (ih) ? tb->tb_sb->
350 s_blocksize_bits -
351 UNFM_P_SHIFT :
352 0)));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700354 put_ih_item_len(ih, new_item_len);
355 if (tb->lbytes > zeros_num) {
356 body +=
357 (tb->lbytes - zeros_num);
358 zeros_num = 0;
359 } else
360 zeros_num -= tb->lbytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700362 RFALSE(ih_item_len(ih) <= 0,
363 "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
364 ih_item_len(ih));
365 } else {
366 /* new item in whole falls into L[0] */
367 /* Shift lnum[0]-1 items to L[0] */
368 ret_val =
369 leaf_shift_left(tb, tb->lnum[0] - 1,
370 tb->lbytes);
371 /* Insert new item into L[0] */
372 bi.tb = tb;
373 bi.bi_bh = tb->L[0];
374 bi.bi_parent = tb->FL[0];
375 bi.bi_position =
376 get_left_neighbor_position(tb, 0);
377 leaf_insert_into_buf(&bi,
378 n + item_pos -
379 ret_val, ih, body,
380 zeros_num);
381 tb->insert_size[0] = 0;
382 zeros_num = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700384 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700386 case M_PASTE: /* append item in L[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700388 if (item_pos == tb->lnum[0] - 1
389 && tb->lbytes != -1) {
390 /* we must shift the part of the appended item */
391 if (is_direntry_le_ih
392 (B_N_PITEM_HEAD(tbS0, item_pos))) {
393
394 RFALSE(zeros_num,
395 "PAP-12090: invalid parameter in case of a directory");
396 /* directory item */
397 if (tb->lbytes > pos_in_item) {
398 /* new directory entry falls into L[0] */
399 struct item_head
400 *pasted;
401 int l_pos_in_item =
402 pos_in_item;
403
404 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
405 ret_val =
406 leaf_shift_left(tb,
407 tb->
408 lnum
409 [0],
410 tb->
411 lbytes
412 -
413 1);
414 if (ret_val
415 && !item_pos) {
416 pasted =
417 B_N_PITEM_HEAD
418 (tb->L[0],
419 B_NR_ITEMS
420 (tb->
421 L[0]) -
422 1);
423 l_pos_in_item +=
424 I_ENTRY_COUNT
425 (pasted) -
426 (tb->
427 lbytes -
428 1);
429 }
430
431 /* Append given directory entry to directory item */
432 bi.tb = tb;
433 bi.bi_bh = tb->L[0];
434 bi.bi_parent =
435 tb->FL[0];
436 bi.bi_position =
437 get_left_neighbor_position
438 (tb, 0);
439 leaf_paste_in_buffer
440 (&bi,
441 n + item_pos -
442 ret_val,
443 l_pos_in_item,
444 tb->insert_size[0],
445 body, zeros_num);
446
447 /* previous string prepared space for pasting new entry, following string pastes this entry */
448
449 /* when we have merge directory item, pos_in_item has been changed too */
450
451 /* paste new directory entry. 1 is entry number */
Jeff Mahoneyeba00302009-03-30 14:02:18 -0400452 leaf_paste_entries(&bi,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700453 n +
454 item_pos
455 -
456 ret_val,
457 l_pos_in_item,
458 1,
459 (struct
460 reiserfs_de_head
461 *)
462 body,
463 body
464 +
465 DEH_SIZE,
466 tb->
467 insert_size
468 [0]
469 );
470 tb->insert_size[0] = 0;
471 } else {
472 /* new directory item doesn't fall into L[0] */
473 /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
474 leaf_shift_left(tb,
475 tb->
476 lnum[0],
477 tb->
478 lbytes);
479 }
480 /* Calculate new position to append in item body */
481 pos_in_item -= tb->lbytes;
482 } else {
483 /* regular object */
484 RFALSE(tb->lbytes <= 0,
485 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
486 tb->lbytes);
487 RFALSE(pos_in_item !=
488 ih_item_len
489 (B_N_PITEM_HEAD
490 (tbS0, item_pos)),
491 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
492 ih_item_len
493 (B_N_PITEM_HEAD
494 (tbS0, item_pos)),
495 pos_in_item);
496
497 if (tb->lbytes >= pos_in_item) {
498 /* appended item will be in L[0] in whole */
499 int l_n;
500
501 /* this bytes number must be appended to the last item of L[h] */
502 l_n =
503 tb->lbytes -
504 pos_in_item;
505
506 /* Calculate new insert_size[0] */
507 tb->insert_size[0] -=
508 l_n;
509
510 RFALSE(tb->
511 insert_size[0] <=
512 0,
513 "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
514 tb->
515 insert_size[0]);
516 ret_val =
517 leaf_shift_left(tb,
518 tb->
519 lnum
520 [0],
521 ih_item_len
522 (B_N_PITEM_HEAD
523 (tbS0,
524 item_pos)));
525 /* Append to body of item in L[0] */
526 bi.tb = tb;
527 bi.bi_bh = tb->L[0];
528 bi.bi_parent =
529 tb->FL[0];
530 bi.bi_position =
531 get_left_neighbor_position
532 (tb, 0);
533 leaf_paste_in_buffer
534 (&bi,
535 n + item_pos -
536 ret_val,
537 ih_item_len
538 (B_N_PITEM_HEAD
539 (tb->L[0],
540 n + item_pos -
541 ret_val)), l_n,
542 body,
543 zeros_num >
544 l_n ? l_n :
545 zeros_num);
546 /* 0-th item in S0 can be only of DIRECT type when l_n != 0 */
547 {
548 int version;
549 int temp_l =
550 l_n;
551
552 RFALSE
553 (ih_item_len
554 (B_N_PITEM_HEAD
555 (tbS0,
556 0)),
557 "PAP-12106: item length must be 0");
558 RFALSE
559 (comp_short_le_keys
560 (B_N_PKEY
561 (tbS0, 0),
562 B_N_PKEY
563 (tb->L[0],
564 n +
565 item_pos
566 -
567 ret_val)),
568 "PAP-12107: items must be of the same file");
569 if (is_indirect_le_ih(B_N_PITEM_HEAD(tb->L[0], n + item_pos - ret_val))) {
570 temp_l =
571 l_n
572 <<
573 (tb->
574 tb_sb->
575 s_blocksize_bits
576 -
577 UNFM_P_SHIFT);
578 }
579 /* update key of first item in S0 */
580 version =
581 ih_version
582 (B_N_PITEM_HEAD
583 (tbS0, 0));
584 set_le_key_k_offset
585 (version,
586 B_N_PKEY
587 (tbS0, 0),
588 le_key_k_offset
589 (version,
590 B_N_PKEY
591 (tbS0,
592 0)) +
593 temp_l);
594 /* update left delimiting key */
595 set_le_key_k_offset
596 (version,
597 B_N_PDELIM_KEY
598 (tb->
599 CFL[0],
600 tb->
601 lkey[0]),
602 le_key_k_offset
603 (version,
604 B_N_PDELIM_KEY
605 (tb->
606 CFL[0],
607 tb->
608 lkey[0]))
609 + temp_l);
610 }
611
612 /* Calculate new body, position in item and insert_size[0] */
613 if (l_n > zeros_num) {
614 body +=
615 (l_n -
616 zeros_num);
617 zeros_num = 0;
618 } else
619 zeros_num -=
620 l_n;
621 pos_in_item = 0;
622
623 RFALSE
624 (comp_short_le_keys
625 (B_N_PKEY(tbS0, 0),
626 B_N_PKEY(tb->L[0],
627 B_NR_ITEMS
628 (tb->
629 L[0]) -
630 1))
631 ||
632 !op_is_left_mergeable
633 (B_N_PKEY(tbS0, 0),
634 tbS0->b_size)
635 ||
636 !op_is_left_mergeable
637 (B_N_PDELIM_KEY
638 (tb->CFL[0],
639 tb->lkey[0]),
640 tbS0->b_size),
641 "PAP-12120: item must be merge-able with left neighboring item");
642 } else { /* only part of the appended item will be in L[0] */
643
644 /* Calculate position in item for append in S[0] */
645 pos_in_item -=
646 tb->lbytes;
647
648 RFALSE(pos_in_item <= 0,
649 "PAP-12125: no place for paste. pos_in_item=%d",
650 pos_in_item);
651
652 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
653 leaf_shift_left(tb,
654 tb->
655 lnum[0],
656 tb->
657 lbytes);
658 }
659 }
660 } else { /* appended item will be in L[0] in whole */
661
662 struct item_head *pasted;
663
664 if (!item_pos && op_is_left_mergeable(B_N_PKEY(tbS0, 0), tbS0->b_size)) { /* if we paste into first item of S[0] and it is left mergable */
665 /* then increment pos_in_item by the size of the last item in L[0] */
666 pasted =
667 B_N_PITEM_HEAD(tb->L[0],
668 n - 1);
669 if (is_direntry_le_ih(pasted))
670 pos_in_item +=
671 ih_entry_count
672 (pasted);
673 else
674 pos_in_item +=
675 ih_item_len(pasted);
676 }
677
678 /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
679 ret_val =
680 leaf_shift_left(tb, tb->lnum[0],
681 tb->lbytes);
682 /* Append to body of item in L[0] */
683 bi.tb = tb;
684 bi.bi_bh = tb->L[0];
685 bi.bi_parent = tb->FL[0];
686 bi.bi_position =
687 get_left_neighbor_position(tb, 0);
688 leaf_paste_in_buffer(&bi,
689 n + item_pos -
690 ret_val,
691 pos_in_item,
692 tb->insert_size[0],
693 body, zeros_num);
694
695 /* if appended item is directory, paste entry */
696 pasted =
697 B_N_PITEM_HEAD(tb->L[0],
698 n + item_pos -
699 ret_val);
700 if (is_direntry_le_ih(pasted))
Jeff Mahoneyeba00302009-03-30 14:02:18 -0400701 leaf_paste_entries(&bi,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700702 n +
703 item_pos -
704 ret_val,
705 pos_in_item,
706 1,
707 (struct
708 reiserfs_de_head
709 *)body,
710 body +
711 DEH_SIZE,
712 tb->
713 insert_size
714 [0]
715 );
716 /* if appended item is indirect item, put unformatted node into un list */
717 if (is_indirect_le_ih(pasted))
718 set_ih_free_space(pasted, 0);
719 tb->insert_size[0] = 0;
720 zeros_num = 0;
721 }
722 break;
723 default: /* cases d and t */
724 reiserfs_panic(tb->tb_sb,
725 "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
726 (flag ==
727 M_DELETE) ? "DELETE" : ((flag ==
728 M_CUT)
729 ? "CUT"
730 :
731 "UNKNOWN"),
732 flag);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700734 } else {
735 /* new item doesn't fall into L[0] */
736 leaf_shift_left(tb, tb->lnum[0], tb->lbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700740 /* tb->lnum[0] > 0 */
741 /* Calculate new item position */
742 item_pos -= (tb->lnum[0] - ((tb->lbytes != -1) ? 1 : 0));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700744 if (tb->rnum[0] > 0) {
745 /* shift rnum[0] items from S[0] to the right neighbor R[0] */
746 n = B_NR_ITEMS(tbS0);
747 switch (flag) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700749 case M_INSERT: /* insert item */
750 if (n - tb->rnum[0] < item_pos) { /* new item or its part falls to R[0] */
751 if (item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1) { /* part of new item falls into R[0] */
752 loff_t old_key_comp, old_len,
753 r_zeros_number;
754 const char *r_body;
755 int version;
756 loff_t offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700758 leaf_shift_right(tb, tb->rnum[0] - 1,
759 -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700761 version = ih_version(ih);
762 /* Remember key component and item length */
763 old_key_comp = le_ih_k_offset(ih);
764 old_len = ih_item_len(ih);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700766 /* Calculate key component and item length to insert into R[0] */
767 offset =
768 le_ih_k_offset(ih) +
769 ((old_len -
770 tb->
771 rbytes) << (is_indirect_le_ih(ih)
772 ? tb->tb_sb->
773 s_blocksize_bits -
774 UNFM_P_SHIFT : 0));
775 set_le_ih_k_offset(ih, offset);
776 put_ih_item_len(ih, tb->rbytes);
777 /* Insert part of the item into R[0] */
778 bi.tb = tb;
779 bi.bi_bh = tb->R[0];
780 bi.bi_parent = tb->FR[0];
781 bi.bi_position =
782 get_right_neighbor_position(tb, 0);
783 if ((old_len - tb->rbytes) > zeros_num) {
784 r_zeros_number = 0;
785 r_body =
786 body + (old_len -
787 tb->rbytes) -
788 zeros_num;
789 } else {
790 r_body = body;
791 r_zeros_number =
792 zeros_num - (old_len -
793 tb->rbytes);
794 zeros_num -= r_zeros_number;
795 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700797 leaf_insert_into_buf(&bi, 0, ih, r_body,
798 r_zeros_number);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700800 /* Replace right delimiting key by first key in R[0] */
801 replace_key(tb, tb->CFR[0], tb->rkey[0],
802 tb->R[0], 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700804 /* Calculate key component and item length to insert into S[0] */
805 set_le_ih_k_offset(ih, old_key_comp);
806 put_ih_item_len(ih,
807 old_len - tb->rbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700809 tb->insert_size[0] -= tb->rbytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700811 } else { /* whole new item falls into R[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700813 /* Shift rnum[0]-1 items to R[0] */
814 ret_val =
815 leaf_shift_right(tb,
816 tb->rnum[0] - 1,
817 tb->rbytes);
818 /* Insert new item into R[0] */
819 bi.tb = tb;
820 bi.bi_bh = tb->R[0];
821 bi.bi_parent = tb->FR[0];
822 bi.bi_position =
823 get_right_neighbor_position(tb, 0);
824 leaf_insert_into_buf(&bi,
825 item_pos - n +
826 tb->rnum[0] - 1,
827 ih, body,
828 zeros_num);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700830 if (item_pos - n + tb->rnum[0] - 1 == 0) {
831 replace_key(tb, tb->CFR[0],
832 tb->rkey[0],
833 tb->R[0], 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700835 }
836 zeros_num = tb->insert_size[0] = 0;
837 }
838 } else { /* new item or part of it doesn't fall into R[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700840 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700842 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700844 case M_PASTE: /* append item */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700846 if (n - tb->rnum[0] <= item_pos) { /* pasted item or part of it falls to R[0] */
847 if (item_pos == n - tb->rnum[0] && tb->rbytes != -1) { /* we must shift the part of the appended item */
848 if (is_direntry_le_ih(B_N_PITEM_HEAD(tbS0, item_pos))) { /* we append to directory item */
849 int entry_count;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700851 RFALSE(zeros_num,
852 "PAP-12145: invalid parameter in case of a directory");
853 entry_count =
854 I_ENTRY_COUNT(B_N_PITEM_HEAD
855 (tbS0,
856 item_pos));
857 if (entry_count - tb->rbytes <
858 pos_in_item)
859 /* new directory entry falls into R[0] */
860 {
861 int paste_entry_position;
862
863 RFALSE(tb->rbytes - 1 >=
864 entry_count
865 || !tb->
866 insert_size[0],
867 "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
868 tb->rbytes,
869 entry_count);
870 /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
871 leaf_shift_right(tb,
872 tb->
873 rnum
874 [0],
875 tb->
876 rbytes
877 - 1);
878 /* Paste given directory entry to directory item */
879 paste_entry_position =
880 pos_in_item -
881 entry_count +
882 tb->rbytes - 1;
883 bi.tb = tb;
884 bi.bi_bh = tb->R[0];
885 bi.bi_parent =
886 tb->FR[0];
887 bi.bi_position =
888 get_right_neighbor_position
889 (tb, 0);
890 leaf_paste_in_buffer
891 (&bi, 0,
892 paste_entry_position,
893 tb->insert_size[0],
894 body, zeros_num);
895 /* paste entry */
Jeff Mahoneyeba00302009-03-30 14:02:18 -0400896 leaf_paste_entries(&bi,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -0700897 0,
898 paste_entry_position,
899 1,
900 (struct
901 reiserfs_de_head
902 *)
903 body,
904 body
905 +
906 DEH_SIZE,
907 tb->
908 insert_size
909 [0]
910 );
911
912 if (paste_entry_position
913 == 0) {
914 /* change delimiting keys */
915 replace_key(tb,
916 tb->
917 CFR
918 [0],
919 tb->
920 rkey
921 [0],
922 tb->
923 R
924 [0],
925 0);
926 }
927
928 tb->insert_size[0] = 0;
929 pos_in_item++;
930 } else { /* new directory entry doesn't fall into R[0] */
931
932 leaf_shift_right(tb,
933 tb->
934 rnum
935 [0],
936 tb->
937 rbytes);
938 }
939 } else { /* regular object */
940
941 int n_shift, n_rem,
942 r_zeros_number;
943 const char *r_body;
944
945 /* Calculate number of bytes which must be shifted from appended item */
946 if ((n_shift =
947 tb->rbytes -
948 tb->insert_size[0]) < 0)
949 n_shift = 0;
950
951 RFALSE(pos_in_item !=
952 ih_item_len
953 (B_N_PITEM_HEAD
954 (tbS0, item_pos)),
955 "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
956 pos_in_item,
957 ih_item_len
958 (B_N_PITEM_HEAD
959 (tbS0, item_pos)));
960
961 leaf_shift_right(tb,
962 tb->rnum[0],
963 n_shift);
964 /* Calculate number of bytes which must remain in body after appending to R[0] */
965 if ((n_rem =
966 tb->insert_size[0] -
967 tb->rbytes) < 0)
968 n_rem = 0;
969
970 {
971 int version;
972 unsigned long temp_rem =
973 n_rem;
974
975 version =
976 ih_version
977 (B_N_PITEM_HEAD
978 (tb->R[0], 0));
979 if (is_indirect_le_key
980 (version,
981 B_N_PKEY(tb->R[0],
982 0))) {
983 temp_rem =
984 n_rem <<
985 (tb->tb_sb->
986 s_blocksize_bits
987 -
988 UNFM_P_SHIFT);
989 }
990 set_le_key_k_offset
991 (version,
992 B_N_PKEY(tb->R[0],
993 0),
994 le_key_k_offset
995 (version,
996 B_N_PKEY(tb->R[0],
997 0)) +
998 temp_rem);
999 set_le_key_k_offset
1000 (version,
1001 B_N_PDELIM_KEY(tb->
1002 CFR
1003 [0],
1004 tb->
1005 rkey
1006 [0]),
1007 le_key_k_offset
1008 (version,
1009 B_N_PDELIM_KEY
1010 (tb->CFR[0],
1011 tb->rkey[0])) +
1012 temp_rem);
1013 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014/* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
1015 k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001016 do_balance_mark_internal_dirty
1017 (tb, tb->CFR[0], 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001019 /* Append part of body into R[0] */
1020 bi.tb = tb;
1021 bi.bi_bh = tb->R[0];
1022 bi.bi_parent = tb->FR[0];
1023 bi.bi_position =
1024 get_right_neighbor_position
1025 (tb, 0);
1026 if (n_rem > zeros_num) {
1027 r_zeros_number = 0;
1028 r_body =
1029 body + n_rem -
1030 zeros_num;
1031 } else {
1032 r_body = body;
1033 r_zeros_number =
1034 zeros_num - n_rem;
1035 zeros_num -=
1036 r_zeros_number;
1037 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001039 leaf_paste_in_buffer(&bi, 0,
1040 n_shift,
1041 tb->
1042 insert_size
1043 [0] -
1044 n_rem,
1045 r_body,
1046 r_zeros_number);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001048 if (is_indirect_le_ih
1049 (B_N_PITEM_HEAD
1050 (tb->R[0], 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051#if 0
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001052 RFALSE(n_rem,
1053 "PAP-12160: paste more than one unformatted node pointer");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054#endif
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001055 set_ih_free_space
1056 (B_N_PITEM_HEAD
1057 (tb->R[0], 0), 0);
1058 }
1059 tb->insert_size[0] = n_rem;
1060 if (!n_rem)
1061 pos_in_item++;
1062 }
1063 } else { /* pasted item in whole falls into R[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001065 struct item_head *pasted;
1066
1067 ret_val =
1068 leaf_shift_right(tb, tb->rnum[0],
1069 tb->rbytes);
1070 /* append item in R[0] */
1071 if (pos_in_item >= 0) {
1072 bi.tb = tb;
1073 bi.bi_bh = tb->R[0];
1074 bi.bi_parent = tb->FR[0];
1075 bi.bi_position =
1076 get_right_neighbor_position
1077 (tb, 0);
1078 leaf_paste_in_buffer(&bi,
1079 item_pos -
1080 n +
1081 tb->
1082 rnum[0],
1083 pos_in_item,
1084 tb->
1085 insert_size
1086 [0], body,
1087 zeros_num);
1088 }
1089
1090 /* paste new entry, if item is directory item */
1091 pasted =
1092 B_N_PITEM_HEAD(tb->R[0],
1093 item_pos - n +
1094 tb->rnum[0]);
1095 if (is_direntry_le_ih(pasted)
1096 && pos_in_item >= 0) {
Jeff Mahoneyeba00302009-03-30 14:02:18 -04001097 leaf_paste_entries(&bi,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001098 item_pos -
1099 n +
1100 tb->rnum[0],
1101 pos_in_item,
1102 1,
1103 (struct
1104 reiserfs_de_head
1105 *)body,
1106 body +
1107 DEH_SIZE,
1108 tb->
1109 insert_size
1110 [0]
1111 );
1112 if (!pos_in_item) {
1113
1114 RFALSE(item_pos - n +
1115 tb->rnum[0],
1116 "PAP-12165: directory item must be first item of node when pasting is in 0th position");
1117
1118 /* update delimiting keys */
1119 replace_key(tb,
1120 tb->CFR[0],
1121 tb->rkey[0],
1122 tb->R[0],
1123 0);
1124 }
1125 }
1126
1127 if (is_indirect_le_ih(pasted))
1128 set_ih_free_space(pasted, 0);
1129 zeros_num = tb->insert_size[0] = 0;
1130 }
1131 } else { /* new item doesn't fall into R[0] */
1132
1133 leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
1134 }
1135 break;
1136 default: /* cases d and t */
1137 reiserfs_panic(tb->tb_sb,
1138 "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
1139 (flag ==
1140 M_DELETE) ? "DELETE" : ((flag ==
1141 M_CUT) ? "CUT"
1142 : "UNKNOWN"),
1143 flag);
1144 }
1145
1146 }
1147
1148 /* tb->rnum[0] > 0 */
1149 RFALSE(tb->blknum[0] > 3,
1150 "PAP-12180: blknum can not be %d. It must be <= 3",
1151 tb->blknum[0]);
1152 RFALSE(tb->blknum[0] < 0,
1153 "PAP-12185: blknum can not be %d. It must be >= 0",
1154 tb->blknum[0]);
1155
1156 /* if while adding to a node we discover that it is possible to split
1157 it in two, and merge the left part into the left neighbor and the
1158 right part into the right neighbor, eliminating the node */
1159 if (tb->blknum[0] == 0) { /* node S[0] is empty now */
1160
1161 RFALSE(!tb->lnum[0] || !tb->rnum[0],
1162 "PAP-12190: lnum and rnum must not be zero");
1163 /* if insertion was done before 0-th position in R[0], right
1164 delimiting key of the tb->L[0]'s and left delimiting key are
1165 not set correctly */
1166 if (tb->CFL[0]) {
1167 if (!tb->CFR[0])
1168 reiserfs_panic(tb->tb_sb,
1169 "vs-12195: balance_leaf: CFR not initialized");
1170 copy_key(B_N_PDELIM_KEY(tb->CFL[0], tb->lkey[0]),
1171 B_N_PDELIM_KEY(tb->CFR[0], tb->rkey[0]));
1172 do_balance_mark_internal_dirty(tb, tb->CFL[0], 0);
1173 }
1174
1175 reiserfs_invalidate_buffer(tb, tbS0);
1176 return 0;
1177 }
1178
1179 /* Fill new nodes that appear in place of S[0] */
1180
1181 /* I am told that this copying is because we need an array to enable
1182 the looping code. -Hans */
1183 snum[0] = tb->s1num, snum[1] = tb->s2num;
1184 sbytes[0] = tb->s1bytes;
1185 sbytes[1] = tb->s2bytes;
1186 for (i = tb->blknum[0] - 2; i >= 0; i--) {
1187
1188 RFALSE(!snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i,
1189 snum[i]);
1190
1191 /* here we shift from S to S_new nodes */
1192
1193 S_new[i] = get_FEB(tb);
1194
1195 /* initialized block type and tree level */
1196 set_blkh_level(B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL);
1197
1198 n = B_NR_ITEMS(tbS0);
1199
1200 switch (flag) {
1201 case M_INSERT: /* insert item */
1202
1203 if (n - snum[i] < item_pos) { /* new item or it's part falls to first new node S_new[i] */
1204 if (item_pos == n - snum[i] + 1 && sbytes[i] != -1) { /* part of new item falls into S_new[i] */
1205 int old_key_comp, old_len,
1206 r_zeros_number;
1207 const char *r_body;
1208 int version;
1209
1210 /* Move snum[i]-1 items from S[0] to S_new[i] */
1211 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1212 snum[i] - 1, -1,
1213 S_new[i]);
1214 /* Remember key component and item length */
1215 version = ih_version(ih);
1216 old_key_comp = le_ih_k_offset(ih);
1217 old_len = ih_item_len(ih);
1218
1219 /* Calculate key component and item length to insert into S_new[i] */
1220 set_le_ih_k_offset(ih,
1221 le_ih_k_offset(ih) +
1222 ((old_len -
1223 sbytes[i]) <<
1224 (is_indirect_le_ih
1225 (ih) ? tb->tb_sb->
1226 s_blocksize_bits -
1227 UNFM_P_SHIFT :
1228 0)));
1229
1230 put_ih_item_len(ih, sbytes[i]);
1231
1232 /* Insert part of the item into S_new[i] before 0-th item */
1233 bi.tb = tb;
1234 bi.bi_bh = S_new[i];
1235 bi.bi_parent = NULL;
1236 bi.bi_position = 0;
1237
1238 if ((old_len - sbytes[i]) > zeros_num) {
1239 r_zeros_number = 0;
1240 r_body =
1241 body + (old_len -
1242 sbytes[i]) -
1243 zeros_num;
1244 } else {
1245 r_body = body;
1246 r_zeros_number =
1247 zeros_num - (old_len -
1248 sbytes[i]);
1249 zeros_num -= r_zeros_number;
1250 }
1251
1252 leaf_insert_into_buf(&bi, 0, ih, r_body,
1253 r_zeros_number);
1254
1255 /* Calculate key component and item length to insert into S[i] */
1256 set_le_ih_k_offset(ih, old_key_comp);
1257 put_ih_item_len(ih,
1258 old_len - sbytes[i]);
1259 tb->insert_size[0] -= sbytes[i];
1260 } else { /* whole new item falls into S_new[i] */
1261
1262 /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
1263 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1264 snum[i] - 1, sbytes[i],
1265 S_new[i]);
1266
1267 /* Insert new item into S_new[i] */
1268 bi.tb = tb;
1269 bi.bi_bh = S_new[i];
1270 bi.bi_parent = NULL;
1271 bi.bi_position = 0;
1272 leaf_insert_into_buf(&bi,
1273 item_pos - n +
1274 snum[i] - 1, ih,
1275 body, zeros_num);
1276
1277 zeros_num = tb->insert_size[0] = 0;
1278 }
1279 }
1280
1281 else { /* new item or it part don't falls into S_new[i] */
1282
1283 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1284 snum[i], sbytes[i], S_new[i]);
1285 }
1286 break;
1287
1288 case M_PASTE: /* append item */
1289
1290 if (n - snum[i] <= item_pos) { /* pasted item or part if it falls to S_new[i] */
1291 if (item_pos == n - snum[i] && sbytes[i] != -1) { /* we must shift part of the appended item */
1292 struct item_head *aux_ih;
1293
1294 RFALSE(ih, "PAP-12210: ih must be 0");
1295
1296 if (is_direntry_le_ih
1297 (aux_ih =
1298 B_N_PITEM_HEAD(tbS0, item_pos))) {
1299 /* we append to directory item */
1300
1301 int entry_count;
1302
1303 entry_count =
1304 ih_entry_count(aux_ih);
1305
1306 if (entry_count - sbytes[i] <
1307 pos_in_item
1308 && pos_in_item <=
1309 entry_count) {
1310 /* new directory entry falls into S_new[i] */
1311
1312 RFALSE(!tb->
1313 insert_size[0],
1314 "PAP-12215: insert_size is already 0");
1315 RFALSE(sbytes[i] - 1 >=
1316 entry_count,
1317 "PAP-12220: there are no so much entries (%d), only %d",
1318 sbytes[i] - 1,
1319 entry_count);
1320
1321 /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
1322 leaf_move_items
1323 (LEAF_FROM_S_TO_SNEW,
1324 tb, snum[i],
1325 sbytes[i] - 1,
1326 S_new[i]);
1327 /* Paste given directory entry to directory item */
1328 bi.tb = tb;
1329 bi.bi_bh = S_new[i];
1330 bi.bi_parent = NULL;
1331 bi.bi_position = 0;
1332 leaf_paste_in_buffer
1333 (&bi, 0,
1334 pos_in_item -
1335 entry_count +
1336 sbytes[i] - 1,
1337 tb->insert_size[0],
1338 body, zeros_num);
1339 /* paste new directory entry */
Jeff Mahoneyeba00302009-03-30 14:02:18 -04001340 leaf_paste_entries(&bi,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001341 0,
1342 pos_in_item
1343 -
1344 entry_count
1345 +
1346 sbytes
1347 [i] -
1348 1, 1,
1349 (struct
1350 reiserfs_de_head
1351 *)
1352 body,
1353 body
1354 +
1355 DEH_SIZE,
1356 tb->
1357 insert_size
1358 [0]
1359 );
1360 tb->insert_size[0] = 0;
1361 pos_in_item++;
1362 } else { /* new directory entry doesn't fall into S_new[i] */
1363 leaf_move_items
1364 (LEAF_FROM_S_TO_SNEW,
1365 tb, snum[i],
1366 sbytes[i],
1367 S_new[i]);
1368 }
1369 } else { /* regular object */
1370
1371 int n_shift, n_rem,
1372 r_zeros_number;
1373 const char *r_body;
1374
1375 RFALSE(pos_in_item !=
1376 ih_item_len
1377 (B_N_PITEM_HEAD
1378 (tbS0, item_pos))
1379 || tb->insert_size[0] <=
1380 0,
1381 "PAP-12225: item too short or insert_size <= 0");
1382
1383 /* Calculate number of bytes which must be shifted from appended item */
1384 n_shift =
1385 sbytes[i] -
1386 tb->insert_size[0];
1387 if (n_shift < 0)
1388 n_shift = 0;
1389 leaf_move_items
1390 (LEAF_FROM_S_TO_SNEW, tb,
1391 snum[i], n_shift,
1392 S_new[i]);
1393
1394 /* Calculate number of bytes which must remain in body after append to S_new[i] */
1395 n_rem =
1396 tb->insert_size[0] -
1397 sbytes[i];
1398 if (n_rem < 0)
1399 n_rem = 0;
1400 /* Append part of body into S_new[0] */
1401 bi.tb = tb;
1402 bi.bi_bh = S_new[i];
1403 bi.bi_parent = NULL;
1404 bi.bi_position = 0;
1405
1406 if (n_rem > zeros_num) {
1407 r_zeros_number = 0;
1408 r_body =
1409 body + n_rem -
1410 zeros_num;
1411 } else {
1412 r_body = body;
1413 r_zeros_number =
1414 zeros_num - n_rem;
1415 zeros_num -=
1416 r_zeros_number;
1417 }
1418
1419 leaf_paste_in_buffer(&bi, 0,
1420 n_shift,
1421 tb->
1422 insert_size
1423 [0] -
1424 n_rem,
1425 r_body,
1426 r_zeros_number);
1427 {
1428 struct item_head *tmp;
1429
1430 tmp =
1431 B_N_PITEM_HEAD(S_new
1432 [i],
1433 0);
1434 if (is_indirect_le_ih
1435 (tmp)) {
1436 set_ih_free_space
1437 (tmp, 0);
1438 set_le_ih_k_offset
1439 (tmp,
1440 le_ih_k_offset
1441 (tmp) +
1442 (n_rem <<
1443 (tb->
1444 tb_sb->
1445 s_blocksize_bits
1446 -
1447 UNFM_P_SHIFT)));
1448 } else {
1449 set_le_ih_k_offset
1450 (tmp,
1451 le_ih_k_offset
1452 (tmp) +
1453 n_rem);
1454 }
1455 }
1456
1457 tb->insert_size[0] = n_rem;
1458 if (!n_rem)
1459 pos_in_item++;
1460 }
1461 } else
1462 /* item falls wholly into S_new[i] */
1463 {
Harvey Harrison8acc5702008-04-28 02:16:21 -07001464 int leaf_mi;
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001465 struct item_head *pasted;
1466
1467#ifdef CONFIG_REISERFS_CHECK
Harvey Harrison8acc5702008-04-28 02:16:21 -07001468 struct item_head *ih_check =
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001469 B_N_PITEM_HEAD(tbS0, item_pos);
1470
Harvey Harrison8acc5702008-04-28 02:16:21 -07001471 if (!is_direntry_le_ih(ih_check)
1472 && (pos_in_item != ih_item_len(ih_check)
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001473 || tb->insert_size[0] <= 0))
1474 reiserfs_panic(tb->tb_sb,
1475 "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
1476#endif /* CONFIG_REISERFS_CHECK */
1477
Harvey Harrison8acc5702008-04-28 02:16:21 -07001478 leaf_mi =
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001479 leaf_move_items(LEAF_FROM_S_TO_SNEW,
1480 tb, snum[i],
1481 sbytes[i],
1482 S_new[i]);
1483
Harvey Harrison8acc5702008-04-28 02:16:21 -07001484 RFALSE(leaf_mi,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001485 "PAP-12240: unexpected value returned by leaf_move_items (%d)",
Harvey Harrison8acc5702008-04-28 02:16:21 -07001486 leaf_mi);
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001487
1488 /* paste into item */
1489 bi.tb = tb;
1490 bi.bi_bh = S_new[i];
1491 bi.bi_parent = NULL;
1492 bi.bi_position = 0;
1493 leaf_paste_in_buffer(&bi,
1494 item_pos - n +
1495 snum[i],
1496 pos_in_item,
1497 tb->insert_size[0],
1498 body, zeros_num);
1499
1500 pasted =
1501 B_N_PITEM_HEAD(S_new[i],
1502 item_pos - n +
1503 snum[i]);
1504 if (is_direntry_le_ih(pasted)) {
Jeff Mahoneyeba00302009-03-30 14:02:18 -04001505 leaf_paste_entries(&bi,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001506 item_pos -
1507 n + snum[i],
1508 pos_in_item,
1509 1,
1510 (struct
1511 reiserfs_de_head
1512 *)body,
1513 body +
1514 DEH_SIZE,
1515 tb->
1516 insert_size
1517 [0]
1518 );
1519 }
1520
1521 /* if we paste to indirect item update ih_free_space */
1522 if (is_indirect_le_ih(pasted))
1523 set_ih_free_space(pasted, 0);
1524 zeros_num = tb->insert_size[0] = 0;
1525 }
1526 }
1527
1528 else { /* pasted item doesn't fall into S_new[i] */
1529
1530 leaf_move_items(LEAF_FROM_S_TO_SNEW, tb,
1531 snum[i], sbytes[i], S_new[i]);
1532 }
1533 break;
1534 default: /* cases d and t */
1535 reiserfs_panic(tb->tb_sb,
1536 "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
1537 (flag ==
1538 M_DELETE) ? "DELETE" : ((flag ==
1539 M_CUT) ? "CUT"
1540 : "UNKNOWN"),
1541 flag);
1542 }
1543
1544 memcpy(insert_key + i, B_N_PKEY(S_new[i], 0), KEY_SIZE);
1545 insert_ptr[i] = S_new[i];
1546
1547 RFALSE(!buffer_journaled(S_new[i])
1548 || buffer_journal_dirty(S_new[i])
1549 || buffer_dirty(S_new[i]), "PAP-12247: S_new[%d] : (%b)",
1550 i, S_new[i]);
1551 }
1552
1553 /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
1554 affected item which remains in S */
1555 if (0 <= item_pos && item_pos < tb->s0num) { /* if we must insert or append into buffer S[0] */
1556
1557 switch (flag) {
1558 case M_INSERT: /* insert item into S[0] */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559 bi.tb = tb;
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001560 bi.bi_bh = tbS0;
1561 bi.bi_parent = PATH_H_PPARENT(tb->tb_path, 0);
1562 bi.bi_position = PATH_H_POSITION(tb->tb_path, 1);
1563 leaf_insert_into_buf(&bi, item_pos, ih, body,
1564 zeros_num);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001566 /* If we insert the first key change the delimiting key */
1567 if (item_pos == 0) {
1568 if (tb->CFL[0]) /* can be 0 in reiserfsck */
1569 replace_key(tb, tb->CFL[0], tb->lkey[0],
1570 tbS0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001573 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001575 case M_PASTE:{ /* append item in S[0] */
1576 struct item_head *pasted;
1577
1578 pasted = B_N_PITEM_HEAD(tbS0, item_pos);
1579 /* when directory, may be new entry already pasted */
1580 if (is_direntry_le_ih(pasted)) {
1581 if (pos_in_item >= 0 &&
1582 pos_in_item <=
1583 ih_entry_count(pasted)) {
1584
1585 RFALSE(!tb->insert_size[0],
1586 "PAP-12260: insert_size is 0 already");
1587
1588 /* prepare space */
1589 bi.tb = tb;
1590 bi.bi_bh = tbS0;
1591 bi.bi_parent =
1592 PATH_H_PPARENT(tb->tb_path,
1593 0);
1594 bi.bi_position =
1595 PATH_H_POSITION(tb->tb_path,
1596 1);
1597 leaf_paste_in_buffer(&bi,
1598 item_pos,
1599 pos_in_item,
1600 tb->
1601 insert_size
1602 [0], body,
1603 zeros_num);
1604
1605 /* paste entry */
Jeff Mahoneyeba00302009-03-30 14:02:18 -04001606 leaf_paste_entries(&bi,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001607 item_pos,
1608 pos_in_item,
1609 1,
1610 (struct
1611 reiserfs_de_head
1612 *)body,
1613 body +
1614 DEH_SIZE,
1615 tb->
1616 insert_size
1617 [0]
1618 );
1619 if (!item_pos && !pos_in_item) {
1620 RFALSE(!tb->CFL[0]
1621 || !tb->L[0],
1622 "PAP-12270: CFL[0]/L[0] must be specified");
1623 if (tb->CFL[0]) {
1624 replace_key(tb,
1625 tb->
1626 CFL
1627 [0],
1628 tb->
1629 lkey
1630 [0],
1631 tbS0,
1632 0);
1633
1634 }
1635 }
1636 tb->insert_size[0] = 0;
1637 }
1638 } else { /* regular object */
1639 if (pos_in_item == ih_item_len(pasted)) {
1640
1641 RFALSE(tb->insert_size[0] <= 0,
1642 "PAP-12275: insert size must not be %d",
1643 tb->insert_size[0]);
1644 bi.tb = tb;
1645 bi.bi_bh = tbS0;
1646 bi.bi_parent =
1647 PATH_H_PPARENT(tb->tb_path,
1648 0);
1649 bi.bi_position =
1650 PATH_H_POSITION(tb->tb_path,
1651 1);
1652 leaf_paste_in_buffer(&bi,
1653 item_pos,
1654 pos_in_item,
1655 tb->
1656 insert_size
1657 [0], body,
1658 zeros_num);
1659
1660 if (is_indirect_le_ih(pasted)) {
1661#if 0
1662 RFALSE(tb->
1663 insert_size[0] !=
1664 UNFM_P_SIZE,
1665 "PAP-12280: insert_size for indirect item must be %d, not %d",
1666 UNFM_P_SIZE,
1667 tb->
1668 insert_size[0]);
1669#endif
1670 set_ih_free_space
1671 (pasted, 0);
1672 }
1673 tb->insert_size[0] = 0;
1674 }
1675#ifdef CONFIG_REISERFS_CHECK
1676 else {
1677 if (tb->insert_size[0]) {
1678 print_cur_tb("12285");
1679 reiserfs_panic(tb->
1680 tb_sb,
1681 "PAP-12285: balance_leaf: insert_size must be 0 (%d)",
1682 tb->
1683 insert_size
1684 [0]);
1685 }
1686 }
1687#endif /* CONFIG_REISERFS_CHECK */
1688
1689 }
1690 } /* case M_PASTE: */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001693#ifdef CONFIG_REISERFS_CHECK
1694 if (flag == M_PASTE && tb->insert_size[0]) {
1695 print_cur_tb("12290");
1696 reiserfs_panic(tb->tb_sb,
1697 "PAP-12290: balance_leaf: insert_size is still not 0 (%d)",
1698 tb->insert_size[0]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001700#endif /* CONFIG_REISERFS_CHECK */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 return 0;
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001703} /* Leaf level of the tree is balanced (end of balance_leaf) */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704
1705/* Make empty node */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001706void make_empty_node(struct buffer_info *bi)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001708 struct block_head *blkh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001710 RFALSE(bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001712 blkh = B_BLK_HEAD(bi->bi_bh);
1713 set_blkh_nr_item(blkh, 0);
1714 set_blkh_free_space(blkh, MAX_CHILD_SIZE(bi->bi_bh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001716 if (bi->bi_parent)
1717 B_N_CHILD(bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718}
1719
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720/* Get first empty buffer */
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001721struct buffer_head *get_FEB(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001722{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001723 int i;
1724 struct buffer_head *first_b;
1725 struct buffer_info bi;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001727 for (i = 0; i < MAX_FEB_SIZE; i++)
Al Viro9dce07f2008-03-29 03:07:28 +00001728 if (tb->FEB[i] != NULL)
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001729 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001731 if (i == MAX_FEB_SIZE)
1732 reiserfs_panic(tb->tb_sb,
1733 "vs-12300: get_FEB: FEB list is empty");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001735 bi.tb = tb;
1736 bi.bi_bh = first_b = tb->FEB[i];
1737 bi.bi_parent = NULL;
1738 bi.bi_position = 0;
1739 make_empty_node(&bi);
1740 set_buffer_uptodate(first_b);
1741 tb->FEB[i] = NULL;
1742 tb->used[i] = first_b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001744 return (first_b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745}
1746
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747/* This is now used because reiserfs_free_block has to be able to
1748** schedule.
1749*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001750static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001752 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001754 if (buffer_dirty(bh))
1755 reiserfs_warning(tb->tb_sb,
1756 "store_thrown deals with dirty buffer");
Ahmed S. Darwish79a81ae2007-02-12 00:52:09 -08001757 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001758 if (!tb->thrown[i]) {
1759 tb->thrown[i] = bh;
1760 get_bh(bh); /* free_thrown puts this */
1761 return;
1762 }
1763 reiserfs_warning(tb->tb_sb, "store_thrown: too many thrown buffers");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764}
1765
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001766static void free_thrown(struct tree_balance *tb)
1767{
1768 int i;
1769 b_blocknr_t blocknr;
Ahmed S. Darwish79a81ae2007-02-12 00:52:09 -08001770 for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001771 if (tb->thrown[i]) {
1772 blocknr = tb->thrown[i]->b_blocknr;
1773 if (buffer_dirty(tb->thrown[i]))
1774 reiserfs_warning(tb->tb_sb,
1775 "free_thrown deals with dirty buffer %d",
1776 blocknr);
1777 brelse(tb->thrown[i]); /* incremented in store_thrown */
1778 reiserfs_free_block(tb->transaction_handle, NULL,
1779 blocknr, 0);
1780 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782}
1783
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001784void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001786 struct block_head *blkh;
1787 blkh = B_BLK_HEAD(bh);
1788 set_blkh_level(blkh, FREE_LEVEL);
1789 set_blkh_nr_item(blkh, 0);
1790
1791 clear_buffer_dirty(bh);
1792 store_thrown(tb, bh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793}
1794
1795/* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001796void replace_key(struct tree_balance *tb, struct buffer_head *dest, int n_dest,
1797 struct buffer_head *src, int n_src)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798{
1799
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001800 RFALSE(dest == NULL || src == NULL,
1801 "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
1802 src, dest);
1803 RFALSE(!B_IS_KEYS_LEVEL(dest),
1804 "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
1805 dest);
1806 RFALSE(n_dest < 0 || n_src < 0,
1807 "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
1808 RFALSE(n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
1809 "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
1810 n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001812 if (B_IS_ITEMS_LEVEL(src))
1813 /* source buffer contains leaf node */
1814 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PITEM_HEAD(src, n_src),
1815 KEY_SIZE);
1816 else
1817 memcpy(B_N_PDELIM_KEY(dest, n_dest), B_N_PDELIM_KEY(src, n_src),
1818 KEY_SIZE);
1819
1820 do_balance_mark_internal_dirty(tb, dest, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821}
1822
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001823int get_left_neighbor_position(struct tree_balance *tb, int h)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001825 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826
Al Viro9dce07f2008-03-29 03:07:28 +00001827 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FL[h] == NULL,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001828 "vs-12325: FL[%d](%p) or F[%d](%p) does not exist",
1829 h, tb->FL[h], h, PATH_H_PPARENT(tb->tb_path, h));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001831 if (Sh_position == 0)
1832 return B_NR_ITEMS(tb->FL[h]);
1833 else
1834 return Sh_position - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835}
1836
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001837int get_right_neighbor_position(struct tree_balance *tb, int h)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001839 int Sh_position = PATH_H_POSITION(tb->tb_path, h + 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840
Al Viro9dce07f2008-03-29 03:07:28 +00001841 RFALSE(PATH_H_PPARENT(tb->tb_path, h) == NULL || tb->FR[h] == NULL,
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001842 "vs-12330: F[%d](%p) or FR[%d](%p) does not exist",
1843 h, PATH_H_PPARENT(tb->tb_path, h), h, tb->FR[h]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001844
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001845 if (Sh_position == B_NR_ITEMS(PATH_H_PPARENT(tb->tb_path, h)))
1846 return 0;
1847 else
1848 return Sh_position + 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849}
1850
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851#ifdef CONFIG_REISERFS_CHECK
1852
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001853int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value);
1854static void check_internal_node(struct super_block *s, struct buffer_head *bh,
1855 char *mes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001856{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001857 struct disk_child *dc;
1858 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001860 RFALSE(!bh, "PAP-12336: bh == 0");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001862 if (!bh || !B_IS_IN_TREE(bh))
1863 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001865 RFALSE(!buffer_dirty(bh) &&
1866 !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
1867 "PAP-12337: buffer (%b) must be dirty", bh);
1868 dc = B_N_CHILD(bh, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001870 for (i = 0; i <= B_NR_ITEMS(bh); i++, dc++) {
1871 if (!is_reusable(s, dc_block_number(dc), 1)) {
1872 print_cur_tb(mes);
1873 reiserfs_panic(s,
1874 "PAP-12338: check_internal_node: invalid child pointer %y in %b",
1875 dc, bh);
1876 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001878}
1879
1880static int locked_or_not_in_tree(struct buffer_head *bh, char *which)
1881{
1882 if ((!buffer_journal_prepared(bh) && buffer_locked(bh)) ||
1883 !B_IS_IN_TREE(bh)) {
1884 reiserfs_warning(NULL,
1885 "vs-12339: locked_or_not_in_tree: %s (%b)",
1886 which, bh);
1887 return 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888 }
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001889 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001890}
1891
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001892static int check_before_balancing(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001894 int retval = 0;
1895
1896 if (cur_tb) {
1897 reiserfs_panic(tb->tb_sb, "vs-12335: check_before_balancing: "
1898 "suspect that schedule occurred based on cur_tb not being null at this point in code. "
1899 "do_balance cannot properly handle schedule occurring while it runs.");
1900 }
1901
1902 /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
1903 prepped all of these for us). */
1904 if (tb->lnum[0]) {
1905 retval |= locked_or_not_in_tree(tb->L[0], "L[0]");
1906 retval |= locked_or_not_in_tree(tb->FL[0], "FL[0]");
1907 retval |= locked_or_not_in_tree(tb->CFL[0], "CFL[0]");
1908 check_leaf(tb->L[0]);
1909 }
1910 if (tb->rnum[0]) {
1911 retval |= locked_or_not_in_tree(tb->R[0], "R[0]");
1912 retval |= locked_or_not_in_tree(tb->FR[0], "FR[0]");
1913 retval |= locked_or_not_in_tree(tb->CFR[0], "CFR[0]");
1914 check_leaf(tb->R[0]);
1915 }
1916 retval |= locked_or_not_in_tree(PATH_PLAST_BUFFER(tb->tb_path), "S[0]");
1917 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1918
1919 return retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920}
1921
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001922static void check_after_balance_leaf(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001924 if (tb->lnum[0]) {
1925 if (B_FREE_SPACE(tb->L[0]) !=
1926 MAX_CHILD_SIZE(tb->L[0]) -
1927 dc_size(B_N_CHILD
1928 (tb->FL[0], get_left_neighbor_position(tb, 0)))) {
1929 print_cur_tb("12221");
1930 reiserfs_panic(tb->tb_sb,
1931 "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
1932 }
1933 }
1934 if (tb->rnum[0]) {
1935 if (B_FREE_SPACE(tb->R[0]) !=
1936 MAX_CHILD_SIZE(tb->R[0]) -
1937 dc_size(B_N_CHILD
1938 (tb->FR[0], get_right_neighbor_position(tb, 0)))) {
1939 print_cur_tb("12222");
1940 reiserfs_panic(tb->tb_sb,
1941 "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
1942 }
1943 }
1944 if (PATH_H_PBUFFER(tb->tb_path, 1) &&
1945 (B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0)) !=
1946 (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1947 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1948 PATH_H_POSITION(tb->tb_path, 1)))))) {
1949 int left = B_FREE_SPACE(PATH_H_PBUFFER(tb->tb_path, 0));
1950 int right = (MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)) -
1951 dc_size(B_N_CHILD(PATH_H_PBUFFER(tb->tb_path, 1),
1952 PATH_H_POSITION(tb->tb_path,
1953 1))));
1954 print_cur_tb("12223");
1955 reiserfs_warning(tb->tb_sb,
1956 "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
1957 "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d",
1958 left,
1959 MAX_CHILD_SIZE(PATH_H_PBUFFER(tb->tb_path, 0)),
1960 PATH_H_PBUFFER(tb->tb_path, 1),
1961 PATH_H_POSITION(tb->tb_path, 1),
1962 dc_size(B_N_CHILD
1963 (PATH_H_PBUFFER(tb->tb_path, 1),
1964 PATH_H_POSITION(tb->tb_path, 1))),
1965 right);
1966 reiserfs_panic(tb->tb_sb,
1967 "PAP-12365: check_after_balance_leaf: S is incorrect");
1968 }
1969}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07001971static void check_leaf_level(struct tree_balance *tb)
1972{
1973 check_leaf(tb->L[0]);
1974 check_leaf(tb->R[0]);
1975 check_leaf(PATH_PLAST_BUFFER(tb->tb_path));
1976}
1977
1978static void check_internal_levels(struct tree_balance *tb)
1979{
1980 int h;
1981
1982 /* check all internal nodes */
1983 for (h = 1; tb->insert_size[h]; h++) {
1984 check_internal_node(tb->tb_sb, PATH_H_PBUFFER(tb->tb_path, h),
1985 "BAD BUFFER ON PATH");
1986 if (tb->lnum[h])
1987 check_internal_node(tb->tb_sb, tb->L[h], "BAD L");
1988 if (tb->rnum[h])
1989 check_internal_node(tb->tb_sb, tb->R[h], "BAD R");
1990 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991
1992}
1993
1994#endif
1995
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996/* Now we have all of the buffers that must be used in balancing of
1997 the tree. We rely on the assumption that schedule() will not occur
1998 while do_balance works. ( Only interrupt handlers are acceptable.)
1999 We balance the tree according to the analysis made before this,
2000 using buffers already obtained. For SMP support it will someday be
2001 necessary to add ordered locking of tb. */
2002
2003/* Some interesting rules of balancing:
2004
2005 we delete a maximum of two nodes per level per balancing: we never
2006 delete R, when we delete two of three nodes L, S, R then we move
2007 them into R.
2008
2009 we only delete L if we are deleting two nodes, if we delete only
2010 one node we delete S
2011
2012 if we shift leaves then we shift as much as we can: this is a
2013 deliberate policy of extremism in node packing which results in
2014 higher average utilization after repeated random balance operations
2015 at the cost of more memory copies and more balancing as a result of
2016 small insertions to full nodes.
2017
2018 if we shift internal nodes we try to evenly balance the node
2019 utilization, with consequent less balancing at the cost of lower
2020 utilization.
2021
2022 one could argue that the policy for directories in leaves should be
2023 that of internal nodes, but we will wait until another day to
2024 evaluate this.... It would be nice to someday measure and prove
2025 these assumptions as to what is optimal....
2026
2027*/
2028
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002029static inline void do_balance_starts(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002031 /* use print_cur_tb() to see initial state of struct
2032 tree_balance */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002034 /* store_print_tb (tb); */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002036 /* do not delete, just comment it out */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037/* print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb,
2038 "check");*/
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002039 RFALSE(check_before_balancing(tb), "PAP-12340: locked buffers in TB");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002041 cur_tb = tb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042#endif
2043}
2044
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002045static inline void do_balance_completed(struct tree_balance *tb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046{
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002047
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002049 check_leaf_level(tb);
2050 check_internal_levels(tb);
2051 cur_tb = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052#endif
2053
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002054 /* reiserfs_free_block is no longer schedule safe. So, we need to
2055 ** put the buffers we want freed on the thrown list during do_balance,
2056 ** and then free them now
2057 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002059 REISERFS_SB(tb->tb_sb)->s_do_balance++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002061 /* release all nodes hold to perform the balancing */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062 unfix_nodes(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002064 free_thrown(tb);
2065}
2066
2067void do_balance(struct tree_balance *tb, /* tree_balance structure */
2068 struct item_head *ih, /* item header of inserted item */
2069 const char *body, /* body of inserted item or bytes to paste */
2070 int flag)
2071{ /* i - insert, d - delete
2072 c - cut, p - paste
2073
2074 Cut means delete part of an item
2075 (includes removing an entry from a
2076 directory).
2077
2078 Delete means delete whole item.
2079
2080 Insert means add a new item into the
2081 tree.
2082
2083 Paste means to append to the end of an
2084 existing file or to insert a directory
2085 entry. */
2086 int child_pos, /* position of a child node in its parent */
2087 h; /* level of the tree being processed */
2088 struct item_head insert_key[2]; /* in our processing of one level
2089 we sometimes determine what
2090 must be inserted into the next
2091 higher level. This insertion
2092 consists of a key or two keys
2093 and their corresponding
2094 pointers */
2095 struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
2096 level */
2097
2098 tb->tb_mode = flag;
2099 tb->need_balance_dirty = 0;
2100
2101 if (FILESYSTEM_CHANGED_TB(tb)) {
2102 reiserfs_panic(tb->tb_sb,
2103 "clm-6000: do_balance, fs generation has changed\n");
2104 }
2105 /* if we have no real work to do */
2106 if (!tb->insert_size[0]) {
2107 reiserfs_warning(tb->tb_sb,
2108 "PAP-12350: do_balance: insert_size == 0, mode == %c",
2109 flag);
2110 unfix_nodes(tb);
2111 return;
2112 }
2113
2114 atomic_inc(&(fs_generation(tb->tb_sb)));
2115 do_balance_starts(tb);
2116
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117 /* balance leaf returns 0 except if combining L R and S into
2118 one node. see balance_internal() for explanation of this
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002119 line of code. */
2120 child_pos = PATH_H_B_ITEM_ORDER(tb->tb_path, 0) +
2121 balance_leaf(tb, ih, body, flag, insert_key, insert_ptr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122
2123#ifdef CONFIG_REISERFS_CHECK
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002124 check_after_balance_leaf(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002125#endif
2126
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002127 /* Balance internal level of the tree. */
2128 for (h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++)
2129 child_pos =
2130 balance_internal(tb, h, child_pos, insert_key, insert_ptr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131
Linus Torvaldsbd4c6252005-07-12 20:21:28 -07002132 do_balance_completed(tb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133
2134}