blob: 6b1dc3dc3a2d01904094de171cd7fb208d321c85 [file] [log] [blame]
David Teiglandb3b94fa2006-01-16 16:50:04 +00001/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License v.2.
8 */
9
10/*
11* Implements Extendible Hashing as described in:
12* "Extendible Hashing" by Fagin, et al in
13* __ACM Trans. on Database Systems__, Sept 1979.
14*
15*
16* Here's the layout of dirents which is essentially the same as that of ext2
17* within a single block. The field de_name_len is the number of bytes
18* actually required for the name (no null terminator). The field de_rec_len
19* is the number of bytes allocated to the dirent. The offset of the next
20* dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
21* deleted, the preceding dirent inherits its allocated space, ie
22* prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
23* by adding de_rec_len to the current dirent, this essentially causes the
24* deleted dirent to get jumped over when iterating through all the dirents.
25*
26* When deleting the first dirent in a block, there is no previous dirent so
27* the field de_ino is set to zero to designate it as deleted. When allocating
28* a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the
29* first dirent has (de_ino == 0) and de_rec_len is large enough, this first
30* dirent is allocated. Otherwise it must go through all the 'used' dirents
31* searching for one in which the amount of total space minus the amount of
32* used space will provide enough space for the new dirent.
33*
34* There are two types of blocks in which dirents reside. In a stuffed dinode,
35* the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of
36* the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the
37* beginning of the leaf block. The dirents reside in leaves when
38*
39* dip->i_di.di_flags & GFS2_DIF_EXHASH is true
40*
41* Otherwise, the dirents are "linear", within a single stuffed dinode block.
42*
43* When the dirents are in leaves, the actual contents of the directory file are
44* used as an array of 64-bit block pointers pointing to the leaf blocks. The
45* dirents are NOT in the directory file itself. There can be more than one block
46* pointer in the array that points to the same leaf. In fact, when a directory
47* is first converted from linear to exhash, all of the pointers point to the
48* same leaf.
49*
50* When a leaf is completely full, the size of the hash table can be
51* doubled unless it is already at the maximum size which is hard coded into
52* GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list,
53* but never before the maximum hash table size has been reached.
54*/
55
56#include <linux/sched.h>
57#include <linux/slab.h>
58#include <linux/spinlock.h>
59#include <linux/completion.h>
60#include <linux/buffer_head.h>
61#include <linux/sort.h>
62#include <asm/semaphore.h>
63
64#include "gfs2.h"
65#include "dir.h"
66#include "glock.h"
67#include "inode.h"
68#include "jdata.h"
69#include "meta_io.h"
70#include "quota.h"
71#include "rgrp.h"
72#include "trans.h"
73
74#define IS_LEAF 1 /* Hashed (leaf) directory */
75#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */
76
77#if 1
78#define gfs2_disk_hash2offset(h) (((uint64_t)(h)) >> 1)
79#define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)) << 1))
80#else
81#define gfs2_disk_hash2offset(h) (((uint64_t)(h)))
82#define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p))))
83#endif
84
85typedef int (*leaf_call_t) (struct gfs2_inode *dip,
86 uint32_t index, uint32_t len, uint64_t leaf_no,
87 void *data);
88
89/**
90 * int gfs2_filecmp - Compare two filenames
91 * @file1: The first filename
92 * @file2: The second filename
93 * @len_of_file2: The length of the second file
94 *
95 * This routine compares two filenames and returns 1 if they are equal.
96 *
97 * Returns: 1 if the files are the same, otherwise 0.
98 */
99
100int gfs2_filecmp(struct qstr *file1, char *file2, int len_of_file2)
101{
102 if (file1->len != len_of_file2)
103 return 0;
104 if (memcmp(file1->name, file2, file1->len))
105 return 0;
106 return 1;
107}
108
109/**
110 * dirent_first - Return the first dirent
111 * @dip: the directory
112 * @bh: The buffer
113 * @dent: Pointer to list of dirents
114 *
115 * return first dirent whether bh points to leaf or stuffed dinode
116 *
117 * Returns: IS_LEAF, IS_DINODE, or -errno
118 */
119
120static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh,
121 struct gfs2_dirent **dent)
122{
123 struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data;
124
125 if (be16_to_cpu(h->mh_type) == GFS2_METATYPE_LF) {
126 if (gfs2_meta_check(dip->i_sbd, bh))
127 return -EIO;
128 *dent = (struct gfs2_dirent *)(bh->b_data +
129 sizeof(struct gfs2_leaf));
130 return IS_LEAF;
131 } else {
132 if (gfs2_metatype_check(dip->i_sbd, bh, GFS2_METATYPE_DI))
133 return -EIO;
134 *dent = (struct gfs2_dirent *)(bh->b_data +
135 sizeof(struct gfs2_dinode));
136 return IS_DINODE;
137 }
138}
139
140/**
141 * dirent_next - Next dirent
142 * @dip: the directory
143 * @bh: The buffer
144 * @dent: Pointer to list of dirents
145 *
146 * Returns: 0 on success, error code otherwise
147 */
148
149static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh,
150 struct gfs2_dirent **dent)
151{
152 struct gfs2_dirent *tmp, *cur;
153 char *bh_end;
154 uint32_t cur_rec_len;
155
156 cur = *dent;
157 bh_end = bh->b_data + bh->b_size;
158 cur_rec_len = be32_to_cpu(cur->de_rec_len);
159
160 if ((char *)cur + cur_rec_len >= bh_end) {
161 if ((char *)cur + cur_rec_len > bh_end) {
162 gfs2_consist_inode(dip);
163 return -EIO;
164 }
165 return -ENOENT;
166 }
167
168 tmp = (struct gfs2_dirent *)((char *)cur + cur_rec_len);
169
170 if ((char *)tmp + be32_to_cpu(tmp->de_rec_len) > bh_end) {
171 gfs2_consist_inode(dip);
172 return -EIO;
173 }
174 /* Only the first dent could ever have de_inum.no_addr == 0 */
175 if (!tmp->de_inum.no_addr) {
176 gfs2_consist_inode(dip);
177 return -EIO;
178 }
179
180 *dent = tmp;
181
182 return 0;
183}
184
185/**
186 * dirent_del - Delete a dirent
187 * @dip: The GFS2 inode
188 * @bh: The buffer
189 * @prev: The previous dirent
190 * @cur: The current dirent
191 *
192 */
193
194static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh,
195 struct gfs2_dirent *prev, struct gfs2_dirent *cur)
196{
197 uint32_t cur_rec_len, prev_rec_len;
198
199 if (!cur->de_inum.no_addr) {
200 gfs2_consist_inode(dip);
201 return;
202 }
203
204 gfs2_trans_add_bh(dip->i_gl, bh);
205
206 /* If there is no prev entry, this is the first entry in the block.
207 The de_rec_len is already as big as it needs to be. Just zero
208 out the inode number and return. */
209
210 if (!prev) {
211 cur->de_inum.no_addr = 0; /* No endianess worries */
212 return;
213 }
214
215 /* Combine this dentry with the previous one. */
216
217 prev_rec_len = be32_to_cpu(prev->de_rec_len);
218 cur_rec_len = be32_to_cpu(cur->de_rec_len);
219
220 if ((char *)prev + prev_rec_len != (char *)cur)
221 gfs2_consist_inode(dip);
222 if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size)
223 gfs2_consist_inode(dip);
224
225 prev_rec_len += cur_rec_len;
226 prev->de_rec_len = cpu_to_be32(prev_rec_len);
227}
228
229/**
230 * gfs2_dirent_alloc - Allocate a directory entry
231 * @dip: The GFS2 inode
232 * @bh: The buffer
233 * @name_len: The length of the name
234 * @dent_out: Pointer to list of dirents
235 *
236 * Returns: 0 on success, error code otherwise
237 */
238
239int gfs2_dirent_alloc(struct gfs2_inode *dip, struct buffer_head *bh,
240 int name_len, struct gfs2_dirent **dent_out)
241{
242 struct gfs2_dirent *dent, *new;
243 unsigned int rec_len = GFS2_DIRENT_SIZE(name_len);
244 unsigned int entries = 0, offset = 0;
245 int type;
246
247 type = dirent_first(dip, bh, &dent);
248 if (type < 0)
249 return type;
250
251 if (type == IS_LEAF) {
252 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
253 entries = be16_to_cpu(leaf->lf_entries);
254 offset = sizeof(struct gfs2_leaf);
255 } else {
256 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
257 entries = be32_to_cpu(dinode->di_entries);
258 offset = sizeof(struct gfs2_dinode);
259 }
260
261 if (!entries) {
262 if (dent->de_inum.no_addr) {
263 gfs2_consist_inode(dip);
264 return -EIO;
265 }
266
267 gfs2_trans_add_bh(dip->i_gl, bh);
268
269 dent->de_rec_len = bh->b_size - offset;
270 dent->de_rec_len = cpu_to_be32(dent->de_rec_len);
271 dent->de_name_len = name_len;
272
273 *dent_out = dent;
274 return 0;
275 }
276
277 do {
278 uint32_t cur_rec_len, cur_name_len;
279
280 cur_rec_len = be32_to_cpu(dent->de_rec_len);
281 cur_name_len = dent->de_name_len;
282
283 if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) ||
284 (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len)) {
285 gfs2_trans_add_bh(dip->i_gl, bh);
286
287 if (dent->de_inum.no_addr) {
288 new = (struct gfs2_dirent *)((char *)dent +
289 GFS2_DIRENT_SIZE(cur_name_len));
290 memset(new, 0, sizeof(struct gfs2_dirent));
291
292 new->de_rec_len = cur_rec_len - GFS2_DIRENT_SIZE(cur_name_len);
293 new->de_rec_len = cpu_to_be32(new->de_rec_len);
294 new->de_name_len = name_len;
295
296 dent->de_rec_len = cur_rec_len - be32_to_cpu(new->de_rec_len);
297 dent->de_rec_len = cpu_to_be32(dent->de_rec_len);
298
299 *dent_out = new;
300 return 0;
301 }
302
303 dent->de_name_len = name_len;
304
305 *dent_out = dent;
306 return 0;
307 }
308 } while (dirent_next(dip, bh, &dent) == 0);
309
310 return -ENOSPC;
311}
312
313/**
314 * dirent_fits - See if we can fit a entry in this buffer
315 * @dip: The GFS2 inode
316 * @bh: The buffer
317 * @name_len: The length of the name
318 *
319 * Returns: 1 if it can fit, 0 otherwise
320 */
321
322static int dirent_fits(struct gfs2_inode *dip, struct buffer_head *bh,
323 int name_len)
324{
325 struct gfs2_dirent *dent;
326 unsigned int rec_len = GFS2_DIRENT_SIZE(name_len);
327 unsigned int entries = 0;
328 int type;
329
330 type = dirent_first(dip, bh, &dent);
331 if (type < 0)
332 return type;
333
334 if (type == IS_LEAF) {
335 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
336 entries = be16_to_cpu(leaf->lf_entries);
337 } else {
338 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
339 entries = be32_to_cpu(dinode->di_entries);
340 }
341
342 if (!entries)
343 return 1;
344
345 do {
346 uint32_t cur_rec_len, cur_name_len;
347
348 cur_rec_len = be32_to_cpu(dent->de_rec_len);
349 cur_name_len = dent->de_name_len;
350
351 if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) ||
352 (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len))
353 return 1;
354 } while (dirent_next(dip, bh, &dent) == 0);
355
356 return 0;
357}
358
359static int leaf_search(struct gfs2_inode *dip, struct buffer_head *bh,
360 struct qstr *filename, struct gfs2_dirent **dent_out,
361 struct gfs2_dirent **dent_prev)
362{
363 uint32_t hash;
364 struct gfs2_dirent *dent, *prev = NULL;
365 unsigned int entries = 0;
366 int type;
367
368 type = dirent_first(dip, bh, &dent);
369 if (type < 0)
370 return type;
371
372 if (type == IS_LEAF) {
373 struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
374 entries = be16_to_cpu(leaf->lf_entries);
375 } else if (type == IS_DINODE) {
376 struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data;
377 entries = be32_to_cpu(dinode->di_entries);
378 }
379
380 hash = gfs2_disk_hash(filename->name, filename->len);
381
382 do {
383 if (!dent->de_inum.no_addr) {
384 prev = dent;
385 continue;
386 }
387
388 if (be32_to_cpu(dent->de_hash) == hash &&
389 gfs2_filecmp(filename, (char *)(dent + 1),
390 dent->de_name_len)) {
391 *dent_out = dent;
392 if (dent_prev)
393 *dent_prev = prev;
394
395 return 0;
396 }
397
398 prev = dent;
399 } while (dirent_next(dip, bh, &dent) == 0);
400
401 return -ENOENT;
402}
403
404static int get_leaf(struct gfs2_inode *dip, uint64_t leaf_no,
405 struct buffer_head **bhp)
406{
407 int error;
408
409 error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_START | DIO_WAIT, bhp);
410 if (!error && gfs2_metatype_check(dip->i_sbd, *bhp, GFS2_METATYPE_LF))
411 error = -EIO;
412
413 return error;
414}
415
416/**
417 * get_leaf_nr - Get a leaf number associated with the index
418 * @dip: The GFS2 inode
419 * @index:
420 * @leaf_out:
421 *
422 * Returns: 0 on success, error code otherwise
423 */
424
425static int get_leaf_nr(struct gfs2_inode *dip, uint32_t index,
426 uint64_t *leaf_out)
427{
428 uint64_t leaf_no;
429 int error;
430
431 error = gfs2_jdata_read_mem(dip, (char *)&leaf_no,
432 index * sizeof(uint64_t),
433 sizeof(uint64_t));
434 if (error != sizeof(uint64_t))
435 return (error < 0) ? error : -EIO;
436
437 *leaf_out = be64_to_cpu(leaf_no);
438
439 return 0;
440}
441
442static int get_first_leaf(struct gfs2_inode *dip, uint32_t index,
443 struct buffer_head **bh_out)
444{
445 uint64_t leaf_no;
446 int error;
447
448 error = get_leaf_nr(dip, index, &leaf_no);
449 if (!error)
450 error = get_leaf(dip, leaf_no, bh_out);
451
452 return error;
453}
454
455static int get_next_leaf(struct gfs2_inode *dip, struct buffer_head *bh_in,
456 struct buffer_head **bh_out)
457{
458 struct gfs2_leaf *leaf;
459 int error;
460
461 leaf = (struct gfs2_leaf *)bh_in->b_data;
462
463 if (!leaf->lf_next)
464 error = -ENOENT;
465 else
466 error = get_leaf(dip, be64_to_cpu(leaf->lf_next), bh_out);
467
468 return error;
469}
470
471static int linked_leaf_search(struct gfs2_inode *dip, struct qstr *filename,
472 struct gfs2_dirent **dent_out,
473 struct gfs2_dirent **dent_prev,
474 struct buffer_head **bh_out)
475{
476 struct buffer_head *bh = NULL, *bh_next;
477 uint32_t hsize, index;
478 uint32_t hash;
479 int error;
480
481 hsize = 1 << dip->i_di.di_depth;
482 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
483 gfs2_consist_inode(dip);
484 return -EIO;
485 }
486
487 /* Figure out the address of the leaf node. */
488
489 hash = gfs2_disk_hash(filename->name, filename->len);
490 index = hash >> (32 - dip->i_di.di_depth);
491
492 error = get_first_leaf(dip, index, &bh_next);
493 if (error)
494 return error;
495
496 /* Find the entry */
497
498 do {
499 brelse(bh);
500
501 bh = bh_next;
502
503 error = leaf_search(dip, bh, filename, dent_out, dent_prev);
504 switch (error) {
505 case 0:
506 *bh_out = bh;
507 return 0;
508
509 case -ENOENT:
510 break;
511
512 default:
513 brelse(bh);
514 return error;
515 }
516
517 error = get_next_leaf(dip, bh, &bh_next);
518 }
519 while (!error);
520
521 brelse(bh);
522
523 return error;
524}
525
526/**
527 * dir_make_exhash - Convert a stuffed directory into an ExHash directory
528 * @dip: The GFS2 inode
529 *
530 * Returns: 0 on success, error code otherwise
531 */
532
533static int dir_make_exhash(struct gfs2_inode *dip)
534{
535 struct gfs2_sbd *sdp = dip->i_sbd;
536 struct gfs2_dirent *dent;
537 struct buffer_head *bh, *dibh;
538 struct gfs2_leaf *leaf;
539 int y;
540 uint32_t x;
541 uint64_t *lp, bn;
542 int error;
543
544 error = gfs2_meta_inode_buffer(dip, &dibh);
545 if (error)
546 return error;
547
548 /* Allocate a new block for the first leaf node */
549
550 bn = gfs2_alloc_meta(dip);
551
552 /* Turn over a new leaf */
553
554 bh = gfs2_meta_new(dip->i_gl, bn);
555 gfs2_trans_add_bh(dip->i_gl, bh);
556 gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
557 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
558
559 /* Fill in the leaf structure */
560
561 leaf = (struct gfs2_leaf *)bh->b_data;
562
563 gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16));
564
565 leaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
566 leaf->lf_entries = cpu_to_be16(dip->i_di.di_entries);
567
568 /* Copy dirents */
569
570 gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh,
571 sizeof(struct gfs2_dinode));
572
573 /* Find last entry */
574
575 x = 0;
576 dirent_first(dip, bh, &dent);
577
578 do {
579 if (!dent->de_inum.no_addr)
580 continue;
581 if (++x == dip->i_di.di_entries)
582 break;
583 }
584 while (dirent_next(dip, bh, &dent) == 0);
585
586 /* Adjust the last dirent's record length
587 (Remember that dent still points to the last entry.) */
588
589 dent->de_rec_len = be32_to_cpu(dent->de_rec_len) +
590 sizeof(struct gfs2_dinode) -
591 sizeof(struct gfs2_leaf);
592 dent->de_rec_len = cpu_to_be32(dent->de_rec_len);
593
594 brelse(bh);
595
596 /* We're done with the new leaf block, now setup the new
597 hash table. */
598
599 gfs2_trans_add_bh(dip->i_gl, dibh);
600 gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
601
602 lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode));
603
604 for (x = sdp->sd_hash_ptrs; x--; lp++)
605 *lp = cpu_to_be64(bn);
606
607 dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
608 dip->i_di.di_blocks++;
609 dip->i_di.di_flags |= GFS2_DIF_EXHASH;
610 dip->i_di.di_payload_format = 0;
611
612 for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
613 dip->i_di.di_depth = y;
614
615 gfs2_dinode_out(&dip->i_di, dibh->b_data);
616
617 brelse(dibh);
618
619 return 0;
620}
621
622/**
623 * dir_split_leaf - Split a leaf block into two
624 * @dip: The GFS2 inode
625 * @index:
626 * @leaf_no:
627 *
628 * Returns: 0 on success, error code on failure
629 */
630
631static int dir_split_leaf(struct gfs2_inode *dip, uint32_t index,
632 uint64_t leaf_no)
633{
634 struct buffer_head *nbh, *obh, *dibh;
635 struct gfs2_leaf *nleaf, *oleaf;
636 struct gfs2_dirent *dent, *prev = NULL, *next = NULL, *new;
637 uint32_t start, len, half_len, divider;
638 uint64_t bn, *lp;
639 uint32_t name_len;
640 int x, moved = 0;
641 int error;
642
643 /* Allocate the new leaf block */
644
645 bn = gfs2_alloc_meta(dip);
646
647 /* Get the new leaf block */
648
649 nbh = gfs2_meta_new(dip->i_gl, bn);
650 gfs2_trans_add_bh(dip->i_gl, nbh);
651 gfs2_metatype_set(nbh, GFS2_METATYPE_LF, GFS2_FORMAT_LF);
652 gfs2_buffer_clear_tail(nbh, sizeof(struct gfs2_meta_header));
653
654 nleaf = (struct gfs2_leaf *)nbh->b_data;
655
656 nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
657
658 /* Get the old leaf block */
659
660 error = get_leaf(dip, leaf_no, &obh);
661 if (error)
662 goto fail;
663
664 gfs2_trans_add_bh(dip->i_gl, obh);
665
666 oleaf = (struct gfs2_leaf *)obh->b_data;
667
668 /* Compute the start and len of leaf pointers in the hash table. */
669
670 len = 1 << (dip->i_di.di_depth - be16_to_cpu(oleaf->lf_depth));
671 half_len = len >> 1;
672 if (!half_len) {
673 gfs2_consist_inode(dip);
674 error = -EIO;
675 goto fail_brelse;
676 }
677
678 start = (index & ~(len - 1));
679
680 /* Change the pointers.
681 Don't bother distinguishing stuffed from non-stuffed.
682 This code is complicated enough already. */
683
684 lp = kcalloc(half_len, sizeof(uint64_t), GFP_KERNEL | __GFP_NOFAIL);
685
686 error = gfs2_jdata_read_mem(dip, (char *)lp, start * sizeof(uint64_t),
687 half_len * sizeof(uint64_t));
688 if (error != half_len * sizeof(uint64_t)) {
689 if (error >= 0)
690 error = -EIO;
691 goto fail_lpfree;
692 }
693
694 /* Change the pointers */
695
696 for (x = 0; x < half_len; x++)
697 lp[x] = cpu_to_be64(bn);
698
699 error = gfs2_jdata_write_mem(dip, (char *)lp, start * sizeof(uint64_t),
700 half_len * sizeof(uint64_t));
701 if (error != half_len * sizeof(uint64_t)) {
702 if (error >= 0)
703 error = -EIO;
704 goto fail_lpfree;
705 }
706
707 kfree(lp);
708
709 /* Compute the divider */
710
711 divider = (start + half_len) << (32 - dip->i_di.di_depth);
712
713 /* Copy the entries */
714
715 dirent_first(dip, obh, &dent);
716
717 do {
718 next = dent;
719 if (dirent_next(dip, obh, &next))
720 next = NULL;
721
722 if (dent->de_inum.no_addr &&
723 be32_to_cpu(dent->de_hash) < divider) {
724 name_len = dent->de_name_len;
725
726 gfs2_dirent_alloc(dip, nbh, name_len, &new);
727
728 new->de_inum = dent->de_inum; /* No endian worries */
729 new->de_hash = dent->de_hash; /* No endian worries */
730 new->de_type = dent->de_type; /* No endian worries */
731 memcpy((char *)(new + 1), (char *)(dent + 1),
732 name_len);
733
734 nleaf->lf_entries = be16_to_cpu(nleaf->lf_entries)+1;
735 nleaf->lf_entries = cpu_to_be16(nleaf->lf_entries);
736
737 dirent_del(dip, obh, prev, dent);
738
739 if (!oleaf->lf_entries)
740 gfs2_consist_inode(dip);
741 oleaf->lf_entries = be16_to_cpu(oleaf->lf_entries)-1;
742 oleaf->lf_entries = cpu_to_be16(oleaf->lf_entries);
743
744 if (!prev)
745 prev = dent;
746
747 moved = 1;
748 } else
749 prev = dent;
750
751 dent = next;
752 }
753 while (dent);
754
755 /* If none of the entries got moved into the new leaf,
756 artificially fill in the first entry. */
757
758 if (!moved) {
759 gfs2_dirent_alloc(dip, nbh, 0, &new);
760 new->de_inum.no_addr = 0;
761 }
762
763 oleaf->lf_depth = be16_to_cpu(oleaf->lf_depth) + 1;
764 oleaf->lf_depth = cpu_to_be16(oleaf->lf_depth);
765 nleaf->lf_depth = oleaf->lf_depth;
766
767 error = gfs2_meta_inode_buffer(dip, &dibh);
768 if (!gfs2_assert_withdraw(dip->i_sbd, !error)) {
769 dip->i_di.di_blocks++;
770 gfs2_dinode_out(&dip->i_di, dibh->b_data);
771 brelse(dibh);
772 }
773
774 brelse(obh);
775 brelse(nbh);
776
777 return error;
778
779 fail_lpfree:
780 kfree(lp);
781
782 fail_brelse:
783 brelse(obh);
784
785 fail:
786 brelse(nbh);
787 return error;
788}
789
790/**
791 * dir_double_exhash - Double size of ExHash table
792 * @dip: The GFS2 dinode
793 *
794 * Returns: 0 on success, error code on failure
795 */
796
797static int dir_double_exhash(struct gfs2_inode *dip)
798{
799 struct gfs2_sbd *sdp = dip->i_sbd;
800 struct buffer_head *dibh;
801 uint32_t hsize;
802 uint64_t *buf;
803 uint64_t *from, *to;
804 uint64_t block;
805 int x;
806 int error = 0;
807
808 hsize = 1 << dip->i_di.di_depth;
809 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
810 gfs2_consist_inode(dip);
811 return -EIO;
812 }
813
814 /* Allocate both the "from" and "to" buffers in one big chunk */
815
816 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL);
817
818 for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
819 error = gfs2_jdata_read_mem(dip, (char *)buf,
820 block * sdp->sd_hash_bsize,
821 sdp->sd_hash_bsize);
822 if (error != sdp->sd_hash_bsize) {
823 if (error >= 0)
824 error = -EIO;
825 goto fail;
826 }
827
828 from = buf;
829 to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize);
830
831 for (x = sdp->sd_hash_ptrs; x--; from++) {
832 *to++ = *from; /* No endianess worries */
833 *to++ = *from;
834 }
835
836 error = gfs2_jdata_write_mem(dip,
837 (char *)buf + sdp->sd_hash_bsize,
838 block * sdp->sd_sb.sb_bsize,
839 sdp->sd_sb.sb_bsize);
840 if (error != sdp->sd_sb.sb_bsize) {
841 if (error >= 0)
842 error = -EIO;
843 goto fail;
844 }
845 }
846
847 kfree(buf);
848
849 error = gfs2_meta_inode_buffer(dip, &dibh);
850 if (!gfs2_assert_withdraw(sdp, !error)) {
851 dip->i_di.di_depth++;
852 gfs2_dinode_out(&dip->i_di, dibh->b_data);
853 brelse(dibh);
854 }
855
856 return error;
857
858 fail:
859 kfree(buf);
860
861 return error;
862}
863
864/**
865 * compare_dents - compare directory entries by hash value
866 * @a: first dent
867 * @b: second dent
868 *
869 * When comparing the hash entries of @a to @b:
870 * gt: returns 1
871 * lt: returns -1
872 * eq: returns 0
873 */
874
875static int compare_dents(const void *a, const void *b)
876{
877 struct gfs2_dirent *dent_a, *dent_b;
878 uint32_t hash_a, hash_b;
879 int ret = 0;
880
881 dent_a = *(struct gfs2_dirent **)a;
882 hash_a = dent_a->de_hash;
883 hash_a = be32_to_cpu(hash_a);
884
885 dent_b = *(struct gfs2_dirent **)b;
886 hash_b = dent_b->de_hash;
887 hash_b = be32_to_cpu(hash_b);
888
889 if (hash_a > hash_b)
890 ret = 1;
891 else if (hash_a < hash_b)
892 ret = -1;
893 else {
894 unsigned int len_a = dent_a->de_name_len;
895 unsigned int len_b = dent_b->de_name_len;
896
897 if (len_a > len_b)
898 ret = 1;
899 else if (len_a < len_b)
900 ret = -1;
901 else
902 ret = memcmp((char *)(dent_a + 1),
903 (char *)(dent_b + 1),
904 len_a);
905 }
906
907 return ret;
908}
909
910/**
911 * do_filldir_main - read out directory entries
912 * @dip: The GFS2 inode
913 * @offset: The offset in the file to read from
914 * @opaque: opaque data to pass to filldir
915 * @filldir: The function to pass entries to
916 * @darr: an array of struct gfs2_dirent pointers to read
917 * @entries: the number of entries in darr
918 * @copied: pointer to int that's non-zero if a entry has been copied out
919 *
920 * Jump through some hoops to make sure that if there are hash collsions,
921 * they are read out at the beginning of a buffer. We want to minimize
922 * the possibility that they will fall into different readdir buffers or
923 * that someone will want to seek to that location.
924 *
925 * Returns: errno, >0 on exception from filldir
926 */
927
928static int do_filldir_main(struct gfs2_inode *dip, uint64_t *offset,
929 void *opaque, gfs2_filldir_t filldir,
930 struct gfs2_dirent **darr, uint32_t entries,
931 int *copied)
932{
933 struct gfs2_dirent *dent, *dent_next;
934 struct gfs2_inum inum;
935 uint64_t off, off_next;
936 unsigned int x, y;
937 int run = 0;
938 int error = 0;
939
940 sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL);
941
942 dent_next = darr[0];
943 off_next = be32_to_cpu(dent_next->de_hash);
944 off_next = gfs2_disk_hash2offset(off_next);
945
946 for (x = 0, y = 1; x < entries; x++, y++) {
947 dent = dent_next;
948 off = off_next;
949
950 if (y < entries) {
951 dent_next = darr[y];
952 off_next = be32_to_cpu(dent_next->de_hash);
953 off_next = gfs2_disk_hash2offset(off_next);
954
955 if (off < *offset)
956 continue;
957 *offset = off;
958
959 if (off_next == off) {
960 if (*copied && !run)
961 return 1;
962 run = 1;
963 } else
964 run = 0;
965 } else {
966 if (off < *offset)
967 continue;
968 *offset = off;
969 }
970
971 gfs2_inum_in(&inum, (char *)&dent->de_inum);
972
973 error = filldir(opaque, (char *)(dent + 1),
974 dent->de_name_len,
975 off, &inum,
976 dent->de_type);
977 if (error)
978 return 1;
979
980 *copied = 1;
981 }
982
983 /* Increment the *offset by one, so the next time we come into the
984 do_filldir fxn, we get the next entry instead of the last one in the
985 current leaf */
986
987 (*offset)++;
988
989 return 0;
990}
991
992/**
993 * do_filldir_single - Read directory entries out of a single block
994 * @dip: The GFS2 inode
995 * @offset: The offset in the file to read from
996 * @opaque: opaque data to pass to filldir
997 * @filldir: The function to pass entries to
998 * @bh: the block
999 * @entries: the number of entries in the block
1000 * @copied: pointer to int that's non-zero if a entry has been copied out
1001 *
1002 * Returns: errno, >0 on exception from filldir
1003 */
1004
1005static int do_filldir_single(struct gfs2_inode *dip, uint64_t *offset,
1006 void *opaque, gfs2_filldir_t filldir,
1007 struct buffer_head *bh, uint32_t entries,
1008 int *copied)
1009{
1010 struct gfs2_dirent **darr;
1011 struct gfs2_dirent *de;
1012 unsigned int e = 0;
1013 int error;
1014
1015 if (!entries)
1016 return 0;
1017
1018 darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
1019 if (!darr)
1020 return -ENOMEM;
1021
1022 dirent_first(dip, bh, &de);
1023 do {
1024 if (!de->de_inum.no_addr)
1025 continue;
1026 if (e >= entries) {
1027 gfs2_consist_inode(dip);
1028 error = -EIO;
1029 goto out;
1030 }
1031 darr[e++] = de;
1032 }
1033 while (dirent_next(dip, bh, &de) == 0);
1034
1035 if (e != entries) {
1036 gfs2_consist_inode(dip);
1037 error = -EIO;
1038 goto out;
1039 }
1040
1041 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1042 entries, copied);
1043
1044 out:
1045 kfree(darr);
1046
1047 return error;
1048}
1049
1050/**
1051 * do_filldir_multi - Read directory entries out of a linked leaf list
1052 * @dip: The GFS2 inode
1053 * @offset: The offset in the file to read from
1054 * @opaque: opaque data to pass to filldir
1055 * @filldir: The function to pass entries to
1056 * @bh: the first leaf in the list
1057 * @copied: pointer to int that's non-zero if a entry has been copied out
1058 *
1059 * Returns: errno, >0 on exception from filldir
1060 */
1061
1062static int do_filldir_multi(struct gfs2_inode *dip, uint64_t *offset,
1063 void *opaque, gfs2_filldir_t filldir,
1064 struct buffer_head *bh, int *copied)
1065{
1066 struct buffer_head **larr = NULL;
1067 struct gfs2_dirent **darr;
1068 struct gfs2_leaf *leaf;
1069 struct buffer_head *tmp_bh;
1070 struct gfs2_dirent *de;
1071 unsigned int entries, e = 0;
1072 unsigned int leaves = 0, l = 0;
1073 unsigned int x;
1074 uint64_t ln;
1075 int error = 0;
1076
1077 /* Count leaves and entries */
1078
1079 leaf = (struct gfs2_leaf *)bh->b_data;
1080 entries = be16_to_cpu(leaf->lf_entries);
1081 ln = leaf->lf_next;
1082
1083 while (ln) {
1084 ln = be64_to_cpu(ln);
1085
1086 error = get_leaf(dip, ln, &tmp_bh);
1087 if (error)
1088 return error;
1089
1090 leaf = (struct gfs2_leaf *)tmp_bh->b_data;
1091 if (leaf->lf_entries) {
1092 entries += be16_to_cpu(leaf->lf_entries);
1093 leaves++;
1094 }
1095 ln = leaf->lf_next;
1096
1097 brelse(tmp_bh);
1098 }
1099
1100 if (!entries)
1101 return 0;
1102
1103 if (leaves) {
1104 larr = kcalloc(leaves, sizeof(struct buffer_head *),GFP_KERNEL);
1105 if (!larr)
1106 return -ENOMEM;
1107 }
1108
1109 darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL);
1110 if (!darr) {
1111 kfree(larr);
1112 return -ENOMEM;
1113 }
1114
1115 leaf = (struct gfs2_leaf *)bh->b_data;
1116 if (leaf->lf_entries) {
1117 dirent_first(dip, bh, &de);
1118 do {
1119 if (!de->de_inum.no_addr)
1120 continue;
1121 if (e >= entries) {
1122 gfs2_consist_inode(dip);
1123 error = -EIO;
1124 goto out;
1125 }
1126 darr[e++] = de;
1127 }
1128 while (dirent_next(dip, bh, &de) == 0);
1129 }
1130 ln = leaf->lf_next;
1131
1132 while (ln) {
1133 ln = be64_to_cpu(ln);
1134
1135 error = get_leaf(dip, ln, &tmp_bh);
1136 if (error)
1137 goto out;
1138
1139 leaf = (struct gfs2_leaf *)tmp_bh->b_data;
1140 if (leaf->lf_entries) {
1141 dirent_first(dip, tmp_bh, &de);
1142 do {
1143 if (!de->de_inum.no_addr)
1144 continue;
1145 if (e >= entries) {
1146 gfs2_consist_inode(dip);
1147 error = -EIO;
1148 goto out;
1149 }
1150 darr[e++] = de;
1151 }
1152 while (dirent_next(dip, tmp_bh, &de) == 0);
1153
1154 larr[l++] = tmp_bh;
1155
1156 ln = leaf->lf_next;
1157 } else {
1158 ln = leaf->lf_next;
1159 brelse(tmp_bh);
1160 }
1161 }
1162
1163 if (gfs2_assert_withdraw(dip->i_sbd, l == leaves)) {
1164 error = -EIO;
1165 goto out;
1166 }
1167 if (e != entries) {
1168 gfs2_consist_inode(dip);
1169 error = -EIO;
1170 goto out;
1171 }
1172
1173 error = do_filldir_main(dip, offset, opaque, filldir, darr,
1174 entries, copied);
1175
1176 out:
1177 kfree(darr);
1178 for (x = 0; x < l; x++)
1179 brelse(larr[x]);
1180 kfree(larr);
1181
1182 return error;
1183}
1184
1185/**
1186 * dir_e_search - Search exhash (leaf) dir for inode matching name
1187 * @dip: The GFS2 inode
1188 * @filename: Filename string
1189 * @inode: If non-NULL, function fills with formal inode # and block address
1190 * @type: If non-NULL, function fills with DT_... dinode type
1191 *
1192 * Returns:
1193 */
1194
1195static int dir_e_search(struct gfs2_inode *dip, struct qstr *filename,
1196 struct gfs2_inum *inum, unsigned int *type)
1197{
1198 struct buffer_head *bh;
1199 struct gfs2_dirent *dent;
1200 int error;
1201
1202 error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
1203 if (error)
1204 return error;
1205
1206 if (inum)
1207 gfs2_inum_in(inum, (char *)&dent->de_inum);
1208 if (type)
1209 *type = dent->de_type;
1210
1211 brelse(bh);
1212
1213 return 0;
1214}
1215
1216static int dir_e_add(struct gfs2_inode *dip, struct qstr *filename,
1217 struct gfs2_inum *inum, unsigned int type)
1218{
1219 struct buffer_head *bh, *nbh, *dibh;
1220 struct gfs2_leaf *leaf, *nleaf;
1221 struct gfs2_dirent *dent;
1222 uint32_t hsize, index;
1223 uint32_t hash;
1224 uint64_t leaf_no, bn;
1225 int error;
1226
1227 restart:
1228 hsize = 1 << dip->i_di.di_depth;
1229 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1230 gfs2_consist_inode(dip);
1231 return -EIO;
1232 }
1233
1234 /* Figure out the address of the leaf node. */
1235
1236 hash = gfs2_disk_hash(filename->name, filename->len);
1237 index = hash >> (32 - dip->i_di.di_depth);
1238
1239 error = get_leaf_nr(dip, index, &leaf_no);
1240 if (error)
1241 return error;
1242
1243 /* Add entry to the leaf */
1244
1245 for (;;) {
1246 error = get_leaf(dip, leaf_no, &bh);
1247 if (error)
1248 return error;
1249
1250 leaf = (struct gfs2_leaf *)bh->b_data;
1251
1252 if (gfs2_dirent_alloc(dip, bh, filename->len, &dent)) {
1253
1254 if (be16_to_cpu(leaf->lf_depth) < dip->i_di.di_depth) {
1255 /* Can we split the leaf? */
1256
1257 brelse(bh);
1258
1259 error = dir_split_leaf(dip, index, leaf_no);
1260 if (error)
1261 return error;
1262
1263 goto restart;
1264
1265 } else if (dip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) {
1266 /* Can we double the hash table? */
1267
1268 brelse(bh);
1269
1270 error = dir_double_exhash(dip);
1271 if (error)
1272 return error;
1273
1274 goto restart;
1275
1276 } else if (leaf->lf_next) {
1277 /* Can we try the next leaf in the list? */
1278 leaf_no = be64_to_cpu(leaf->lf_next);
1279 brelse(bh);
1280 continue;
1281
1282 } else {
1283 /* Create a new leaf and add it to the list. */
1284
1285 bn = gfs2_alloc_meta(dip);
1286
1287 nbh = gfs2_meta_new(dip->i_gl, bn);
1288 gfs2_trans_add_bh(dip->i_gl, nbh);
1289 gfs2_metatype_set(nbh,
1290 GFS2_METATYPE_LF,
1291 GFS2_FORMAT_LF);
1292 gfs2_buffer_clear_tail(nbh,
1293 sizeof(struct gfs2_meta_header));
1294
1295 gfs2_trans_add_bh(dip->i_gl, bh);
1296 leaf->lf_next = cpu_to_be64(bn);
1297
1298 nleaf = (struct gfs2_leaf *)nbh->b_data;
1299 nleaf->lf_depth = leaf->lf_depth;
1300 nleaf->lf_dirent_format = cpu_to_be32(GFS2_FORMAT_DE);
1301
1302 gfs2_dirent_alloc(dip, nbh, filename->len,
1303 &dent);
1304
1305 dip->i_di.di_blocks++;
1306
1307 brelse(bh);
1308
1309 bh = nbh;
1310 leaf = nleaf;
1311 }
1312 }
1313
1314 /* If the gfs2_dirent_alloc() succeeded, it pinned the "bh" */
1315
1316 gfs2_inum_out(inum, (char *)&dent->de_inum);
1317 dent->de_hash = cpu_to_be32(hash);
1318 dent->de_type = type;
1319 memcpy((char *)(dent + 1), filename->name, filename->len);
1320
1321 leaf->lf_entries = be16_to_cpu(leaf->lf_entries) + 1;
1322 leaf->lf_entries = cpu_to_be16(leaf->lf_entries);
1323
1324 brelse(bh);
1325
1326 error = gfs2_meta_inode_buffer(dip, &dibh);
1327 if (error)
1328 return error;
1329
1330 dip->i_di.di_entries++;
1331 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1332
1333 gfs2_trans_add_bh(dip->i_gl, dibh);
1334 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1335 brelse(dibh);
1336
1337 return 0;
1338 }
1339
1340 return -ENOENT;
1341}
1342
1343static int dir_e_del(struct gfs2_inode *dip, struct qstr *filename)
1344{
1345 struct buffer_head *bh, *dibh;
1346 struct gfs2_dirent *dent, *prev;
1347 struct gfs2_leaf *leaf;
1348 unsigned int entries;
1349 int error;
1350
1351 error = linked_leaf_search(dip, filename, &dent, &prev, &bh);
1352 if (error == -ENOENT) {
1353 gfs2_consist_inode(dip);
1354 return -EIO;
1355 }
1356 if (error)
1357 return error;
1358
1359 dirent_del(dip, bh, prev, dent); /* Pins bh */
1360
1361 leaf = (struct gfs2_leaf *)bh->b_data;
1362 entries = be16_to_cpu(leaf->lf_entries);
1363 if (!entries)
1364 gfs2_consist_inode(dip);
1365 entries--;
1366 leaf->lf_entries = cpu_to_be16(entries);
1367
1368 brelse(bh);
1369
1370 error = gfs2_meta_inode_buffer(dip, &dibh);
1371 if (error)
1372 return error;
1373
1374 if (!dip->i_di.di_entries)
1375 gfs2_consist_inode(dip);
1376 dip->i_di.di_entries--;
1377 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1378
1379 gfs2_trans_add_bh(dip->i_gl, dibh);
1380 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1381 brelse(dibh);
1382
1383 return 0;
1384}
1385
1386/**
1387 * dir_e_read - Reads the entries from a directory into a filldir buffer
1388 * @dip: dinode pointer
1389 * @offset: the hash of the last entry read shifted to the right once
1390 * @opaque: buffer for the filldir function to fill
1391 * @filldir: points to the filldir function to use
1392 *
1393 * Returns: errno
1394 */
1395
1396static int dir_e_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
1397 gfs2_filldir_t filldir)
1398{
1399 struct gfs2_sbd *sdp = dip->i_sbd;
1400 struct buffer_head *bh;
1401 struct gfs2_leaf leaf;
1402 uint32_t hsize, len;
1403 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
1404 uint32_t hash, index;
1405 uint64_t *lp;
1406 int copied = 0;
1407 int error = 0;
1408
1409 hsize = 1 << dip->i_di.di_depth;
1410 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1411 gfs2_consist_inode(dip);
1412 return -EIO;
1413 }
1414
1415 hash = gfs2_dir_offset2hash(*offset);
1416 index = hash >> (32 - dip->i_di.di_depth);
1417
1418 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1419 if (!lp)
1420 return -ENOMEM;
1421
1422 while (index < hsize) {
1423 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1424 ht_offset = index - lp_offset;
1425
1426 if (ht_offset_cur != ht_offset) {
1427 error = gfs2_jdata_read_mem(dip, (char *)lp,
1428 ht_offset * sizeof(uint64_t),
1429 sdp->sd_hash_bsize);
1430 if (error != sdp->sd_hash_bsize) {
1431 if (error >= 0)
1432 error = -EIO;
1433 goto out;
1434 }
1435 ht_offset_cur = ht_offset;
1436 }
1437
1438 error = get_leaf(dip, be64_to_cpu(lp[lp_offset]), &bh);
1439 if (error)
1440 goto out;
1441
1442 gfs2_leaf_in(&leaf, bh->b_data);
1443
1444 if (leaf.lf_next)
1445 error = do_filldir_multi(dip, offset, opaque, filldir,
1446 bh, &copied);
1447 else
1448 error = do_filldir_single(dip, offset, opaque, filldir,
1449 bh, leaf.lf_entries, &copied);
1450
1451 brelse(bh);
1452
1453 if (error) {
1454 if (error > 0)
1455 error = 0;
1456 goto out;
1457 }
1458
1459 len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
1460 index = (index & ~(len - 1)) + len;
1461 }
1462
1463 out:
1464 kfree(lp);
1465
1466 return error;
1467}
1468
1469static int dir_e_mvino(struct gfs2_inode *dip, struct qstr *filename,
1470 struct gfs2_inum *inum, unsigned int new_type)
1471{
1472 struct buffer_head *bh, *dibh;
1473 struct gfs2_dirent *dent;
1474 int error;
1475
1476 error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
1477 if (error == -ENOENT) {
1478 gfs2_consist_inode(dip);
1479 return -EIO;
1480 }
1481 if (error)
1482 return error;
1483
1484 gfs2_trans_add_bh(dip->i_gl, bh);
1485
1486 gfs2_inum_out(inum, (char *)&dent->de_inum);
1487 dent->de_type = new_type;
1488
1489 brelse(bh);
1490
1491 error = gfs2_meta_inode_buffer(dip, &dibh);
1492 if (error)
1493 return error;
1494
1495 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1496
1497 gfs2_trans_add_bh(dip->i_gl, dibh);
1498 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1499 brelse(dibh);
1500
1501 return 0;
1502}
1503
1504/**
1505 * dir_l_search - Search linear (stuffed dinode) dir for inode matching name
1506 * @dip: The GFS2 inode
1507 * @filename: Filename string
1508 * @inode: If non-NULL, function fills with formal inode # and block address
1509 * @type: If non-NULL, function fills with DT_... dinode type
1510 *
1511 * Returns:
1512 */
1513
1514static int dir_l_search(struct gfs2_inode *dip, struct qstr *filename,
1515 struct gfs2_inum *inum, unsigned int *type)
1516{
1517 struct buffer_head *dibh;
1518 struct gfs2_dirent *dent;
1519 int error;
1520
1521 if (!gfs2_is_stuffed(dip)) {
1522 gfs2_consist_inode(dip);
1523 return -EIO;
1524 }
1525
1526 error = gfs2_meta_inode_buffer(dip, &dibh);
1527 if (error)
1528 return error;
1529
1530 error = leaf_search(dip, dibh, filename, &dent, NULL);
1531 if (!error) {
1532 if (inum)
1533 gfs2_inum_in(inum, (char *)&dent->de_inum);
1534 if (type)
1535 *type = dent->de_type;
1536 }
1537
1538 brelse(dibh);
1539
1540 return error;
1541}
1542
1543static int dir_l_add(struct gfs2_inode *dip, struct qstr *filename,
1544 struct gfs2_inum *inum, unsigned int type)
1545{
1546 struct buffer_head *dibh;
1547 struct gfs2_dirent *dent;
1548 int error;
1549
1550 if (!gfs2_is_stuffed(dip)) {
1551 gfs2_consist_inode(dip);
1552 return -EIO;
1553 }
1554
1555 error = gfs2_meta_inode_buffer(dip, &dibh);
1556 if (error)
1557 return error;
1558
1559 if (gfs2_dirent_alloc(dip, dibh, filename->len, &dent)) {
1560 brelse(dibh);
1561
1562 error = dir_make_exhash(dip);
1563 if (!error)
1564 error = dir_e_add(dip, filename, inum, type);
1565
1566 return error;
1567 }
1568
1569 /* gfs2_dirent_alloc() pins */
1570
1571 gfs2_inum_out(inum, (char *)&dent->de_inum);
1572 dent->de_hash = gfs2_disk_hash(filename->name, filename->len);
1573 dent->de_hash = cpu_to_be32(dent->de_hash);
1574 dent->de_type = type;
1575 memcpy((char *)(dent + 1), filename->name, filename->len);
1576
1577 dip->i_di.di_entries++;
1578 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1579
1580 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1581 brelse(dibh);
1582
1583 return 0;
1584}
1585
1586static int dir_l_del(struct gfs2_inode *dip, struct qstr *filename)
1587{
1588 struct buffer_head *dibh;
1589 struct gfs2_dirent *dent, *prev;
1590 int error;
1591
1592 if (!gfs2_is_stuffed(dip)) {
1593 gfs2_consist_inode(dip);
1594 return -EIO;
1595 }
1596
1597 error = gfs2_meta_inode_buffer(dip, &dibh);
1598 if (error)
1599 return error;
1600
1601 error = leaf_search(dip, dibh, filename, &dent, &prev);
1602 if (error == -ENOENT) {
1603 gfs2_consist_inode(dip);
1604 error = -EIO;
1605 goto out;
1606 }
1607 if (error)
1608 goto out;
1609
1610 dirent_del(dip, dibh, prev, dent);
1611
1612 /* dirent_del() pins */
1613
1614 if (!dip->i_di.di_entries)
1615 gfs2_consist_inode(dip);
1616 dip->i_di.di_entries--;
1617
1618 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1619
1620 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1621
1622 out:
1623 brelse(dibh);
1624
1625 return error;
1626}
1627
1628static int dir_l_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
1629 gfs2_filldir_t filldir)
1630{
1631 struct buffer_head *dibh;
1632 int copied = 0;
1633 int error;
1634
1635 if (!gfs2_is_stuffed(dip)) {
1636 gfs2_consist_inode(dip);
1637 return -EIO;
1638 }
1639
1640 if (!dip->i_di.di_entries)
1641 return 0;
1642
1643 error = gfs2_meta_inode_buffer(dip, &dibh);
1644 if (error)
1645 return error;
1646
1647 error = do_filldir_single(dip, offset,
1648 opaque, filldir,
1649 dibh, dip->i_di.di_entries,
1650 &copied);
1651 if (error > 0)
1652 error = 0;
1653
1654 brelse(dibh);
1655
1656 return error;
1657}
1658
1659static int dir_l_mvino(struct gfs2_inode *dip, struct qstr *filename,
1660 struct gfs2_inum *inum, unsigned int new_type)
1661{
1662 struct buffer_head *dibh;
1663 struct gfs2_dirent *dent;
1664 int error;
1665
1666 if (!gfs2_is_stuffed(dip)) {
1667 gfs2_consist_inode(dip);
1668 return -EIO;
1669 }
1670
1671 error = gfs2_meta_inode_buffer(dip, &dibh);
1672 if (error)
1673 return error;
1674
1675 error = leaf_search(dip, dibh, filename, &dent, NULL);
1676 if (error == -ENOENT) {
1677 gfs2_consist_inode(dip);
1678 error = -EIO;
1679 goto out;
1680 }
1681 if (error)
1682 goto out;
1683
1684 gfs2_trans_add_bh(dip->i_gl, dibh);
1685
1686 gfs2_inum_out(inum, (char *)&dent->de_inum);
1687 dent->de_type = new_type;
1688
1689 dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
1690
1691 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1692
1693 out:
1694 brelse(dibh);
1695
1696 return error;
1697}
1698
1699/**
1700 * gfs2_dir_search - Search a directory
1701 * @dip: The GFS2 inode
1702 * @filename:
1703 * @inode:
1704 *
1705 * This routine searches a directory for a file or another directory.
1706 * Assumes a glock is held on dip.
1707 *
1708 * Returns: errno
1709 */
1710
1711int gfs2_dir_search(struct gfs2_inode *dip, struct qstr *filename,
1712 struct gfs2_inum *inum, unsigned int *type)
1713{
1714 int error;
1715
1716 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1717 error = dir_e_search(dip, filename, inum, type);
1718 else
1719 error = dir_l_search(dip, filename, inum, type);
1720
1721 return error;
1722}
1723
1724/**
1725 * gfs2_dir_add - Add new filename into directory
1726 * @dip: The GFS2 inode
1727 * @filename: The new name
1728 * @inode: The inode number of the entry
1729 * @type: The type of the entry
1730 *
1731 * Returns: 0 on success, error code on failure
1732 */
1733
1734int gfs2_dir_add(struct gfs2_inode *dip, struct qstr *filename,
1735 struct gfs2_inum *inum, unsigned int type)
1736{
1737 int error;
1738
1739 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1740 error = dir_e_add(dip, filename, inum, type);
1741 else
1742 error = dir_l_add(dip, filename, inum, type);
1743
1744 return error;
1745}
1746
1747/**
1748 * gfs2_dir_del - Delete a directory entry
1749 * @dip: The GFS2 inode
1750 * @filename: The filename
1751 *
1752 * Returns: 0 on success, error code on failure
1753 */
1754
1755int gfs2_dir_del(struct gfs2_inode *dip, struct qstr *filename)
1756{
1757 int error;
1758
1759 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1760 error = dir_e_del(dip, filename);
1761 else
1762 error = dir_l_del(dip, filename);
1763
1764 return error;
1765}
1766
1767int gfs2_dir_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque,
1768 gfs2_filldir_t filldir)
1769{
1770 int error;
1771
1772 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1773 error = dir_e_read(dip, offset, opaque, filldir);
1774 else
1775 error = dir_l_read(dip, offset, opaque, filldir);
1776
1777 return error;
1778}
1779
1780/**
1781 * gfs2_dir_mvino - Change inode number of directory entry
1782 * @dip: The GFS2 inode
1783 * @filename:
1784 * @new_inode:
1785 *
1786 * This routine changes the inode number of a directory entry. It's used
1787 * by rename to change ".." when a directory is moved.
1788 * Assumes a glock is held on dvp.
1789 *
1790 * Returns: errno
1791 */
1792
1793int gfs2_dir_mvino(struct gfs2_inode *dip, struct qstr *filename,
1794 struct gfs2_inum *inum, unsigned int new_type)
1795{
1796 int error;
1797
1798 if (dip->i_di.di_flags & GFS2_DIF_EXHASH)
1799 error = dir_e_mvino(dip, filename, inum, new_type);
1800 else
1801 error = dir_l_mvino(dip, filename, inum, new_type);
1802
1803 return error;
1804}
1805
1806/**
1807 * foreach_leaf - call a function for each leaf in a directory
1808 * @dip: the directory
1809 * @lc: the function to call for each each
1810 * @data: private data to pass to it
1811 *
1812 * Returns: errno
1813 */
1814
1815static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data)
1816{
1817 struct gfs2_sbd *sdp = dip->i_sbd;
1818 struct buffer_head *bh;
1819 struct gfs2_leaf leaf;
1820 uint32_t hsize, len;
1821 uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
1822 uint32_t index = 0;
1823 uint64_t *lp;
1824 uint64_t leaf_no;
1825 int error = 0;
1826
1827 hsize = 1 << dip->i_di.di_depth;
1828 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
1829 gfs2_consist_inode(dip);
1830 return -EIO;
1831 }
1832
1833 lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL);
1834 if (!lp)
1835 return -ENOMEM;
1836
1837 while (index < hsize) {
1838 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1839 ht_offset = index - lp_offset;
1840
1841 if (ht_offset_cur != ht_offset) {
1842 error = gfs2_jdata_read_mem(dip, (char *)lp,
1843 ht_offset * sizeof(uint64_t),
1844 sdp->sd_hash_bsize);
1845 if (error != sdp->sd_hash_bsize) {
1846 if (error >= 0)
1847 error = -EIO;
1848 goto out;
1849 }
1850 ht_offset_cur = ht_offset;
1851 }
1852
1853 leaf_no = be64_to_cpu(lp[lp_offset]);
1854 if (leaf_no) {
1855 error = get_leaf(dip, leaf_no, &bh);
1856 if (error)
1857 goto out;
1858 gfs2_leaf_in(&leaf, bh->b_data);
1859 brelse(bh);
1860
1861 len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
1862
1863 error = lc(dip, index, len, leaf_no, data);
1864 if (error)
1865 goto out;
1866
1867 index = (index & ~(len - 1)) + len;
1868 } else
1869 index++;
1870 }
1871
1872 if (index != hsize) {
1873 gfs2_consist_inode(dip);
1874 error = -EIO;
1875 }
1876
1877 out:
1878 kfree(lp);
1879
1880 return error;
1881}
1882
1883/**
1884 * leaf_dealloc - Deallocate a directory leaf
1885 * @dip: the directory
1886 * @index: the hash table offset in the directory
1887 * @len: the number of pointers to this leaf
1888 * @leaf_no: the leaf number
1889 * @data: not used
1890 *
1891 * Returns: errno
1892 */
1893
1894static int leaf_dealloc(struct gfs2_inode *dip, uint32_t index, uint32_t len,
1895 uint64_t leaf_no, void *data)
1896{
1897 struct gfs2_sbd *sdp = dip->i_sbd;
1898 struct gfs2_leaf tmp_leaf;
1899 struct gfs2_rgrp_list rlist;
1900 struct buffer_head *bh, *dibh;
1901 uint64_t blk;
1902 unsigned int rg_blocks = 0, l_blocks = 0;
1903 char *ht;
1904 unsigned int x, size = len * sizeof(uint64_t);
1905 int error;
1906
1907 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
1908
1909 ht = kzalloc(size, GFP_KERNEL);
1910 if (!ht)
1911 return -ENOMEM;
1912
1913 gfs2_alloc_get(dip);
1914
1915 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1916 if (error)
1917 goto out;
1918
1919 error = gfs2_rindex_hold(sdp, &dip->i_alloc.al_ri_gh);
1920 if (error)
1921 goto out_qs;
1922
1923 /* Count the number of leaves */
1924
1925 for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
1926 error = get_leaf(dip, blk, &bh);
1927 if (error)
1928 goto out_rlist;
1929 gfs2_leaf_in(&tmp_leaf, (bh)->b_data);
1930 brelse(bh);
1931
1932 gfs2_rlist_add(sdp, &rlist, blk);
1933 l_blocks++;
1934 }
1935
1936 gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1937
1938 for (x = 0; x < rlist.rl_rgrps; x++) {
1939 struct gfs2_rgrpd *rgd;
1940 rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl);
1941 rg_blocks += rgd->rd_ri.ri_length;
1942 }
1943
1944 error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1945 if (error)
1946 goto out_rlist;
1947
1948 error = gfs2_trans_begin(sdp,
1949 rg_blocks + (DIV_RU(size, sdp->sd_jbsize) + 1) +
1950 RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
1951 if (error)
1952 goto out_rg_gunlock;
1953
1954 for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
1955 error = get_leaf(dip, blk, &bh);
1956 if (error)
1957 goto out_end_trans;
1958 gfs2_leaf_in(&tmp_leaf, bh->b_data);
1959 brelse(bh);
1960
1961 gfs2_free_meta(dip, blk, 1);
1962
1963 if (!dip->i_di.di_blocks)
1964 gfs2_consist_inode(dip);
1965 dip->i_di.di_blocks--;
1966 }
1967
1968 error = gfs2_jdata_write_mem(dip, ht, index * sizeof(uint64_t), size);
1969 if (error != size) {
1970 if (error >= 0)
1971 error = -EIO;
1972 goto out_end_trans;
1973 }
1974
1975 error = gfs2_meta_inode_buffer(dip, &dibh);
1976 if (error)
1977 goto out_end_trans;
1978
1979 gfs2_trans_add_bh(dip->i_gl, dibh);
1980 gfs2_dinode_out(&dip->i_di, dibh->b_data);
1981 brelse(dibh);
1982
1983 out_end_trans:
1984 gfs2_trans_end(sdp);
1985
1986 out_rg_gunlock:
1987 gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
1988
1989 out_rlist:
1990 gfs2_rlist_free(&rlist);
1991 gfs2_glock_dq_uninit(&dip->i_alloc.al_ri_gh);
1992
1993 out_qs:
1994 gfs2_quota_unhold(dip);
1995
1996 out:
1997 gfs2_alloc_put(dip);
1998 kfree(ht);
1999
2000 return error;
2001}
2002
2003/**
2004 * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory
2005 * @dip: the directory
2006 *
2007 * Dealloc all on-disk directory leaves to FREEMETA state
2008 * Change on-disk inode type to "regular file"
2009 *
2010 * Returns: errno
2011 */
2012
2013int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
2014{
2015 struct gfs2_sbd *sdp = dip->i_sbd;
2016 struct buffer_head *bh;
2017 int error;
2018
2019 /* Dealloc on-disk leaves to FREEMETA state */
2020 error = foreach_leaf(dip, leaf_dealloc, NULL);
2021 if (error)
2022 return error;
2023
2024 /* Make this a regular file in case we crash.
2025 (We don't want to free these blocks a second time.) */
2026
2027 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
2028 if (error)
2029 return error;
2030
2031 error = gfs2_meta_inode_buffer(dip, &bh);
2032 if (!error) {
2033 gfs2_trans_add_bh(dip->i_gl, bh);
2034 ((struct gfs2_dinode *)bh->b_data)->di_mode = cpu_to_be32(S_IFREG);
2035 brelse(bh);
2036 }
2037
2038 gfs2_trans_end(sdp);
2039
2040 return error;
2041}
2042
2043/**
2044 * gfs2_diradd_alloc_required - find if adding entry will require an allocation
2045 * @ip: the file being written to
2046 * @filname: the filename that's going to be added
2047 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
2048 *
2049 * Returns: errno
2050 */
2051
2052int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename,
2053 int *alloc_required)
2054{
2055 struct buffer_head *bh = NULL, *bh_next;
2056 uint32_t hsize, hash, index;
2057 int error = 0;
2058
2059 *alloc_required = 0;
2060
2061 if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
2062 hsize = 1 << dip->i_di.di_depth;
2063 if (hsize * sizeof(uint64_t) != dip->i_di.di_size) {
2064 gfs2_consist_inode(dip);
2065 return -EIO;
2066 }
2067
2068 hash = gfs2_disk_hash(filename->name, filename->len);
2069 index = hash >> (32 - dip->i_di.di_depth);
2070
2071 error = get_first_leaf(dip, index, &bh_next);
2072 if (error)
2073 return error;
2074
2075 do {
2076 brelse(bh);
2077
2078 bh = bh_next;
2079
2080 if (dirent_fits(dip, bh, filename->len))
2081 break;
2082
2083 error = get_next_leaf(dip, bh, &bh_next);
2084 if (error == -ENOENT) {
2085 *alloc_required = 1;
2086 error = 0;
2087 break;
2088 }
2089 }
2090 while (!error);
2091
2092 brelse(bh);
2093 } else {
2094 error = gfs2_meta_inode_buffer(dip, &bh);
2095 if (error)
2096 return error;
2097
2098 if (!dirent_fits(dip, bh, filename->len))
2099 *alloc_required = 1;
2100
2101 brelse(bh);
2102 }
2103
2104 return error;
2105}
2106
2107/**
2108 * do_gdm - copy out one leaf (or list of leaves)
2109 * @dip: the directory
2110 * @index: the hash table offset in the directory
2111 * @len: the number of pointers to this leaf
2112 * @leaf_no: the leaf number
2113 * @data: a pointer to a struct gfs2_user_buffer structure
2114 *
2115 * Returns: errno
2116 */
2117
2118static int do_gdm(struct gfs2_inode *dip, uint32_t index, uint32_t len,
2119 uint64_t leaf_no, void *data)
2120{
2121 struct gfs2_user_buffer *ub = (struct gfs2_user_buffer *)data;
2122 struct gfs2_leaf leaf;
2123 struct buffer_head *bh;
2124 uint64_t blk;
2125 int error = 0;
2126
2127 for (blk = leaf_no; blk; blk = leaf.lf_next) {
2128 error = get_leaf(dip, blk, &bh);
2129 if (error)
2130 break;
2131
2132 gfs2_leaf_in(&leaf, bh->b_data);
2133
2134 error = gfs2_add_bh_to_ub(ub, bh);
2135
2136 brelse(bh);
2137
2138 if (error)
2139 break;
2140 }
2141
2142 return error;
2143}
2144
2145/**
2146 * gfs2_get_dir_meta - return all the leaf blocks of a directory
2147 * @dip: the directory
2148 * @ub: the structure representing the meta
2149 *
2150 * Returns: errno
2151 */
2152
2153int gfs2_get_dir_meta(struct gfs2_inode *dip, struct gfs2_user_buffer *ub)
2154{
2155 return foreach_leaf(dip, do_gdm, ub);
2156}
2157