blob: 5f4f705eebbb71aaf7c30255318ed09c709a122f [file] [log] [blame]
Dave Chinner4a8af272013-08-12 20:49:36 +10001/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * Copyright (c) 2013 Red Hat, Inc.
4 * All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19#include "xfs.h"
20#include "xfs_fs.h"
21#include "xfs_types.h"
22#include "xfs_bit.h"
23#include "xfs_log.h"
24#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
27#include "xfs_mount.h"
28#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h"
30#include "xfs_dinode.h"
31#include "xfs_inode.h"
32#include "xfs_dir2_format.h"
Dave Chinner2b9ab5a2013-08-12 20:49:37 +100033#include "xfs_dir2.h"
Dave Chinner4a8af272013-08-12 20:49:36 +100034#include "xfs_dir2_priv.h"
35#include "xfs_error.h"
36#include "xfs_trace.h"
37#include "xfs_bmap.h"
38
39STATIC int
40xfs_dir2_sf_getdents(
41 xfs_inode_t *dp, /* incore directory inode */
42 struct dir_context *ctx)
43{
44 int i; /* shortform entry number */
45 xfs_mount_t *mp; /* filesystem mount point */
46 xfs_dir2_dataptr_t off; /* current entry's offset */
47 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
48 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
49 xfs_dir2_dataptr_t dot_offset;
50 xfs_dir2_dataptr_t dotdot_offset;
51 xfs_ino_t ino;
52
53 mp = dp->i_mount;
54
55 ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
56 /*
57 * Give up if the directory is way too short.
58 */
59 if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
60 ASSERT(XFS_FORCED_SHUTDOWN(mp));
61 return XFS_ERROR(EIO);
62 }
63
64 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
65 ASSERT(dp->i_df.if_u1.if_data != NULL);
66
67 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
68
69 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
70
71 /*
72 * If the block number in the offset is out of range, we're done.
73 */
74 if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
75 return 0;
76
77 /*
78 * Precalculate offsets for . and .. as we will always need them.
79 *
80 * XXX(hch): the second argument is sometimes 0 and sometimes
81 * mp->m_dirdatablk.
82 */
83 dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
84 XFS_DIR3_DATA_DOT_OFFSET(mp));
85 dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
86 XFS_DIR3_DATA_DOTDOT_OFFSET(mp));
87
88 /*
89 * Put . entry unless we're starting past it.
90 */
91 if (ctx->pos <= dot_offset) {
92 ctx->pos = dot_offset & 0x7fffffff;
93 if (!dir_emit(ctx, ".", 1, dp->i_ino, DT_DIR))
94 return 0;
95 }
96
97 /*
98 * Put .. entry unless we're starting past it.
99 */
100 if (ctx->pos <= dotdot_offset) {
101 ino = xfs_dir2_sf_get_parent_ino(sfp);
102 ctx->pos = dotdot_offset & 0x7fffffff;
103 if (!dir_emit(ctx, "..", 2, ino, DT_DIR))
104 return 0;
105 }
106
107 /*
108 * Loop while there are more entries and put'ing works.
109 */
110 sfep = xfs_dir2_sf_firstentry(sfp);
111 for (i = 0; i < sfp->count; i++) {
112 off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
113 xfs_dir2_sf_get_offset(sfep));
114
115 if (ctx->pos > off) {
116 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
117 continue;
118 }
119
120 ino = xfs_dir2_sfe_get_ino(sfp, sfep);
121 ctx->pos = off & 0x7fffffff;
122 if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen,
123 ino, DT_UNKNOWN))
124 return 0;
125 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
126 }
127
128 ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
129 0x7fffffff;
130 return 0;
131}
132
133/*
134 * Readdir for block directories.
135 */
136STATIC int
137xfs_dir2_block_getdents(
138 xfs_inode_t *dp, /* incore inode */
139 struct dir_context *ctx)
140{
141 xfs_dir2_data_hdr_t *hdr; /* block header */
142 struct xfs_buf *bp; /* buffer for block */
143 xfs_dir2_block_tail_t *btp; /* block tail */
144 xfs_dir2_data_entry_t *dep; /* block data entry */
145 xfs_dir2_data_unused_t *dup; /* block unused entry */
146 char *endptr; /* end of the data entries */
147 int error; /* error return value */
148 xfs_mount_t *mp; /* filesystem mount point */
149 char *ptr; /* current data entry */
150 int wantoff; /* starting block offset */
151 xfs_off_t cook;
152
153 mp = dp->i_mount;
154 /*
155 * If the block number in the offset is out of range, we're done.
156 */
157 if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk)
158 return 0;
159
160 error = xfs_dir3_block_read(NULL, dp, &bp);
161 if (error)
162 return error;
163
164 /*
165 * Extract the byte offset we start at from the seek pointer.
166 * We'll skip entries before this.
167 */
168 wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos);
169 hdr = bp->b_addr;
170 xfs_dir3_data_check(dp, bp);
171 /*
172 * Set up values for the loop.
173 */
174 btp = xfs_dir2_block_tail_p(mp, hdr);
175 ptr = (char *)xfs_dir3_data_entry_p(hdr);
176 endptr = (char *)xfs_dir2_block_leaf_p(btp);
177
178 /*
179 * Loop over the data portion of the block.
180 * Each object is a real entry (dep) or an unused one (dup).
181 */
182 while (ptr < endptr) {
183 dup = (xfs_dir2_data_unused_t *)ptr;
184 /*
185 * Unused, skip it.
186 */
187 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
188 ptr += be16_to_cpu(dup->length);
189 continue;
190 }
191
192 dep = (xfs_dir2_data_entry_t *)ptr;
193
194 /*
195 * Bump pointer for the next iteration.
196 */
197 ptr += xfs_dir2_data_entsize(dep->namelen);
198 /*
199 * The entry is before the desired starting point, skip it.
200 */
201 if ((char *)dep - (char *)hdr < wantoff)
202 continue;
203
204 cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
205 (char *)dep - (char *)hdr);
206
207 ctx->pos = cook & 0x7fffffff;
208 /*
209 * If it didn't fit, set the final offset to here & return.
210 */
211 if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
212 be64_to_cpu(dep->inumber), DT_UNKNOWN)) {
213 xfs_trans_brelse(NULL, bp);
214 return 0;
215 }
216 }
217
218 /*
219 * Reached the end of the block.
220 * Set the offset to a non-existent block 1 and return.
221 */
222 ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
223 0x7fffffff;
224 xfs_trans_brelse(NULL, bp);
225 return 0;
226}
227
/*
 * Extent-map and readahead bookkeeping shared between
 * xfs_dir2_leaf_getdents() and xfs_dir2_leaf_readbuf().
 *
 * The structure is allocated with a trailing array of map_size extent
 * records; xfs_dir2_leaf_readbuf() fills the array from xfs_bmapi_read(),
 * drops mappings as directory blocks are consumed, and uses the ra_* fields
 * to track how far readahead has been issued.
 */
struct xfs_dir2_leaf_map_info {
	xfs_extlen_t	map_blocks;	/* number of fsbs in map */
	xfs_dablk_t	map_off;	/* last mapped file offset */
	int		map_size;	/* total entries in *map */
	int		map_valid;	/* valid entries in *map */
	int		nmap;		/* mappings to ask xfs_bmapi */
	xfs_dir2_db_t	curdb;		/* db for current block */
	int		ra_current;	/* number of read-ahead blks */
	int		ra_index;	/* *map index for read-ahead */
	int		ra_offset;	/* map entry offset for ra */
	int		ra_want;	/* readahead count wanted */
	struct xfs_bmbt_irec map[];	/* map vector for blocks */
};
241
242STATIC int
243xfs_dir2_leaf_readbuf(
244 struct xfs_inode *dp,
245 size_t bufsize,
246 struct xfs_dir2_leaf_map_info *mip,
247 xfs_dir2_off_t *curoff,
248 struct xfs_buf **bpp)
249{
250 struct xfs_mount *mp = dp->i_mount;
251 struct xfs_buf *bp = *bpp;
252 struct xfs_bmbt_irec *map = mip->map;
253 struct blk_plug plug;
254 int error = 0;
255 int length;
256 int i;
257 int j;
258
259 /*
260 * If we have a buffer, we need to release it and
261 * take it out of the mapping.
262 */
263
264 if (bp) {
265 xfs_trans_brelse(NULL, bp);
266 bp = NULL;
267 mip->map_blocks -= mp->m_dirblkfsbs;
268 /*
269 * Loop to get rid of the extents for the
270 * directory block.
271 */
272 for (i = mp->m_dirblkfsbs; i > 0; ) {
273 j = min_t(int, map->br_blockcount, i);
274 map->br_blockcount -= j;
275 map->br_startblock += j;
276 map->br_startoff += j;
277 /*
278 * If mapping is done, pitch it from
279 * the table.
280 */
281 if (!map->br_blockcount && --mip->map_valid)
282 memmove(&map[0], &map[1],
283 sizeof(map[0]) * mip->map_valid);
284 i -= j;
285 }
286 }
287
288 /*
289 * Recalculate the readahead blocks wanted.
290 */
291 mip->ra_want = howmany(bufsize + mp->m_dirblksize,
292 mp->m_sb.sb_blocksize) - 1;
293 ASSERT(mip->ra_want >= 0);
294
295 /*
296 * If we don't have as many as we want, and we haven't
297 * run out of data blocks, get some more mappings.
298 */
299 if (1 + mip->ra_want > mip->map_blocks &&
300 mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
301 /*
302 * Get more bmaps, fill in after the ones
303 * we already have in the table.
304 */
305 mip->nmap = mip->map_size - mip->map_valid;
306 error = xfs_bmapi_read(dp, mip->map_off,
307 xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
308 mip->map_off,
309 &map[mip->map_valid], &mip->nmap, 0);
310
311 /*
312 * Don't know if we should ignore this or try to return an
313 * error. The trouble with returning errors is that readdir
314 * will just stop without actually passing the error through.
315 */
316 if (error)
317 goto out; /* XXX */
318
319 /*
320 * If we got all the mappings we asked for, set the final map
321 * offset based on the last bmap value received. Otherwise,
322 * we've reached the end.
323 */
324 if (mip->nmap == mip->map_size - mip->map_valid) {
325 i = mip->map_valid + mip->nmap - 1;
326 mip->map_off = map[i].br_startoff + map[i].br_blockcount;
327 } else
328 mip->map_off = xfs_dir2_byte_to_da(mp,
329 XFS_DIR2_LEAF_OFFSET);
330
331 /*
332 * Look for holes in the mapping, and eliminate them. Count up
333 * the valid blocks.
334 */
335 for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
336 if (map[i].br_startblock == HOLESTARTBLOCK) {
337 mip->nmap--;
338 length = mip->map_valid + mip->nmap - i;
339 if (length)
340 memmove(&map[i], &map[i + 1],
341 sizeof(map[i]) * length);
342 } else {
343 mip->map_blocks += map[i].br_blockcount;
344 i++;
345 }
346 }
347 mip->map_valid += mip->nmap;
348 }
349
350 /*
351 * No valid mappings, so no more data blocks.
352 */
353 if (!mip->map_valid) {
354 *curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
355 goto out;
356 }
357
358 /*
359 * Read the directory block starting at the first mapping.
360 */
361 mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
362 error = xfs_dir3_data_read(NULL, dp, map->br_startoff,
363 map->br_blockcount >= mp->m_dirblkfsbs ?
364 XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp);
365
366 /*
367 * Should just skip over the data block instead of giving up.
368 */
369 if (error)
370 goto out; /* XXX */
371
372 /*
373 * Adjust the current amount of read-ahead: we just read a block that
374 * was previously ra.
375 */
376 if (mip->ra_current)
377 mip->ra_current -= mp->m_dirblkfsbs;
378
379 /*
380 * Do we need more readahead?
381 */
382 blk_start_plug(&plug);
383 for (mip->ra_index = mip->ra_offset = i = 0;
384 mip->ra_want > mip->ra_current && i < mip->map_blocks;
385 i += mp->m_dirblkfsbs) {
386 ASSERT(mip->ra_index < mip->map_valid);
387 /*
388 * Read-ahead a contiguous directory block.
389 */
390 if (i > mip->ra_current &&
391 map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
392 xfs_dir3_data_readahead(NULL, dp,
393 map[mip->ra_index].br_startoff + mip->ra_offset,
394 XFS_FSB_TO_DADDR(mp,
395 map[mip->ra_index].br_startblock +
396 mip->ra_offset));
397 mip->ra_current = i;
398 }
399
400 /*
401 * Read-ahead a non-contiguous directory block. This doesn't
402 * use our mapping, but this is a very rare case.
403 */
404 else if (i > mip->ra_current) {
405 xfs_dir3_data_readahead(NULL, dp,
406 map[mip->ra_index].br_startoff +
407 mip->ra_offset, -1);
408 mip->ra_current = i;
409 }
410
411 /*
412 * Advance offset through the mapping table.
413 */
414 for (j = 0; j < mp->m_dirblkfsbs; j++) {
415 /*
416 * The rest of this extent but not more than a dir
417 * block.
418 */
419 length = min_t(int, mp->m_dirblkfsbs,
420 map[mip->ra_index].br_blockcount -
421 mip->ra_offset);
422 j += length;
423 mip->ra_offset += length;
424
425 /*
426 * Advance to the next mapping if this one is used up.
427 */
428 if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
429 mip->ra_offset = 0;
430 mip->ra_index++;
431 }
432 }
433 }
434 blk_finish_plug(&plug);
435
436out:
437 *bpp = bp;
438 return error;
439}
440
/*
 * Getdents (readdir) for leaf and node directories.
 * This reads the data blocks only, so is the same for both forms.
 *
 * dp      - incore directory inode
 * ctx     - readdir context; ctx->pos is the starting position on entry and
 *           is updated before each emitted entry and once more on exit
 * bufsize - caller's buffer size, used only to size the extent map and the
 *           amount of readahead
 *
 * Returns 0, or the error reported by xfs_dir2_leaf_readbuf().  The map
 * info allocation is freed and any held buffer is released before
 * returning.
 */
STATIC int
xfs_dir2_leaf_getdents(
	xfs_inode_t		*dp,		/* incore directory inode */
	struct dir_context	*ctx,
	size_t			bufsize)
{
	struct xfs_buf		*bp = NULL;	/* data block buffer */
	xfs_dir2_data_hdr_t	*hdr;		/* data block header */
	xfs_dir2_data_entry_t	*dep;		/* data entry */
	xfs_dir2_data_unused_t	*dup;		/* unused entry */
	int			error = 0;	/* error return value */
	int			length;		/* temporary length value */
	xfs_mount_t		*mp;		/* filesystem mount point */
	int			byteoff;	/* offset in current block */
	xfs_dir2_off_t		curoff;		/* current overall offset */
	xfs_dir2_off_t		newoff;		/* new curoff after new blk */
	char			*ptr = NULL;	/* pointer to current data */
	struct xfs_dir2_leaf_map_info *map_info;

	/*
	 * If the offset is at or past the largest allowed value,
	 * give up right away.
	 */
	if (ctx->pos >= XFS_DIR2_MAX_DATAPTR)
		return 0;

	mp = dp->i_mount;

	/*
	 * Set up to bmap a number of blocks based on the caller's
	 * buffer size, the directory block size, and the filesystem
	 * block size.  The map array is allocated as the trailing
	 * flexible member of the map info structure.
	 */
	length = howmany(bufsize + mp->m_dirblksize,
				     mp->m_sb.sb_blocksize);
	map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
				(length * sizeof(struct xfs_bmbt_irec)),
			       KM_SLEEP | KM_NOFS);
	map_info->map_size = length;

	/*
	 * Inside the loop we keep the main offset value as a byte offset
	 * in the directory file.
	 */
	curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos);

	/*
	 * Force this conversion through db so we truncate the offset
	 * down to get the start of the data block.
	 */
	map_info->map_off = xfs_dir2_db_to_da(mp,
					      xfs_dir2_byte_to_db(mp, curoff));

	/*
	 * Loop over directory entries until we reach the end offset.
	 * Get more blocks and readahead as necessary.
	 */
	while (curoff < XFS_DIR2_LEAF_OFFSET) {
		/*
		 * If we have no buffer, or we're off the end of the
		 * current buffer, need to get another one.
		 */
		if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {

			/*
			 * Stop on a read error, or when the helper reports
			 * that no valid mappings remain (end of data).
			 */
			error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
						      &curoff, &bp);
			if (error || !map_info->map_valid)
				break;

			/*
			 * Having done a read, we need to set a new offset.
			 */
			newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
			/*
			 * Start of the current block.
			 */
			if (curoff < newoff)
				curoff = newoff;
			/*
			 * Make sure we're in the right block.
			 */
			else if (curoff > newoff)
				ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
				       map_info->curdb);
			hdr = bp->b_addr;
			xfs_dir3_data_check(dp, bp);
			/*
			 * Find our position in the block.
			 */
			ptr = (char *)xfs_dir3_data_entry_p(hdr);
			byteoff = xfs_dir2_byte_to_off(mp, curoff);
			/*
			 * Skip past the header.
			 */
			if (byteoff == 0)
				curoff += xfs_dir3_data_entry_offset(hdr);
			/*
			 * Skip past entries until we reach our offset.
			 */
			else {
				while ((char *)ptr - (char *)hdr < byteoff) {
					dup = (xfs_dir2_data_unused_t *)ptr;

					if (be16_to_cpu(dup->freetag)
						  == XFS_DIR2_DATA_FREE_TAG) {

						length = be16_to_cpu(dup->length);
						ptr += length;
						continue;
					}
					dep = (xfs_dir2_data_entry_t *)ptr;
					length =
					   xfs_dir2_data_entsize(dep->namelen);
					ptr += length;
				}
				/*
				 * Now set our real offset: the walk above
				 * stops on an entry boundary, which may lie
				 * past the requested byte offset.
				 */
				curoff =
					xfs_dir2_db_off_to_byte(mp,
					    xfs_dir2_byte_to_db(mp, curoff),
					    (char *)ptr - (char *)hdr);
				/*
				 * The walk ran off the end of this block;
				 * go around again to fetch the next one.
				 */
				if (ptr >= (char *)hdr + mp->m_dirblksize) {
					continue;
				}
			}
		}
		/*
		 * We have a pointer to an entry.
		 * Is it a live one?
		 */
		dup = (xfs_dir2_data_unused_t *)ptr;
		/*
		 * No, it's unused, skip over it.
		 */
		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			length = be16_to_cpu(dup->length);
			ptr += length;
			curoff += length;
			continue;
		}

		dep = (xfs_dir2_data_entry_t *)ptr;
		length = xfs_dir2_data_entsize(dep->namelen);

		/*
		 * Publish the position of this entry before emitting it, so
		 * that ctx->pos still refers to it if dir_emit() declines.
		 */
		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
			    be64_to_cpu(dep->inumber), DT_UNKNOWN))
			break;

		/*
		 * Advance to next entry in the block.
		 */
		ptr += length;
		curoff += length;
		/* bufsize may have just been a guess; don't go negative */
		bufsize = bufsize > length ? bufsize - length : 0;
	}

	/*
	 * All done.  Set output offset value to current offset, clamped to
	 * the largest data pointer value.
	 */
	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR))
		ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
	else
		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
	kmem_free(map_info);
	if (bp)
		xfs_trans_brelse(NULL, bp);
	return error;
}
616
617/*
618 * Read a directory.
619 */
620int
621xfs_readdir(
622 xfs_inode_t *dp,
623 struct dir_context *ctx,
624 size_t bufsize)
625{
626 int rval; /* return value */
627 int v; /* type-checking value */
628
629 trace_xfs_readdir(dp);
630
631 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
632 return XFS_ERROR(EIO);
633
634 ASSERT(S_ISDIR(dp->i_d.di_mode));
635 XFS_STATS_INC(xs_dir_getdents);
636
637 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
638 rval = xfs_dir2_sf_getdents(dp, ctx);
639 else if ((rval = xfs_dir2_isblock(NULL, dp, &v)))
640 ;
641 else if (v)
642 rval = xfs_dir2_block_getdents(dp, ctx);
643 else
644 rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize);
645 return rval;
646}