| /* |
| * Copyright 2017 Omnibond Systems, L.L.C. |
| */ |
| |
| #include "protocol.h" |
| #include "orangefs-kernel.h" |
| #include "orangefs-bufmap.h" |
| |
| struct orangefs_dir_part { |
| struct orangefs_dir_part *next; |
| size_t len; |
| }; |
| |
| struct orangefs_dir { |
| __u64 token; |
| struct orangefs_dir_part *part; |
| loff_t end; |
| int error; |
| }; |
| |
| #define PART_SHIFT (24) |
| #define PART_SIZE (1<<24) |
| #define PART_MASK (~(PART_SIZE - 1)) |
| |
| /* |
| * There can be up to 512 directory entries. Each entry is encoded as |
| * follows: |
| * 4 bytes: string size (n) |
| * n bytes: string |
| * 1 byte: trailing zero |
| * padding to 8 bytes |
| * 16 bytes: khandle |
| * padding to 8 bytes |
| * |
| * The trailer_buf starts with a struct orangefs_readdir_response_s |
| * which must be skipped to get to the directory data. |
| * |
| * The data which is received from the userspace daemon is termed a |
| * part and is stored in a linked list in case more than one part is |
| * needed for a large directory. |
| * |
| * The position pointer (ctx->pos) encodes the part and offset on which |
| * to begin reading at. Bits above PART_SHIFT encode the part and bits |
| * below PART_SHIFT encode the offset. Parts are stored in a linked |
| * list which grows as data is received from the server. The overhead |
| * associated with managing the list is presumed to be small compared to |
| * the overhead of communicating with the server. |
| * |
| * As data is received from the server, it is placed at the end of the |
| * part list. Data is parsed from the current position as it is needed. |
| * When data is determined to be corrupt, it is either because the |
| * userspace component has sent back corrupt data or because the file |
| * pointer has been moved to an invalid location. Since the two cannot |
| * be differentiated, return EIO. |
| * |
| * Part zero is synthesized to contains `.' and `..'. Part one is the |
| * first part of the part list. |
| */ |
| |
| static int do_readdir(struct orangefs_inode_s *oi, |
| struct orangefs_dir *od, struct dentry *dentry, |
| struct orangefs_kernel_op_s *op) |
| { |
| struct orangefs_readdir_response_s *resp; |
| int bufi, r; |
| |
| /* |
| * Despite the badly named field, readdir does not use shared |
| * memory. However, there are a limited number of readdir |
| * slots, which must be allocated here. This flag simply tells |
| * the op scheduler to return the op here for retry. |
| */ |
| op->uses_shared_memory = 1; |
| op->upcall.req.readdir.refn = oi->refn; |
| op->upcall.req.readdir.token = od->token; |
| op->upcall.req.readdir.max_dirent_count = |
| ORANGEFS_MAX_DIRENT_COUNT_READDIR; |
| |
| again: |
| bufi = orangefs_readdir_index_get(); |
| if (bufi < 0) { |
| od->error = bufi; |
| return bufi; |
| } |
| |
| op->upcall.req.readdir.buf_index = bufi; |
| |
| r = service_operation(op, "orangefs_readdir", |
| get_interruptible_flag(dentry->d_inode)); |
| |
| orangefs_readdir_index_put(bufi); |
| |
| if (op_state_purged(op)) { |
| if (r == -EAGAIN) { |
| vfree(op->downcall.trailer_buf); |
| goto again; |
| } else if (r == -EIO) { |
| vfree(op->downcall.trailer_buf); |
| od->error = r; |
| return r; |
| } |
| } |
| |
| if (r < 0) { |
| vfree(op->downcall.trailer_buf); |
| od->error = r; |
| return r; |
| } else if (op->downcall.status) { |
| vfree(op->downcall.trailer_buf); |
| od->error = op->downcall.status; |
| return op->downcall.status; |
| } |
| |
| /* |
| * The maximum size is size per entry times the 512 entries plus |
| * the header. This is well under the limit. |
| */ |
| if (op->downcall.trailer_size > PART_SIZE) { |
| vfree(op->downcall.trailer_buf); |
| od->error = -EIO; |
| return -EIO; |
| } |
| |
| resp = (struct orangefs_readdir_response_s *) |
| op->downcall.trailer_buf; |
| od->token = resp->token; |
| return 0; |
| } |
| |
| static int parse_readdir(struct orangefs_dir *od, |
| struct orangefs_kernel_op_s *op) |
| { |
| struct orangefs_dir_part *part, *new; |
| size_t count; |
| |
| count = 1; |
| part = od->part; |
| while (part && part->next) { |
| part = part->next; |
| count++; |
| } |
| |
| new = (void *)op->downcall.trailer_buf; |
| new->next = NULL; |
| new->len = op->downcall.trailer_size - |
| sizeof(struct orangefs_readdir_response_s); |
| if (!od->part) |
| od->part = new; |
| else |
| part->next = new; |
| count++; |
| od->end = count << PART_SHIFT; |
| |
| return 0; |
| } |
| |
| static int orangefs_dir_more(struct orangefs_inode_s *oi, |
| struct orangefs_dir *od, struct dentry *dentry) |
| { |
| struct orangefs_kernel_op_s *op; |
| int r; |
| |
| op = op_alloc(ORANGEFS_VFS_OP_READDIR); |
| if (!op) { |
| od->error = -ENOMEM; |
| return -ENOMEM; |
| } |
| r = do_readdir(oi, od, dentry, op); |
| if (r) { |
| od->error = r; |
| goto out; |
| } |
| r = parse_readdir(od, op); |
| if (r) { |
| od->error = r; |
| goto out; |
| } |
| |
| od->error = 0; |
| out: |
| op_release(op); |
| return od->error; |
| } |
| |
| static int fill_from_part(struct orangefs_dir_part *part, |
| struct dir_context *ctx) |
| { |
| const int offset = sizeof(struct orangefs_readdir_response_s); |
| struct orangefs_khandle *khandle; |
| __u32 *len, padlen; |
| loff_t i; |
| char *s; |
| i = ctx->pos & ~PART_MASK; |
| |
| /* The file offset from userspace is too large. */ |
| if (i > part->len) |
| return -EIO; |
| |
| while (i < part->len) { |
| if (part->len < i + sizeof *len) |
| return -EIO; |
| len = (void *)part + offset + i; |
| /* |
| * len is the size of the string itself. padlen is the |
| * total size of the encoded string. |
| */ |
| padlen = (sizeof *len + *len + 1) + |
| (8 - (sizeof *len + *len + 1)%8)%8; |
| if (part->len < i + padlen + sizeof *khandle) |
| return -EIO; |
| s = (void *)part + offset + i + sizeof *len; |
| if (s[*len] != 0) |
| return -EIO; |
| khandle = (void *)part + offset + i + padlen; |
| if (!dir_emit(ctx, s, *len, |
| orangefs_khandle_to_ino(khandle), |
| DT_UNKNOWN)) |
| return 0; |
| i += padlen + sizeof *khandle; |
| i = i + (8 - i%8)%8; |
| BUG_ON(i > part->len); |
| ctx->pos = (ctx->pos & PART_MASK) | i; |
| } |
| return 1; |
| } |
| |
| static int orangefs_dir_fill(struct orangefs_inode_s *oi, |
| struct orangefs_dir *od, struct dentry *dentry, |
| struct dir_context *ctx) |
| { |
| struct orangefs_dir_part *part; |
| size_t count; |
| |
| count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1; |
| |
| part = od->part; |
| while (part->next && count) { |
| count--; |
| part = part->next; |
| } |
| /* This means the userspace file offset is invalid. */ |
| if (count) { |
| od->error = -EIO; |
| return -EIO; |
| } |
| |
| while (part && part->len) { |
| int r; |
| r = fill_from_part(part, ctx); |
| if (r < 0) { |
| od->error = r; |
| return r; |
| } else if (r == 0) { |
| /* Userspace buffer is full. */ |
| break; |
| } else { |
| /* |
| * The part ran out of data. Move to the next |
| * part. */ |
| ctx->pos = (ctx->pos & PART_MASK) + |
| (1 << PART_SHIFT); |
| part = part->next; |
| } |
| } |
| return 0; |
| } |
| |
| static int orangefs_dir_iterate(struct file *file, |
| struct dir_context *ctx) |
| { |
| struct orangefs_inode_s *oi; |
| struct orangefs_dir *od; |
| struct dentry *dentry; |
| int r; |
| |
| dentry = file->f_path.dentry; |
| oi = ORANGEFS_I(dentry->d_inode); |
| od = file->private_data; |
| |
| if (od->error) |
| return od->error; |
| |
| if (ctx->pos == 0) { |
| if (!dir_emit_dot(file, ctx)) |
| return 0; |
| ctx->pos++; |
| } |
| if (ctx->pos == 1) { |
| if (!dir_emit_dotdot(file, ctx)) |
| return 0; |
| ctx->pos = 1 << PART_SHIFT; |
| } |
| |
| /* |
| * The seek position is in the first synthesized part but is not |
| * valid. |
| */ |
| if ((ctx->pos & PART_MASK) == 0) |
| return -EIO; |
| |
| r = 0; |
| |
| /* |
| * Must read more if the user has sought past what has been read |
| * so far. Stop a user who has sought past the end. |
| */ |
| while (od->token != ORANGEFS_READDIR_END && |
| ctx->pos > od->end) { |
| r = orangefs_dir_more(oi, od, dentry); |
| if (r) |
| return r; |
| } |
| if (od->token == ORANGEFS_READDIR_END && ctx->pos > od->end) |
| return -EIO; |
| |
| /* Then try to fill if there's any left in the buffer. */ |
| if (ctx->pos < od->end) { |
| r = orangefs_dir_fill(oi, od, dentry, ctx); |
| if (r) |
| return r; |
| } |
| |
| /* Finally get some more and try to fill. */ |
| if (od->token != ORANGEFS_READDIR_END) { |
| r = orangefs_dir_more(oi, od, dentry); |
| if (r) |
| return r; |
| r = orangefs_dir_fill(oi, od, dentry, ctx); |
| } |
| |
| return r; |
| } |
| |
| static int orangefs_dir_open(struct inode *inode, struct file *file) |
| { |
| struct orangefs_dir *od; |
| file->private_data = kmalloc(sizeof(struct orangefs_dir), |
| GFP_KERNEL); |
| if (!file->private_data) |
| return -ENOMEM; |
| od = file->private_data; |
| od->token = ORANGEFS_READDIR_START; |
| od->part = NULL; |
| od->end = 1 << PART_SHIFT; |
| od->error = 0; |
| return 0; |
| } |
| |
| static int orangefs_dir_release(struct inode *inode, struct file *file) |
| { |
| struct orangefs_dir *od = file->private_data; |
| struct orangefs_dir_part *part = od->part; |
| orangefs_flush_inode(inode); |
| while (part) { |
| struct orangefs_dir_part *next = part->next; |
| vfree(part); |
| part = next; |
| } |
| kfree(od); |
| return 0; |
| } |
| |
| const struct file_operations orangefs_dir_operations = { |
| .llseek = default_llseek, |
| .read = generic_read_dir, |
| .iterate = orangefs_dir_iterate, |
| .open = orangefs_dir_open, |
| .release = orangefs_dir_release |
| }; |