Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame^] | 1 | /* |
| 2 | * (C) 2001 Clemson University and The University of Chicago |
| 3 | * |
| 4 | * See COPYING in top-level directory. |
| 5 | */ |
| 6 | |
| 7 | /* |
| 8 | * Linux VFS inode operations. |
| 9 | */ |
| 10 | |
| 11 | #include "protocol.h" |
| 12 | #include "pvfs2-kernel.h" |
| 13 | #include "pvfs2-bufmap.h" |
| 14 | |
| 15 | static int read_one_page(struct page *page) |
| 16 | { |
| 17 | void *page_data; |
| 18 | int ret; |
| 19 | int max_block; |
| 20 | ssize_t bytes_read = 0; |
| 21 | struct inode *inode = page->mapping->host; |
| 22 | const __u32 blocksize = PAGE_CACHE_SIZE; /* inode->i_blksize */ |
| 23 | const __u32 blockbits = PAGE_CACHE_SHIFT; /* inode->i_blkbits */ |
| 24 | |
| 25 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 26 | "pvfs2_readpage called with page %p\n", |
| 27 | page); |
| 28 | page_data = pvfs2_kmap(page); |
| 29 | |
| 30 | max_block = ((inode->i_size / blocksize) + 1); |
| 31 | |
| 32 | if (page->index < max_block) { |
| 33 | loff_t blockptr_offset = (((loff_t) page->index) << blockbits); |
| 34 | |
| 35 | bytes_read = pvfs2_inode_read(inode, |
| 36 | page_data, |
| 37 | blocksize, |
| 38 | &blockptr_offset, |
| 39 | inode->i_size); |
| 40 | } |
| 41 | /* only zero remaining unread portions of the page data */ |
| 42 | if (bytes_read > 0) |
| 43 | memset(page_data + bytes_read, 0, blocksize - bytes_read); |
| 44 | else |
| 45 | memset(page_data, 0, blocksize); |
| 46 | /* takes care of potential aliasing */ |
| 47 | flush_dcache_page(page); |
| 48 | if (bytes_read < 0) { |
| 49 | ret = bytes_read; |
| 50 | SetPageError(page); |
| 51 | } else { |
| 52 | SetPageUptodate(page); |
| 53 | if (PageError(page)) |
| 54 | ClearPageError(page); |
| 55 | ret = 0; |
| 56 | } |
| 57 | pvfs2_kunmap(page); |
| 58 | /* unlock the page after the ->readpage() routine completes */ |
| 59 | unlock_page(page); |
| 60 | return ret; |
| 61 | } |
| 62 | |
/*
 * ->readpage() entry point: delegates to read_one_page().  The file
 * argument is not used.
 */
static int pvfs2_readpage(struct file *file, struct page *page)
{
	int status = read_one_page(page);

	return status;
}
| 67 | |
| 68 | static int pvfs2_readpages(struct file *file, |
| 69 | struct address_space *mapping, |
| 70 | struct list_head *pages, |
| 71 | unsigned nr_pages) |
| 72 | { |
| 73 | int page_idx; |
| 74 | int ret; |
| 75 | |
| 76 | gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_readpages called\n"); |
| 77 | |
| 78 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
| 79 | struct page *page; |
| 80 | |
| 81 | page = list_entry(pages->prev, struct page, lru); |
| 82 | list_del(&page->lru); |
| 83 | if (!add_to_page_cache(page, |
| 84 | mapping, |
| 85 | page->index, |
| 86 | GFP_KERNEL)) { |
| 87 | ret = read_one_page(page); |
| 88 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 89 | "failure adding page to cache, read_one_page returned: %d\n", |
| 90 | ret); |
| 91 | } else { |
| 92 | page_cache_release(page); |
| 93 | } |
| 94 | } |
| 95 | BUG_ON(!list_empty(pages)); |
| 96 | return 0; |
| 97 | } |
| 98 | |
| 99 | static void pvfs2_invalidatepage(struct page *page, |
| 100 | unsigned int offset, |
| 101 | unsigned int length) |
| 102 | { |
| 103 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 104 | "pvfs2_invalidatepage called on page %p " |
| 105 | "(offset is %u)\n", |
| 106 | page, |
| 107 | offset); |
| 108 | |
| 109 | ClearPageUptodate(page); |
| 110 | ClearPageMappedToDisk(page); |
| 111 | return; |
| 112 | |
| 113 | } |
| 114 | |
| 115 | static int pvfs2_releasepage(struct page *page, gfp_t foo) |
| 116 | { |
| 117 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 118 | "pvfs2_releasepage called on page %p\n", |
| 119 | page); |
| 120 | return 0; |
| 121 | } |
| 122 | |
/*
 * Having a direct_IO entry point in the address_space_operations
 * struct causes the kernel to allow us to use O_DIRECT on
 * open. Nothing will ever call this thing, but in the future we
 * will need to be able to use O_DIRECT on open in order to support
 * AIO. This is modeled after NFS, which does the same thing.
 */
| 130 | /* |
| 131 | static ssize_t pvfs2_direct_IO(int rw, |
| 132 | struct kiocb *iocb, |
| 133 | struct iov_iter *iter, |
| 134 | loff_t offset) |
| 135 | { |
| 136 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 137 | "pvfs2_direct_IO: %s\n", |
| 138 | iocb->ki_filp->f_path.dentry->d_name.name); |
| 139 | |
| 140 | return -EINVAL; |
| 141 | } |
| 142 | */ |
| 143 | |
| 144 | struct backing_dev_info pvfs2_backing_dev_info = { |
| 145 | .name = "pvfs2", |
| 146 | .ra_pages = 0, |
| 147 | .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, |
| 148 | }; |
| 149 | |
| 150 | /** PVFS2 implementation of address space operations */ |
| 151 | const struct address_space_operations pvfs2_address_operations = { |
| 152 | .readpage = pvfs2_readpage, |
| 153 | .readpages = pvfs2_readpages, |
| 154 | .invalidatepage = pvfs2_invalidatepage, |
| 155 | .releasepage = pvfs2_releasepage, |
| 156 | /* .direct_IO = pvfs2_direct_IO */ |
| 157 | }; |
| 158 | |
| 159 | static int pvfs2_setattr_size(struct inode *inode, struct iattr *iattr) |
| 160 | { |
| 161 | struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode); |
| 162 | struct pvfs2_kernel_op_s *new_op; |
| 163 | loff_t orig_size = i_size_read(inode); |
| 164 | int ret = -EINVAL; |
| 165 | |
| 166 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 167 | "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n", |
| 168 | __func__, |
| 169 | get_khandle_from_ino(inode), |
| 170 | &pvfs2_inode->refn.khandle, |
| 171 | pvfs2_inode->refn.fs_id, |
| 172 | iattr->ia_size); |
| 173 | |
| 174 | truncate_setsize(inode, iattr->ia_size); |
| 175 | |
| 176 | new_op = op_alloc(PVFS2_VFS_OP_TRUNCATE); |
| 177 | if (!new_op) |
| 178 | return -ENOMEM; |
| 179 | |
| 180 | new_op->upcall.req.truncate.refn = pvfs2_inode->refn; |
| 181 | new_op->upcall.req.truncate.size = (__s64) iattr->ia_size; |
| 182 | |
| 183 | ret = service_operation(new_op, __func__, |
| 184 | get_interruptible_flag(inode)); |
| 185 | |
| 186 | /* |
| 187 | * the truncate has no downcall members to retrieve, but |
| 188 | * the status value tells us if it went through ok or not |
| 189 | */ |
| 190 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 191 | "pvfs2: pvfs2_truncate got return value of %d\n", |
| 192 | ret); |
| 193 | |
| 194 | op_release(new_op); |
| 195 | |
| 196 | if (ret != 0) |
| 197 | return ret; |
| 198 | |
| 199 | /* |
| 200 | * Only change the c/mtime if we are changing the size or we are |
| 201 | * explicitly asked to change it. This handles the semantic difference |
| 202 | * between truncate() and ftruncate() as implemented in the VFS. |
| 203 | * |
| 204 | * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a |
| 205 | * special case where we need to update the times despite not having |
| 206 | * these flags set. For all other operations the VFS set these flags |
| 207 | * explicitly if it wants a timestamp update. |
| 208 | */ |
| 209 | if (orig_size != i_size_read(inode) && |
| 210 | !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { |
| 211 | iattr->ia_ctime = iattr->ia_mtime = |
| 212 | current_fs_time(inode->i_sb); |
| 213 | iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; |
| 214 | } |
| 215 | |
| 216 | return ret; |
| 217 | } |
| 218 | |
| 219 | /* |
| 220 | * Change attributes of an object referenced by dentry. |
| 221 | */ |
| 222 | int pvfs2_setattr(struct dentry *dentry, struct iattr *iattr) |
| 223 | { |
| 224 | int ret = -EINVAL; |
| 225 | struct inode *inode = dentry->d_inode; |
| 226 | |
| 227 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 228 | "pvfs2_setattr: called on %s\n", |
| 229 | dentry->d_name.name); |
| 230 | |
| 231 | ret = inode_change_ok(inode, iattr); |
| 232 | if (ret) |
| 233 | goto out; |
| 234 | |
| 235 | if ((iattr->ia_valid & ATTR_SIZE) && |
| 236 | iattr->ia_size != i_size_read(inode)) { |
| 237 | ret = pvfs2_setattr_size(inode, iattr); |
| 238 | if (ret) |
| 239 | goto out; |
| 240 | } |
| 241 | |
| 242 | setattr_copy(inode, iattr); |
| 243 | mark_inode_dirty(inode); |
| 244 | |
| 245 | ret = pvfs2_inode_setattr(inode, iattr); |
| 246 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 247 | "pvfs2_setattr: inode_setattr returned %d\n", |
| 248 | ret); |
| 249 | |
| 250 | if (!ret && (iattr->ia_valid & ATTR_MODE)) |
| 251 | /* change mod on a file that has ACLs */ |
| 252 | ret = posix_acl_chmod(inode, inode->i_mode); |
| 253 | |
| 254 | out: |
| 255 | gossip_debug(GOSSIP_INODE_DEBUG, "pvfs2_setattr: returning %d\n", ret); |
| 256 | return ret; |
| 257 | } |
| 258 | |
| 259 | /* |
| 260 | * Obtain attributes of an object given a dentry |
| 261 | */ |
| 262 | int pvfs2_getattr(struct vfsmount *mnt, |
| 263 | struct dentry *dentry, |
| 264 | struct kstat *kstat) |
| 265 | { |
| 266 | int ret = -ENOENT; |
| 267 | struct inode *inode = dentry->d_inode; |
| 268 | struct pvfs2_inode_s *pvfs2_inode = NULL; |
| 269 | |
| 270 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 271 | "pvfs2_getattr: called on %s\n", |
| 272 | dentry->d_name.name); |
| 273 | |
| 274 | /* |
| 275 | * Similar to the above comment, a getattr also expects that all |
| 276 | * fields/attributes of the inode would be refreshed. So again, we |
| 277 | * dont have too much of a choice but refresh all the attributes. |
| 278 | */ |
| 279 | ret = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); |
| 280 | if (ret == 0) { |
| 281 | generic_fillattr(inode, kstat); |
| 282 | /* override block size reported to stat */ |
| 283 | pvfs2_inode = PVFS2_I(inode); |
| 284 | kstat->blksize = pvfs2_inode->blksize; |
| 285 | } else { |
| 286 | /* assume an I/O error and flag inode as bad */ |
| 287 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 288 | "%s:%s:%d calling make bad inode\n", |
| 289 | __FILE__, |
| 290 | __func__, |
| 291 | __LINE__); |
| 292 | pvfs2_make_bad_inode(inode); |
| 293 | } |
| 294 | return ret; |
| 295 | } |
| 296 | |
| 297 | /* PVFS2 implementation of VFS inode operations for files */ |
| 298 | struct inode_operations pvfs2_file_inode_operations = { |
| 299 | .get_acl = pvfs2_get_acl, |
| 300 | .set_acl = pvfs2_set_acl, |
| 301 | .setattr = pvfs2_setattr, |
| 302 | .getattr = pvfs2_getattr, |
| 303 | .setxattr = generic_setxattr, |
| 304 | .getxattr = generic_getxattr, |
| 305 | .listxattr = pvfs2_listxattr, |
| 306 | .removexattr = generic_removexattr, |
| 307 | }; |
| 308 | |
| 309 | static int pvfs2_init_iops(struct inode *inode) |
| 310 | { |
| 311 | inode->i_mapping->a_ops = &pvfs2_address_operations; |
| 312 | |
| 313 | switch (inode->i_mode & S_IFMT) { |
| 314 | case S_IFREG: |
| 315 | inode->i_op = &pvfs2_file_inode_operations; |
| 316 | inode->i_fop = &pvfs2_file_operations; |
| 317 | inode->i_blkbits = PAGE_CACHE_SHIFT; |
| 318 | break; |
| 319 | case S_IFLNK: |
| 320 | inode->i_op = &pvfs2_symlink_inode_operations; |
| 321 | break; |
| 322 | case S_IFDIR: |
| 323 | inode->i_op = &pvfs2_dir_inode_operations; |
| 324 | inode->i_fop = &pvfs2_dir_operations; |
| 325 | break; |
| 326 | default: |
| 327 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 328 | "%s: unsupported mode\n", |
| 329 | __func__); |
| 330 | return -EINVAL; |
| 331 | } |
| 332 | |
| 333 | return 0; |
| 334 | } |
| 335 | |
| 336 | /* |
| 337 | * Given a PVFS2 object identifier (fsid, handle), convert it into a ino_t type |
| 338 | * that will be used as a hash-index from where the handle will |
| 339 | * be searched for in the VFS hash table of inodes. |
| 340 | */ |
| 341 | static inline ino_t pvfs2_handle_hash(struct pvfs2_object_kref *ref) |
| 342 | { |
| 343 | if (!ref) |
| 344 | return 0; |
| 345 | return pvfs2_khandle_to_ino(&(ref->khandle)); |
| 346 | } |
| 347 | |
| 348 | /* |
| 349 | * Called to set up an inode from iget5_locked. |
| 350 | */ |
| 351 | static int pvfs2_set_inode(struct inode *inode, void *data) |
| 352 | { |
| 353 | struct pvfs2_object_kref *ref = (struct pvfs2_object_kref *) data; |
| 354 | struct pvfs2_inode_s *pvfs2_inode = NULL; |
| 355 | |
| 356 | /* Make sure that we have sane parameters */ |
| 357 | if (!data || !inode) |
| 358 | return 0; |
| 359 | pvfs2_inode = PVFS2_I(inode); |
| 360 | if (!pvfs2_inode) |
| 361 | return 0; |
| 362 | pvfs2_inode->refn.fs_id = ref->fs_id; |
| 363 | pvfs2_inode->refn.khandle = ref->khandle; |
| 364 | return 0; |
| 365 | } |
| 366 | |
| 367 | /* |
| 368 | * Called to determine if handles match. |
| 369 | */ |
| 370 | static int pvfs2_test_inode(struct inode *inode, void *data) |
| 371 | { |
| 372 | struct pvfs2_object_kref *ref = (struct pvfs2_object_kref *) data; |
| 373 | struct pvfs2_inode_s *pvfs2_inode = NULL; |
| 374 | |
| 375 | pvfs2_inode = PVFS2_I(inode); |
| 376 | return (!PVFS_khandle_cmp(&(pvfs2_inode->refn.khandle), &(ref->khandle)) |
| 377 | && pvfs2_inode->refn.fs_id == ref->fs_id); |
| 378 | } |
| 379 | |
| 380 | /* |
| 381 | * Front-end to lookup the inode-cache maintained by the VFS using the PVFS2 |
| 382 | * file handle. |
| 383 | * |
| 384 | * @sb: the file system super block instance. |
| 385 | * @ref: The PVFS2 object for which we are trying to locate an inode structure. |
| 386 | */ |
| 387 | struct inode *pvfs2_iget(struct super_block *sb, struct pvfs2_object_kref *ref) |
| 388 | { |
| 389 | struct inode *inode = NULL; |
| 390 | unsigned long hash; |
| 391 | int error; |
| 392 | |
| 393 | hash = pvfs2_handle_hash(ref); |
| 394 | inode = iget5_locked(sb, hash, pvfs2_test_inode, pvfs2_set_inode, ref); |
| 395 | if (!inode || !(inode->i_state & I_NEW)) |
| 396 | return inode; |
| 397 | |
| 398 | error = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); |
| 399 | if (error) { |
| 400 | iget_failed(inode); |
| 401 | return ERR_PTR(error); |
| 402 | } |
| 403 | |
| 404 | inode->i_ino = hash; /* needed for stat etc */ |
| 405 | pvfs2_init_iops(inode); |
| 406 | unlock_new_inode(inode); |
| 407 | |
| 408 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 409 | "iget handle %pU, fsid %d hash %ld i_ino %lu\n", |
| 410 | &ref->khandle, |
| 411 | ref->fs_id, |
| 412 | hash, |
| 413 | inode->i_ino); |
| 414 | |
| 415 | return inode; |
| 416 | } |
| 417 | |
| 418 | /* |
| 419 | * Allocate an inode for a newly created file and insert it into the inode hash. |
| 420 | */ |
| 421 | struct inode *pvfs2_new_inode(struct super_block *sb, struct inode *dir, |
| 422 | int mode, dev_t dev, struct pvfs2_object_kref *ref) |
| 423 | { |
| 424 | unsigned long hash = pvfs2_handle_hash(ref); |
| 425 | struct inode *inode; |
| 426 | int error; |
| 427 | |
| 428 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 429 | "pvfs2_get_custom_inode_common: called\n" |
| 430 | "(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n", |
| 431 | sb, |
| 432 | MAJOR(dev), |
| 433 | MINOR(dev), |
| 434 | mode); |
| 435 | |
| 436 | inode = new_inode(sb); |
| 437 | if (!inode) |
| 438 | return NULL; |
| 439 | |
| 440 | pvfs2_set_inode(inode, ref); |
| 441 | inode->i_ino = hash; /* needed for stat etc */ |
| 442 | |
| 443 | error = pvfs2_inode_getattr(inode, PVFS_ATTR_SYS_ALL_NOHINT); |
| 444 | if (error) |
| 445 | goto out_iput; |
| 446 | |
| 447 | pvfs2_init_iops(inode); |
| 448 | |
| 449 | inode->i_mode = mode; |
| 450 | inode->i_uid = current_fsuid(); |
| 451 | inode->i_gid = current_fsgid(); |
| 452 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
| 453 | inode->i_size = PAGE_CACHE_SIZE; |
| 454 | inode->i_rdev = dev; |
| 455 | |
| 456 | error = insert_inode_locked4(inode, hash, pvfs2_test_inode, ref); |
| 457 | if (error < 0) |
| 458 | goto out_iput; |
| 459 | |
| 460 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 461 | "Initializing ACL's for inode %pU\n", |
| 462 | get_khandle_from_ino(inode)); |
| 463 | pvfs2_init_acl(inode, dir); |
| 464 | return inode; |
| 465 | |
| 466 | out_iput: |
| 467 | iput(inode); |
| 468 | return ERR_PTR(error); |
| 469 | } |