vfs: optimize inode cache access patterns
The inode structure layout is largely random, and some of the vfs paths
really do care about which cache lines they touch. The path lookup in
particular is already quite D$ (data cache) intensive, and profiles
show that accessing the 'inode->i_op->xyz' fields is quite costly.
We already optimized the dcache to not unnecessarily load the d_op
structure for members that are often NULL, using the DCACHE_OP_xyz bits
in dentry->d_flags. This patch does something very similar for the
inode ops that are used during pathname lookup, using the new IOP_xyz
bits in inode->i_opflags.
It also re-orders the fields so that the fields accessed by 'stat' are
together at the beginning of the inode structure, and roughly in the
order accessed.
The effect of this seems to be in the 1-2% range for a "make -j" run on
an already fully built kernel tree (nothing to rebuild, so the run is
fairly kernel-intensive, mostly in filename lookup), so it's visible.
The numbers are fairly noisy, though, and likely depend a lot on the
exact microarchitecture, so there's more tuning to be done.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 786b3b1..178cdb4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -738,22 +738,54 @@
struct posix_acl;
#define ACL_NOT_CACHED ((void *)(-1))
+#define IOP_FASTPERM 0x0001
+#define IOP_LOOKUP 0x0002
+#define IOP_NOFOLLOW 0x0004
+
+/*
+ * Keep mostly read-only and often accessed (especially for
+ * the RCU path lookup and 'stat' data) fields at the beginning
+ * of the 'struct inode'
+ */
struct inode {
- /* RCU path lookup touches following: */
umode_t i_mode;
+ unsigned short i_opflags;
uid_t i_uid;
gid_t i_gid;
+ unsigned int i_flags;
+
+#ifdef CONFIG_FS_POSIX_ACL
+ struct posix_acl *i_acl;
+ struct posix_acl *i_default_acl;
+#endif
+
const struct inode_operations *i_op;
struct super_block *i_sb;
+ struct address_space *i_mapping;
- spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
- unsigned int i_flags;
- unsigned long i_state;
#ifdef CONFIG_SECURITY
void *i_security;
#endif
- struct mutex i_mutex;
+ /* Stat data, not accessed from path walking */
+ unsigned long i_ino;
+ unsigned int i_nlink;
+ dev_t i_rdev;
+ loff_t i_size;
+ struct timespec i_atime;
+ struct timespec i_mtime;
+ struct timespec i_ctime;
+ unsigned int i_blkbits;
+ blkcnt_t i_blocks;
+
+#ifdef __NEED_I_SIZE_ORDERED
+ seqcount_t i_size_seqcount;
+#endif
+
+ /* Misc */
+ unsigned long i_state;
+ spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
+ struct mutex i_mutex;
unsigned long dirtied_when; /* jiffies of first dirtying */
@@ -765,25 +797,12 @@
struct list_head i_dentry;
struct rcu_head i_rcu;
};
- unsigned long i_ino;
atomic_t i_count;
- unsigned int i_nlink;
- dev_t i_rdev;
- unsigned int i_blkbits;
u64 i_version;
- loff_t i_size;
-#ifdef __NEED_I_SIZE_ORDERED
- seqcount_t i_size_seqcount;
-#endif
- struct timespec i_atime;
- struct timespec i_mtime;
- struct timespec i_ctime;
- blkcnt_t i_blocks;
unsigned short i_bytes;
atomic_t i_dio_count;
const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
struct file_lock *i_flock;
- struct address_space *i_mapping;
struct address_space i_data;
#ifdef CONFIG_QUOTA
struct dquot *i_dquot[MAXQUOTAS];
@@ -806,10 +825,6 @@
atomic_t i_readcount; /* struct files open RO */
#endif
atomic_t i_writecount;
-#ifdef CONFIG_FS_POSIX_ACL
- struct posix_acl *i_acl;
- struct posix_acl *i_default_acl;
-#endif
void *i_private; /* fs or device private pointer */
};