fs: allow for more than 2^31 files
Robin Holt tried to boot a 16TB system and found af_unix was overflowing
a 32bit value :
<quote>
We were seeing a failure which prevented boot. The kernel was incapable
of creating either a named pipe or unix domain socket. This comes down
to a common kernel function called unix_create1() which does:
atomic_inc(&unix_nr_socks);
if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
goto out;
The function get_max_files() is a simple return of files_stat.max_files.
files_stat.max_files is a signed integer and is computed in
fs/file_table.c's files_init().
n = (mempages * (PAGE_SIZE / 1024)) / 10;
files_stat.max_files = n;
In our case, mempages (total_ram_pages) is approx 3,758,096,384
(0xe0000000). That leaves max_files at approximately 1,503,238,553.
This causes 2 * get_max_files() to integer overflow.
</quote>
Fix is to let /proc/sys/fs/file-nr & /proc/sys/fs/file-max use long
integers, and change af_unix to use an atomic_long_t instead of atomic_t.
get_max_files() is changed to return an unsigned long. get_nr_files() is
changed to return a long.
unix_nr_socks is changed from atomic_t to atomic_long_t, while not
strictly needed to address Robin problem.
Before patch (on a 64bit kernel) :
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
-18446744071562067968
After patch:
# echo 2147483648 >/proc/sys/fs/file-max
# cat /proc/sys/fs/file-max
2147483648
# cat /proc/sys/fs/file-nr
704 0 2147483648
Reported-by: Robin Holt <holt@sgi.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: David Miller <davem@davemloft.net>
Reviewed-by: Robin Holt <holt@sgi.com>
Tested-by: Robin Holt <holt@sgi.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/fs/file_table.c b/fs/file_table.c
index a04bdd8..c3dee38 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -60,7 +60,7 @@
/*
* Return the total number of open files in the system
*/
-static int get_nr_files(void)
+static long get_nr_files(void)
{
return percpu_counter_read_positive(&nr_files);
}
@@ -68,7 +68,7 @@
/*
* Return the maximum number of open files in the system
*/
-int get_max_files(void)
+unsigned long get_max_files(void)
{
return files_stat.max_files;
}
@@ -82,7 +82,7 @@
void __user *buffer, size_t *lenp, loff_t *ppos)
{
files_stat.nr_files = get_nr_files();
- return proc_dointvec(table, write, buffer, lenp, ppos);
+ return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#else
int proc_nr_files(ctl_table *table, int write,
@@ -105,7 +105,7 @@
struct file *get_empty_filp(void)
{
const struct cred *cred = current_cred();
- static int old_max;
+ static long old_max;
struct file * f;
/*
@@ -140,8 +140,7 @@
over:
/* Ran out of filps - report that */
if (get_nr_files() > old_max) {
- printk(KERN_INFO "VFS: file-max limit %d reached\n",
- get_max_files());
+ pr_info("VFS: file-max limit %lu reached\n", get_max_files());
old_max = get_nr_files();
}
goto fail;
@@ -487,7 +486,7 @@
void __init files_init(unsigned long mempages)
{
- int n;
+ unsigned long n;
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
@@ -498,9 +497,7 @@
*/
n = (mempages * (PAGE_SIZE / 1024)) / 10;
- files_stat.max_files = n;
- if (files_stat.max_files < NR_FILE)
- files_stat.max_files = NR_FILE;
+ files_stat.max_files = max_t(unsigned long, n, NR_FILE);
files_defer_init();
lg_lock_init(files_lglock);
percpu_counter_init(&nr_files, 0);