Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * linux/fs/file.c |
| 3 | * |
| 4 | * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes |
| 5 | * |
| 6 | * Manage the dynamic fd arrays in the process files_struct. |
| 7 | */ |
| 8 | |
| 9 | #include <linux/fs.h> |
| 10 | #include <linux/mm.h> |
| 11 | #include <linux/time.h> |
| 12 | #include <linux/slab.h> |
| 13 | #include <linux/vmalloc.h> |
| 14 | #include <linux/file.h> |
| 15 | #include <linux/bitops.h> |
| 16 | |
| 17 | |
| 18 | /* |
| 19 | * Allocate an fd array, using kmalloc or vmalloc. |
| 20 | * Note: the array isn't cleared at allocation time. |
| 21 | */ |
| 22 | struct file ** alloc_fd_array(int num) |
| 23 | { |
| 24 | struct file **new_fds; |
| 25 | int size = num * sizeof(struct file *); |
| 26 | |
| 27 | if (size <= PAGE_SIZE) |
| 28 | new_fds = (struct file **) kmalloc(size, GFP_KERNEL); |
| 29 | else |
| 30 | new_fds = (struct file **) vmalloc(size); |
| 31 | return new_fds; |
| 32 | } |
| 33 | |
| 34 | void free_fd_array(struct file **array, int num) |
| 35 | { |
| 36 | int size = num * sizeof(struct file *); |
| 37 | |
| 38 | if (!array) { |
| 39 | printk (KERN_ERR "free_fd_array: array = 0 (num = %d)\n", num); |
| 40 | return; |
| 41 | } |
| 42 | |
| 43 | if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */ |
| 44 | return; |
| 45 | else if (size <= PAGE_SIZE) |
| 46 | kfree(array); |
| 47 | else |
| 48 | vfree(array); |
| 49 | } |
| 50 | |
| 51 | /* |
| 52 | * Expand the fd array in the files_struct. Called with the files |
| 53 | * spinlock held for write. |
| 54 | */ |
| 55 | |
| 56 | static int expand_fd_array(struct files_struct *files, int nr) |
| 57 | __releases(files->file_lock) |
| 58 | __acquires(files->file_lock) |
| 59 | { |
| 60 | struct file **new_fds; |
| 61 | int error, nfds; |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 62 | struct fdtable *fdt; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 63 | |
| 64 | |
| 65 | error = -EMFILE; |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 66 | fdt = files_fdtable(files); |
| 67 | if (fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 68 | goto out; |
| 69 | |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 70 | nfds = fdt->max_fds; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 71 | spin_unlock(&files->file_lock); |
| 72 | |
| 73 | /* |
| 74 | * Expand to the max in easy steps, and keep expanding it until |
| 75 | * we have enough for the requested fd array size. |
| 76 | */ |
| 77 | |
| 78 | do { |
| 79 | #if NR_OPEN_DEFAULT < 256 |
| 80 | if (nfds < 256) |
| 81 | nfds = 256; |
| 82 | else |
| 83 | #endif |
| 84 | if (nfds < (PAGE_SIZE / sizeof(struct file *))) |
| 85 | nfds = PAGE_SIZE / sizeof(struct file *); |
| 86 | else { |
| 87 | nfds = nfds * 2; |
| 88 | if (nfds > NR_OPEN) |
| 89 | nfds = NR_OPEN; |
| 90 | } |
| 91 | } while (nfds <= nr); |
| 92 | |
| 93 | error = -ENOMEM; |
| 94 | new_fds = alloc_fd_array(nfds); |
| 95 | spin_lock(&files->file_lock); |
| 96 | if (!new_fds) |
| 97 | goto out; |
| 98 | |
| 99 | /* Copy the existing array and install the new pointer */ |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 100 | fdt = files_fdtable(files); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 101 | |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 102 | if (nfds > fdt->max_fds) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 103 | struct file **old_fds; |
| 104 | int i; |
| 105 | |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 106 | old_fds = xchg(&fdt->fd, new_fds); |
| 107 | i = xchg(&fdt->max_fds, nfds); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 108 | |
| 109 | /* Don't copy/clear the array if we are creating a new |
| 110 | fd array for fork() */ |
| 111 | if (i) { |
| 112 | memcpy(new_fds, old_fds, i * sizeof(struct file *)); |
| 113 | /* clear the remainder of the array */ |
| 114 | memset(&new_fds[i], 0, |
| 115 | (nfds-i) * sizeof(struct file *)); |
| 116 | |
| 117 | spin_unlock(&files->file_lock); |
| 118 | free_fd_array(old_fds, i); |
| 119 | spin_lock(&files->file_lock); |
| 120 | } |
| 121 | } else { |
| 122 | /* Somebody expanded the array while we slept ... */ |
| 123 | spin_unlock(&files->file_lock); |
| 124 | free_fd_array(new_fds, nfds); |
| 125 | spin_lock(&files->file_lock); |
| 126 | } |
| 127 | error = 0; |
| 128 | out: |
| 129 | return error; |
| 130 | } |
| 131 | |
| 132 | /* |
| 133 | * Allocate an fdset array, using kmalloc or vmalloc. |
| 134 | * Note: the array isn't cleared at allocation time. |
| 135 | */ |
| 136 | fd_set * alloc_fdset(int num) |
| 137 | { |
| 138 | fd_set *new_fdset; |
| 139 | int size = num / 8; |
| 140 | |
| 141 | if (size <= PAGE_SIZE) |
| 142 | new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL); |
| 143 | else |
| 144 | new_fdset = (fd_set *) vmalloc(size); |
| 145 | return new_fdset; |
| 146 | } |
| 147 | |
| 148 | void free_fdset(fd_set *array, int num) |
| 149 | { |
| 150 | int size = num / 8; |
| 151 | |
| 152 | if (num <= __FD_SETSIZE) /* Don't free an embedded fdset */ |
| 153 | return; |
| 154 | else if (size <= PAGE_SIZE) |
| 155 | kfree(array); |
| 156 | else |
| 157 | vfree(array); |
| 158 | } |
| 159 | |
| 160 | /* |
| 161 | * Expand the fdset in the files_struct. Called with the files spinlock |
| 162 | * held for write. |
| 163 | */ |
| 164 | static int expand_fdset(struct files_struct *files, int nr) |
| 165 | __releases(file->file_lock) |
| 166 | __acquires(file->file_lock) |
| 167 | { |
| 168 | fd_set *new_openset = NULL, *new_execset = NULL; |
| 169 | int error, nfds = 0; |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 170 | struct fdtable *fdt; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 171 | |
| 172 | error = -EMFILE; |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 173 | fdt = files_fdtable(files); |
| 174 | if (fdt->max_fdset >= NR_OPEN || nr >= NR_OPEN) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 175 | goto out; |
| 176 | |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 177 | nfds = fdt->max_fdset; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 178 | spin_unlock(&files->file_lock); |
| 179 | |
| 180 | /* Expand to the max in easy steps */ |
| 181 | do { |
| 182 | if (nfds < (PAGE_SIZE * 8)) |
| 183 | nfds = PAGE_SIZE * 8; |
| 184 | else { |
| 185 | nfds = nfds * 2; |
| 186 | if (nfds > NR_OPEN) |
| 187 | nfds = NR_OPEN; |
| 188 | } |
| 189 | } while (nfds <= nr); |
| 190 | |
| 191 | error = -ENOMEM; |
| 192 | new_openset = alloc_fdset(nfds); |
| 193 | new_execset = alloc_fdset(nfds); |
| 194 | spin_lock(&files->file_lock); |
| 195 | if (!new_openset || !new_execset) |
| 196 | goto out; |
| 197 | |
| 198 | error = 0; |
| 199 | |
| 200 | /* Copy the existing tables and install the new pointers */ |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 201 | fdt = files_fdtable(files); |
| 202 | if (nfds > fdt->max_fdset) { |
| 203 | int i = fdt->max_fdset / (sizeof(unsigned long) * 8); |
| 204 | int count = (nfds - fdt->max_fdset) / 8; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 205 | |
| 206 | /* |
| 207 | * Don't copy the entire array if the current fdset is |
| 208 | * not yet initialised. |
| 209 | */ |
| 210 | if (i) { |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 211 | memcpy (new_openset, fdt->open_fds, fdt->max_fdset/8); |
| 212 | memcpy (new_execset, fdt->close_on_exec, fdt->max_fdset/8); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 213 | memset (&new_openset->fds_bits[i], 0, count); |
| 214 | memset (&new_execset->fds_bits[i], 0, count); |
| 215 | } |
| 216 | |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 217 | nfds = xchg(&fdt->max_fdset, nfds); |
| 218 | new_openset = xchg(&fdt->open_fds, new_openset); |
| 219 | new_execset = xchg(&fdt->close_on_exec, new_execset); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 220 | spin_unlock(&files->file_lock); |
| 221 | free_fdset (new_openset, nfds); |
| 222 | free_fdset (new_execset, nfds); |
| 223 | spin_lock(&files->file_lock); |
| 224 | return 0; |
| 225 | } |
| 226 | /* Somebody expanded the array while we slept ... */ |
| 227 | |
| 228 | out: |
| 229 | spin_unlock(&files->file_lock); |
| 230 | if (new_openset) |
| 231 | free_fdset(new_openset, nfds); |
| 232 | if (new_execset) |
| 233 | free_fdset(new_execset, nfds); |
| 234 | spin_lock(&files->file_lock); |
| 235 | return error; |
| 236 | } |
| 237 | |
| 238 | /* |
| 239 | * Expand files. |
| 240 | * Return <0 on error; 0 nothing done; 1 files expanded, we may have blocked. |
| 241 | * Should be called with the files->file_lock spinlock held for write. |
| 242 | */ |
| 243 | int expand_files(struct files_struct *files, int nr) |
| 244 | { |
| 245 | int err, expand = 0; |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 246 | struct fdtable *fdt; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 247 | |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 248 | fdt = files_fdtable(files); |
| 249 | if (nr >= fdt->max_fdset) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 250 | expand = 1; |
| 251 | if ((err = expand_fdset(files, nr))) |
| 252 | goto out; |
| 253 | } |
Dipankar Sarma | badf166 | 2005-09-09 13:04:10 -0700 | [diff] [blame] | 254 | if (nr >= fdt->max_fds) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 255 | expand = 1; |
| 256 | if ((err = expand_fd_array(files, nr))) |
| 257 | goto out; |
| 258 | } |
| 259 | err = expand; |
| 260 | out: |
| 261 | return err; |
| 262 | } |