blob: 6c6315d04028711151627a09a29c186fec7d856a [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * proc/fs/generic.c --- generic routines for the proc-fs
3 *
4 * This file contains generic proc-fs routines for handling
5 * directories and files.
6 *
7 * Copyright (C) 1991, 1992 Linus Torvalds.
8 * Copyright (C) 1997 Theodore Ts'o
9 */
10
11#include <linux/errno.h>
12#include <linux/time.h>
13#include <linux/proc_fs.h>
14#include <linux/stat.h>
15#include <linux/module.h>
16#include <linux/mount.h>
17#include <linux/smp_lock.h>
18#include <linux/init.h>
19#include <linux/idr.h>
20#include <linux/namei.h>
21#include <linux/bitops.h>
22#include <asm/uaccess.h>
23
24static ssize_t proc_file_read(struct file *file, char __user *buf,
25 size_t nbytes, loff_t *ppos);
26static ssize_t proc_file_write(struct file *file, const char __user *buffer,
27 size_t count, loff_t *ppos);
28static loff_t proc_file_lseek(struct file *, loff_t, int);
29
30int proc_match(int len, const char *name, struct proc_dir_entry *de)
31{
32 if (de->namelen != len)
33 return 0;
34 return !memcmp(name, de->name, len);
35}
36
37static struct file_operations proc_file_operations = {
38 .llseek = proc_file_lseek,
39 .read = proc_file_read,
40 .write = proc_file_write,
41};
42
43/* buffer size is one page but our output routines use some slack for overruns */
44#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
45
46static ssize_t
47proc_file_read(struct file *file, char __user *buf, size_t nbytes,
48 loff_t *ppos)
49{
50 struct inode * inode = file->f_dentry->d_inode;
51 char *page;
52 ssize_t retval=0;
53 int eof=0;
54 ssize_t n, count;
55 char *start;
56 struct proc_dir_entry * dp;
57
58 dp = PDE(inode);
59 if (!(page = (char*) __get_free_page(GFP_KERNEL)))
60 return -ENOMEM;
61
62 while ((nbytes > 0) && !eof) {
63 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
64
65 start = NULL;
66 if (dp->get_info) {
67 /* Handle old net routines */
68 n = dp->get_info(page, &start, *ppos, count);
69 if (n < count)
70 eof = 1;
71 } else if (dp->read_proc) {
72 /*
73 * How to be a proc read function
74 * ------------------------------
75 * Prototype:
76 * int f(char *buffer, char **start, off_t offset,
77 * int count, int *peof, void *dat)
78 *
79 * Assume that the buffer is "count" bytes in size.
80 *
81 * If you know you have supplied all the data you
82 * have, set *peof.
83 *
84 * You have three ways to return data:
85 * 0) Leave *start = NULL. (This is the default.)
86 * Put the data of the requested offset at that
87 * offset within the buffer. Return the number (n)
88 * of bytes there are from the beginning of the
89 * buffer up to the last byte of data. If the
90 * number of supplied bytes (= n - offset) is
91 * greater than zero and you didn't signal eof
92 * and the reader is prepared to take more data
93 * you will be called again with the requested
94 * offset advanced by the number of bytes
95 * absorbed. This interface is useful for files
96 * no larger than the buffer.
97 * 1) Set *start = an unsigned long value less than
98 * the buffer address but greater than zero.
99 * Put the data of the requested offset at the
100 * beginning of the buffer. Return the number of
101 * bytes of data placed there. If this number is
102 * greater than zero and you didn't signal eof
103 * and the reader is prepared to take more data
104 * you will be called again with the requested
105 * offset advanced by *start. This interface is
106 * useful when you have a large file consisting
107 * of a series of blocks which you want to count
108 * and return as wholes.
109 * (Hack by Paul.Russell@rustcorp.com.au)
110 * 2) Set *start = an address within the buffer.
111 * Put the data of the requested offset at *start.
112 * Return the number of bytes of data placed there.
113 * If this number is greater than zero and you
114 * didn't signal eof and the reader is prepared to
115 * take more data you will be called again with the
116 * requested offset advanced by the number of bytes
117 * absorbed.
118 */
119 n = dp->read_proc(page, &start, *ppos,
120 count, &eof, dp->data);
121 } else
122 break;
123
124 if (n == 0) /* end of file */
125 break;
126 if (n < 0) { /* error */
127 if (retval == 0)
128 retval = n;
129 break;
130 }
131
132 if (start == NULL) {
133 if (n > PAGE_SIZE) {
134 printk(KERN_ERR
135 "proc_file_read: Apparent buffer overflow!\n");
136 n = PAGE_SIZE;
137 }
138 n -= *ppos;
139 if (n <= 0)
140 break;
141 if (n > count)
142 n = count;
143 start = page + *ppos;
144 } else if (start < page) {
145 if (n > PAGE_SIZE) {
146 printk(KERN_ERR
147 "proc_file_read: Apparent buffer overflow!\n");
148 n = PAGE_SIZE;
149 }
150 if (n > count) {
151 /*
152 * Don't reduce n because doing so might
153 * cut off part of a data block.
154 */
155 printk(KERN_WARNING
156 "proc_file_read: Read count exceeded\n");
157 }
158 } else /* start >= page */ {
159 unsigned long startoff = (unsigned long)(start - page);
160 if (n > (PAGE_SIZE - startoff)) {
161 printk(KERN_ERR
162 "proc_file_read: Apparent buffer overflow!\n");
163 n = PAGE_SIZE - startoff;
164 }
165 if (n > count)
166 n = count;
167 }
168
169 n -= copy_to_user(buf, start < page ? page : start, n);
170 if (n == 0) {
171 if (retval == 0)
172 retval = -EFAULT;
173 break;
174 }
175
176 *ppos += start < page ? (unsigned long)start : n;
177 nbytes -= n;
178 buf += n;
179 retval += n;
180 }
181 free_page((unsigned long) page);
182 return retval;
183}
184
185static ssize_t
186proc_file_write(struct file *file, const char __user *buffer,
187 size_t count, loff_t *ppos)
188{
189 struct inode *inode = file->f_dentry->d_inode;
190 struct proc_dir_entry * dp;
191
192 dp = PDE(inode);
193
194 if (!dp->write_proc)
195 return -EIO;
196
197 /* FIXME: does this routine need ppos? probably... */
198 return dp->write_proc(file, buffer, count, dp->data);
199}
200
201
202static loff_t
203proc_file_lseek(struct file *file, loff_t offset, int orig)
204{
205 lock_kernel();
206
207 switch (orig) {
208 case 0:
209 if (offset < 0)
210 goto out;
211 file->f_pos = offset;
212 unlock_kernel();
213 return(file->f_pos);
214 case 1:
215 if (offset + file->f_pos < 0)
216 goto out;
217 file->f_pos += offset;
218 unlock_kernel();
219 return(file->f_pos);
220 case 2:
221 goto out;
222 default:
223 goto out;
224 }
225
226out:
227 unlock_kernel();
228 return -EINVAL;
229}
230
231static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
232{
233 struct inode *inode = dentry->d_inode;
234 struct proc_dir_entry *de = PDE(inode);
235 int error;
236
237 error = inode_change_ok(inode, iattr);
238 if (error)
239 goto out;
240
241 error = inode_setattr(inode, iattr);
242 if (error)
243 goto out;
244
245 de->uid = inode->i_uid;
246 de->gid = inode->i_gid;
247 de->mode = inode->i_mode;
248out:
249 return error;
250}
251
252static struct inode_operations proc_file_inode_operations = {
253 .setattr = proc_notify_change,
254};
255
256/*
257 * This function parses a name such as "tty/driver/serial", and
258 * returns the struct proc_dir_entry for "/proc/tty/driver", and
259 * returns "serial" in residual.
260 */
261static int xlate_proc_name(const char *name,
262 struct proc_dir_entry **ret, const char **residual)
263{
264 const char *cp = name, *next;
265 struct proc_dir_entry *de;
266 int len;
267
268 de = &proc_root;
269 while (1) {
270 next = strchr(cp, '/');
271 if (!next)
272 break;
273
274 len = next - cp;
275 for (de = de->subdir; de ; de = de->next) {
276 if (proc_match(len, cp, de))
277 break;
278 }
279 if (!de)
280 return -ENOENT;
281 cp += len + 1;
282 }
283 *residual = cp;
284 *ret = de;
285 return 0;
286}
287
288static DEFINE_IDR(proc_inum_idr);
289static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
290
291#define PROC_DYNAMIC_FIRST 0xF0000000UL
292
293/*
294 * Return an inode number between PROC_DYNAMIC_FIRST and
295 * 0xffffffff, or zero on failure.
296 */
297static unsigned int get_inode_number(void)
298{
299 int i, inum = 0;
300 int error;
301
302retry:
303 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0)
304 return 0;
305
306 spin_lock(&proc_inum_lock);
307 error = idr_get_new(&proc_inum_idr, NULL, &i);
308 spin_unlock(&proc_inum_lock);
309 if (error == -EAGAIN)
310 goto retry;
311 else if (error)
312 return 0;
313
314 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST;
315
316 /* inum will never be more than 0xf0ffffff, so no check
317 * for overflow.
318 */
319
320 return inum;
321}
322
323static void release_inode_number(unsigned int inum)
324{
325 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
326
327 spin_lock(&proc_inum_lock);
328 idr_remove(&proc_inum_idr, id);
329 spin_unlock(&proc_inum_lock);
330}
331
332static int proc_follow_link(struct dentry *dentry, struct nameidata *nd)
333{
334 nd_set_link(nd, PDE(dentry->d_inode)->data);
335 return 0;
336}
337
338static struct inode_operations proc_link_inode_operations = {
339 .readlink = generic_readlink,
340 .follow_link = proc_follow_link,
341};
342
/*
 * d_delete hook for /proc dentries.
 *
 * Some entries in /proc are volatile, so unused dentries should not be
 * kept around: this unconditionally returns 1 for every dentry.  It could
 * be made smarter by keeping a "volatile" flag in the inode to indicate
 * which ones to keep.
 */
static int proc_delete_dentry(struct dentry * dentry)
{
	const int drop_unused = 1;

	return drop_unused;
}
353
354static struct dentry_operations proc_dentry_operations =
355{
356 .d_delete = proc_delete_dentry,
357};
358
359/*
360 * Don't create negative dentries here, return -ENOENT by hand
361 * instead.
362 */
363struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
364{
365 struct inode *inode = NULL;
366 struct proc_dir_entry * de;
367 int error = -ENOENT;
368
369 lock_kernel();
370 de = PDE(dir);
371 if (de) {
372 for (de = de->subdir; de ; de = de->next) {
373 if (de->namelen != dentry->d_name.len)
374 continue;
375 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
376 unsigned int ino = de->low_ino;
377
378 error = -EINVAL;
379 inode = proc_get_inode(dir->i_sb, ino, de);
380 break;
381 }
382 }
383 }
384 unlock_kernel();
385
386 if (inode) {
387 dentry->d_op = &proc_dentry_operations;
388 d_add(dentry, inode);
389 return NULL;
390 }
391 return ERR_PTR(error);
392}
393
394/*
395 * This returns non-zero if at EOF, so that the /proc
396 * root directory can use this and check if it should
397 * continue with the <pid> entries..
398 *
399 * Note that the VFS-layer doesn't care about the return
400 * value of the readdir() call, as long as it's non-negative
401 * for success..
402 */
403int proc_readdir(struct file * filp,
404 void * dirent, filldir_t filldir)
405{
406 struct proc_dir_entry * de;
407 unsigned int ino;
408 int i;
409 struct inode *inode = filp->f_dentry->d_inode;
410 int ret = 0;
411
412 lock_kernel();
413
414 ino = inode->i_ino;
415 de = PDE(inode);
416 if (!de) {
417 ret = -EINVAL;
418 goto out;
419 }
420 i = filp->f_pos;
421 switch (i) {
422 case 0:
423 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
424 goto out;
425 i++;
426 filp->f_pos++;
427 /* fall through */
428 case 1:
429 if (filldir(dirent, "..", 2, i,
430 parent_ino(filp->f_dentry),
431 DT_DIR) < 0)
432 goto out;
433 i++;
434 filp->f_pos++;
435 /* fall through */
436 default:
437 de = de->subdir;
438 i -= 2;
439 for (;;) {
440 if (!de) {
441 ret = 1;
442 goto out;
443 }
444 if (!i)
445 break;
446 de = de->next;
447 i--;
448 }
449
450 do {
451 if (filldir(dirent, de->name, de->namelen, filp->f_pos,
452 de->low_ino, de->mode >> 12) < 0)
453 goto out;
454 filp->f_pos++;
455 de = de->next;
456 } while (de);
457 }
458 ret = 1;
459out: unlock_kernel();
460 return ret;
461}
462
463/*
464 * These are the generic /proc directory operations. They
465 * use the in-memory "struct proc_dir_entry" tree to parse
466 * the /proc directory.
467 */
468static struct file_operations proc_dir_operations = {
469 .read = generic_read_dir,
470 .readdir = proc_readdir,
471};
472
473/*
474 * proc directories can do almost nothing..
475 */
476static struct inode_operations proc_dir_inode_operations = {
477 .lookup = proc_lookup,
478 .setattr = proc_notify_change,
479};
480
481static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
482{
483 unsigned int i;
484
485 i = get_inode_number();
486 if (i == 0)
487 return -EAGAIN;
488 dp->low_ino = i;
489 dp->next = dir->subdir;
490 dp->parent = dir;
491 dir->subdir = dp;
492 if (S_ISDIR(dp->mode)) {
493 if (dp->proc_iops == NULL) {
494 dp->proc_fops = &proc_dir_operations;
495 dp->proc_iops = &proc_dir_inode_operations;
496 }
497 dir->nlink++;
498 } else if (S_ISLNK(dp->mode)) {
499 if (dp->proc_iops == NULL)
500 dp->proc_iops = &proc_link_inode_operations;
501 } else if (S_ISREG(dp->mode)) {
502 if (dp->proc_fops == NULL)
503 dp->proc_fops = &proc_file_operations;
504 if (dp->proc_iops == NULL)
505 dp->proc_iops = &proc_file_inode_operations;
506 }
507 return 0;
508}
509
510/*
511 * Kill an inode that got unregistered..
512 */
513static void proc_kill_inodes(struct proc_dir_entry *de)
514{
515 struct list_head *p;
516 struct super_block *sb = proc_mnt->mnt_sb;
517
518 /*
519 * Actually it's a partial revoke().
520 */
521 file_list_lock();
522 list_for_each(p, &sb->s_files) {
523 struct file * filp = list_entry(p, struct file, f_list);
524 struct dentry * dentry = filp->f_dentry;
525 struct inode * inode;
526 struct file_operations *fops;
527
528 if (dentry->d_op != &proc_dentry_operations)
529 continue;
530 inode = dentry->d_inode;
531 if (PDE(inode) != de)
532 continue;
533 fops = filp->f_op;
534 filp->f_op = NULL;
535 fops_put(fops);
536 }
537 file_list_unlock();
538}
539
540static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent,
541 const char *name,
542 mode_t mode,
543 nlink_t nlink)
544{
545 struct proc_dir_entry *ent = NULL;
546 const char *fn = name;
547 int len;
548
549 /* make sure name is valid */
550 if (!name || !strlen(name)) goto out;
551
552 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0)
553 goto out;
554
555 /* At this point there must not be any '/' characters beyond *fn */
556 if (strchr(fn, '/'))
557 goto out;
558
559 len = strlen(fn);
560
561 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
562 if (!ent) goto out;
563
564 memset(ent, 0, sizeof(struct proc_dir_entry));
565 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
566 ent->name = ((char *) ent) + sizeof(*ent);
567 ent->namelen = len;
568 ent->mode = mode;
569 ent->nlink = nlink;
570 out:
571 return ent;
572}
573
574struct proc_dir_entry *proc_symlink(const char *name,
575 struct proc_dir_entry *parent, const char *dest)
576{
577 struct proc_dir_entry *ent;
578
579 ent = proc_create(&parent,name,
580 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1);
581
582 if (ent) {
583 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL);
584 if (ent->data) {
585 strcpy((char*)ent->data,dest);
586 if (proc_register(parent, ent) < 0) {
587 kfree(ent->data);
588 kfree(ent);
589 ent = NULL;
590 }
591 } else {
592 kfree(ent);
593 ent = NULL;
594 }
595 }
596 return ent;
597}
598
599struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
600 struct proc_dir_entry *parent)
601{
602 struct proc_dir_entry *ent;
603
604 ent = proc_create(&parent, name, S_IFDIR | mode, 2);
605 if (ent) {
606 ent->proc_fops = &proc_dir_operations;
607 ent->proc_iops = &proc_dir_inode_operations;
608
609 if (proc_register(parent, ent) < 0) {
610 kfree(ent);
611 ent = NULL;
612 }
613 }
614 return ent;
615}
616
617struct proc_dir_entry *proc_mkdir(const char *name,
618 struct proc_dir_entry *parent)
619{
620 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
621}
622
623struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
624 struct proc_dir_entry *parent)
625{
626 struct proc_dir_entry *ent;
627 nlink_t nlink;
628
629 if (S_ISDIR(mode)) {
630 if ((mode & S_IALLUGO) == 0)
631 mode |= S_IRUGO | S_IXUGO;
632 nlink = 2;
633 } else {
634 if ((mode & S_IFMT) == 0)
635 mode |= S_IFREG;
636 if ((mode & S_IALLUGO) == 0)
637 mode |= S_IRUGO;
638 nlink = 1;
639 }
640
641 ent = proc_create(&parent,name,mode,nlink);
642 if (ent) {
643 if (S_ISDIR(mode)) {
644 ent->proc_fops = &proc_dir_operations;
645 ent->proc_iops = &proc_dir_inode_operations;
646 }
647 if (proc_register(parent, ent) < 0) {
648 kfree(ent);
649 ent = NULL;
650 }
651 }
652 return ent;
653}
654
655void free_proc_entry(struct proc_dir_entry *de)
656{
657 unsigned int ino = de->low_ino;
658
659 if (ino < PROC_DYNAMIC_FIRST)
660 return;
661
662 release_inode_number(ino);
663
664 if (S_ISLNK(de->mode) && de->data)
665 kfree(de->data);
666 kfree(de);
667}
668
669/*
670 * Remove a /proc entry and free it if it's not currently in use.
671 * If it is in use, we set the 'deleted' flag.
672 */
673void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
674{
675 struct proc_dir_entry **p;
676 struct proc_dir_entry *de;
677 const char *fn = name;
678 int len;
679
680 if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
681 goto out;
682 len = strlen(fn);
683 for (p = &parent->subdir; *p; p=&(*p)->next ) {
684 if (!proc_match(len, fn, *p))
685 continue;
686 de = *p;
687 *p = de->next;
688 de->next = NULL;
689 if (S_ISDIR(de->mode))
690 parent->nlink--;
691 proc_kill_inodes(de);
692 de->nlink = 0;
693 WARN_ON(de->subdir);
694 if (!atomic_read(&de->count))
695 free_proc_entry(de);
696 else {
697 de->deleted = 1;
698 printk("remove_proc_entry: %s/%s busy, count=%d\n",
699 parent->name, de->name, atomic_read(&de->count));
700 }
701 break;
702 }
703out:
704 return;
705}