blob: 6bfa78c66c2546722dc2a7d309715fbbd1dbe80f [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
Jesper Juhl02c30a82005-05-05 16:16:16 -070010 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
44 * call a packet.
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
73 */
74
75#include <asm/uaccess.h>
76#include <asm/system.h>
77#include <linux/bitops.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080078#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070079#include <linux/config.h>
80#include <linux/cpu.h>
81#include <linux/types.h>
82#include <linux/kernel.h>
83#include <linux/sched.h>
Arjan van de Ven4a3e2f72006-03-20 22:33:17 -080084#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070085#include <linux/string.h>
86#include <linux/mm.h>
87#include <linux/socket.h>
88#include <linux/sockios.h>
89#include <linux/errno.h>
90#include <linux/interrupt.h>
91#include <linux/if_ether.h>
92#include <linux/netdevice.h>
93#include <linux/etherdevice.h>
94#include <linux/notifier.h>
95#include <linux/skbuff.h>
96#include <net/sock.h>
97#include <linux/rtnetlink.h>
98#include <linux/proc_fs.h>
99#include <linux/seq_file.h>
100#include <linux/stat.h>
101#include <linux/if_bridge.h>
102#include <linux/divert.h>
103#include <net/dst.h>
104#include <net/pkt_sched.h>
105#include <net/checksum.h>
106#include <linux/highmem.h>
107#include <linux/init.h>
108#include <linux/kmod.h>
109#include <linux/module.h>
110#include <linux/kallsyms.h>
111#include <linux/netpoll.h>
112#include <linux/rcupdate.h>
113#include <linux/delay.h>
Adrian Bunkd86b5e02006-01-21 00:46:55 +0100114#include <linux/wireless.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115#include <net/iw_handler.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116#include <asm/current.h>
Steve Grubb5bdb9882005-12-03 08:39:35 -0500117#include <linux/audit.h>
Chris Leechdb217332006-06-17 21:24:58 -0700118#include <linux/dmaengine.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120/*
121 * The list of packet types we will receive (as opposed to discard)
122 * and the routines to invoke.
123 *
124 * Why 16. Because with 16 the only overlap we get on a hash of the
125 * low nibble of the protocol value is RARP/SNAP/X.25.
126 *
127 * NOTE: That is no longer true with the addition of VLAN tags. Not
128 * sure which should go first, but I bet it won't make much
129 * difference if we are running VLANs. The good news is that
130 * this protocol won't be in the list unless compiled in, so
Stephen Hemminger3041a062006-05-26 13:25:24 -0700131 * the average user (w/out VLANs) will not be adversely affected.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 * --BLG
133 *
134 * 0800 IP
135 * 8100 802.1Q VLAN
136 * 0001 802.3
137 * 0002 AX.25
138 * 0004 802.2
139 * 8035 RARP
140 * 0005 SNAP
141 * 0805 X.25
142 * 0806 ARP
143 * 8137 IPX
144 * 0009 Localtalk
145 * 86DD IPv6
146 */
147
/*
 * Taken (with bottom halves disabled) by dev_add_pack() and
 * __dev_remove_pack() while they modify the two lists below.
 */
148static DEFINE_SPINLOCK(ptype_lock);
/* Handlers for specific protocols, bucketed by (ntohs(type) & 15). */
149static struct list_head ptype_base[16]; /* 16 way hashed list */
/* Handlers registered for ETH_P_ALL. */
150static struct list_head ptype_all; /* Taps */
151
/*
 * State that only exists when CONFIG_NET_DMA is enabled.
 * NOTE(review): none of these are referenced in the visible part of the
 * file; presumably they belong to the dmaengine client set up further
 * down -- confirm against the users before relying on this.
 */
Chris Leechdb217332006-06-17 21:24:58 -0700152#ifdef CONFIG_NET_DMA
153static struct dma_client *net_dma_client;
154static unsigned int net_dma_count;
155static spinlock_t net_dma_event_lock;
156#endif
157
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158/*
Stephen Hemminger3041a062006-05-26 13:25:24 -0700159 * The @dev_base list is protected by @dev_base_lock and the rtnl
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160 * semaphore.
161 *
162 * Pure readers hold dev_base_lock for reading.
163 *
164 * Writers must hold the rtnl semaphore while they loop through the
165 * dev_base list, and hold dev_base_lock for writing when they do the
166 * actual updates. This allows pure readers to access the list even
167 * while a writer is preparing to update it.
168 *
169 * To put it another way, dev_base_lock is held for writing only to
170 * protect against pure readers; the rtnl semaphore provides the
171 * protection against other writers.
172 *
173 * See, for example usages, register_netdevice() and
174 * unregister_netdevice(), which must be called with the rtnl
175 * semaphore held.
176 */
/* First device of the list; devices are chained through ->next. */
177struct net_device *dev_base;
/*
 * Starts out pointing at dev_base itself.
 * NOTE(review): presumably tracks the last ->next slot so new devices
 * can be appended -- confirm in register_netdevice().
 */
178static struct net_device **dev_tail = &dev_base;
/* Read-held by the refcounting lookup helpers in this file. */
179DEFINE_RWLOCK(dev_base_lock);
180
181EXPORT_SYMBOL(dev_base);
182EXPORT_SYMBOL(dev_base_lock);
183
/* Both lookup tables below have 1 << NETDEV_HASHBITS buckets. */
184#define NETDEV_HASHBITS 8
/* Buckets selected by dev_name_hash(); entries link via name_hlist. */
185static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
/* Buckets selected by dev_index_hash(); entries link via index_hlist. */
186static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
187
188static inline struct hlist_head *dev_name_hash(const char *name)
189{
190 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
191 return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
192}
193
194static inline struct hlist_head *dev_index_hash(int ifindex)
195{
196 return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
197}
198
199/*
200 * Our notifier list
201 */
202
/*
 * Invoked through raw_notifier_call_chain() for NETDEV_* events.  Both
 * register_netdevice_notifier() and unregister_netdevice_notifier()
 * modify it while holding rtnl_lock().
 */
Alan Sternf07d5b92006-05-09 15:23:03 -0700203static RAW_NOTIFIER_HEAD(netdev_chain);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204
205/*
206 * Device drivers call our routines to queue packets here. We empty the
207 * queue in the local softnet handler.
208 */
/*
 * One instance per CPU.  Only the first member is initialised explicitly
 * (to NULL); the remaining members are zero-initialised by the language.
 */
Stephen Hemminger31aa02c2005-06-23 20:12:48 -0700209DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210
/*
 * sysfs hooks.  With CONFIG_SYSFS they are real functions defined
 * elsewhere; without it the init/register hooks evaluate to 0 and the
 * unregister hook is an empty statement.
 */
211#ifdef CONFIG_SYSFS
212extern int netdev_sysfs_init(void);
213extern int netdev_register_sysfs(struct net_device *);
214extern void netdev_unregister_sysfs(struct net_device *);
215#else
/* No sysfs: report success (0) and do nothing. */
216#define netdev_sysfs_init() (0)
217#define netdev_register_sysfs(dev) (0)
218#define netdev_unregister_sysfs(dev) do { } while(0)
219#endif
220
221
222/*******************************************************************************
223
224 Protocol management and registration routines
225
226*******************************************************************************/
227
228/*
229 * For efficiency
230 */
231
/*
 * Incremented by dev_add_pack() and decremented by __dev_remove_pack()
 * whenever the packet type is ETH_P_ALL.
 */
232int netdev_nit;
233
234/*
235 * Add a protocol ID to the list. Now that the input handler is
236 * smarter we can dispense with all the messy stuff that used to be
237 * here.
238 *
 *	BEWARE!!! Protocol handlers that mangle input packets
 *	MUST BE last in their hash buckets, and the search for protocol
 *	handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *	This is true now; do not change it.
 *	Explanation: if a protocol handler that mangles the packet were
 *	first on the list, it could not tell that the packet is cloned
 *	and must be copied on write, so it would modify the packet in
 *	place and subsequent readers would get a broken packet.
 *							--ANK (980803)
248 */
249
250/**
251 * dev_add_pack - add packet handler
252 * @pt: packet type declaration
253 *
254 * Add a protocol handler to the networking stack. The passed &packet_type
255 * is linked into kernel lists and may not be freed until it has been
256 * removed from the kernel lists.
257 *
258 * This call does not sleep therefore it can not
259 * guarantee all CPU's that are in middle of receiving packets
260 * will see the new packet type (until the next received packet).
261 */
262
263void dev_add_pack(struct packet_type *pt)
264{
265 int hash;
266
267 spin_lock_bh(&ptype_lock);
268 if (pt->type == htons(ETH_P_ALL)) {
269 netdev_nit++;
270 list_add_rcu(&pt->list, &ptype_all);
271 } else {
272 hash = ntohs(pt->type) & 15;
273 list_add_rcu(&pt->list, &ptype_base[hash]);
274 }
275 spin_unlock_bh(&ptype_lock);
276}
277
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278/**
279 * __dev_remove_pack - remove packet handler
280 * @pt: packet type declaration
281 *
282 * Remove a protocol handler that was previously added to the kernel
283 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
284 * from the kernel lists and can be freed or reused once this function
285 * returns.
286 *
287 * The packet type might still be in use by receivers
288 * and must not be freed until after all the CPU's have gone
289 * through a quiescent state.
290 */
291void __dev_remove_pack(struct packet_type *pt)
292{
293 struct list_head *head;
294 struct packet_type *pt1;
295
296 spin_lock_bh(&ptype_lock);
297
298 if (pt->type == htons(ETH_P_ALL)) {
299 netdev_nit--;
300 head = &ptype_all;
301 } else
302 head = &ptype_base[ntohs(pt->type) & 15];
303
304 list_for_each_entry(pt1, head, list) {
305 if (pt == pt1) {
306 list_del_rcu(&pt->list);
307 goto out;
308 }
309 }
310
311 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
312out:
313 spin_unlock_bh(&ptype_lock);
314}
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Unlink a protocol handler that was previously added with
 *	dev_add_pack(), then wait in synchronize_net() before returning.
 *	Once this function returns, @pt can be freed or reused.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* Unlink first ... */
	__dev_remove_pack(pt);

	/* ... then let every in-flight receiver finish with it. */
	synchronize_net();
}
333
334/******************************************************************************
335
336 Device Boot-time Settings Routines
337
338*******************************************************************************/
339
340/* Boot time configuration table */
/*
 * A slot whose name starts with '\0' or ' ' is treated as unused by
 * netdev_boot_setup_add() and netdev_boot_setup_check().
 */
341static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
342
343/**
344 * netdev_boot_setup_add - add new setup entry
345 * @name: name of the device
346 * @map: configured settings for the device
347 *
348 * Adds new setup entry to the dev_boot_setup list. The function
349 * returns 0 on error and 1 on success. This is a generic routine to
350 * all netdevices.
351 */
352static int netdev_boot_setup_add(char *name, struct ifmap *map)
353{
354 struct netdev_boot_setup *s;
355 int i;
356
357 s = dev_boot_setup;
358 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
359 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
360 memset(s[i].name, 0, sizeof(s[i].name));
361 strcpy(s[i].name, name);
362 memcpy(&s[i].map, map, sizeof(s[i].map));
363 break;
364 }
365 }
366
367 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
368}
369
370/**
371 * netdev_boot_setup_check - check boot time settings
372 * @dev: the netdevice
373 *
374 * Check boot time settings for the device.
375 * The found settings are set for the device to be used
376 * later in the device probing.
377 * Returns 0 if no settings found, 1 if they are.
378 */
379int netdev_boot_setup_check(struct net_device *dev)
380{
381 struct netdev_boot_setup *s = dev_boot_setup;
382 int i;
383
384 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
385 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
386 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
387 dev->irq = s[i].map.irq;
388 dev->base_addr = s[i].map.base_addr;
389 dev->mem_start = s[i].map.mem_start;
390 dev->mem_end = s[i].map.mem_end;
391 return 1;
392 }
393 }
394 return 0;
395}
396
397
398/**
399 * netdev_boot_base - get address from boot time settings
400 * @prefix: prefix for network device
401 * @unit: id for network device
402 *
403 * Check boot time settings for the base address of device.
404 * The found settings are set for the device to be used
405 * later in the device probing.
406 * Returns 0 if no settings found.
407 */
408unsigned long netdev_boot_base(const char *prefix, int unit)
409{
410 const struct netdev_boot_setup *s = dev_boot_setup;
411 char name[IFNAMSIZ];
412 int i;
413
414 sprintf(name, "%s%d", prefix, unit);
415
416 /*
417 * If device already registered then return base of 1
418 * to indicate not to probe for this interface
419 */
420 if (__dev_get_by_name(name))
421 return 1;
422
423 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
424 if (!strcmp(name, s[i].name))
425 return s[i].map.base_addr;
426 return 0;
427}
428
429/*
430 * Saves at boot time configured settings for any netdevice.
431 */
432int __init netdev_boot_setup(char *str)
433{
434 int ints[5];
435 struct ifmap map;
436
437 str = get_options(str, ARRAY_SIZE(ints), ints);
438 if (!str || !*str)
439 return 0;
440
441 /* Save settings */
442 memset(&map, 0, sizeof(map));
443 if (ints[0] > 0)
444 map.irq = ints[1];
445 if (ints[0] > 1)
446 map.base_addr = ints[2];
447 if (ints[0] > 2)
448 map.mem_start = ints[3];
449 if (ints[0] > 3)
450 map.mem_end = ints[4];
451
452 /* Add new entry to the list */
453 return netdev_boot_setup_add(str, &map);
454}
455
/* Hook netdev_boot_setup() up as the handler for the "netdev=" boot option. */
456__setup("netdev=", netdev_boot_setup);
457
458/*******************************************************************************
459
460 Device Interface Subroutines
461
462*******************************************************************************/
463
464/**
465 * __dev_get_by_name - find a device by its name
466 * @name: name to find
467 *
468 * Find an interface by name. Must be called under RTNL semaphore
469 * or @dev_base_lock. If the name is found a pointer to the device
470 * is returned. If the name is not found then %NULL is returned. The
471 * reference counters are not incremented so the caller must be
472 * careful with locks.
473 */
474
475struct net_device *__dev_get_by_name(const char *name)
476{
477 struct hlist_node *p;
478
479 hlist_for_each(p, dev_name_hash(name)) {
480 struct net_device *dev
481 = hlist_entry(p, struct net_device, name_hlist);
482 if (!strncmp(dev->name, name, IFNAMSIZ))
483 return dev;
484 }
485 return NULL;
486}
487
488/**
489 * dev_get_by_name - find a device by its name
490 * @name: name to find
491 *
492 * Find an interface by name. This can be called from any
493 * context and does its own locking. The returned handle has
494 * the usage count incremented and the caller must use dev_put() to
495 * release it when it is no longer needed. %NULL is returned if no
496 * matching device is found.
497 */
498
499struct net_device *dev_get_by_name(const char *name)
500{
501 struct net_device *dev;
502
503 read_lock(&dev_base_lock);
504 dev = __dev_get_by_name(name);
505 if (dev)
506 dev_hold(dev);
507 read_unlock(&dev_base_lock);
508 return dev;
509}
510
511/**
512 * __dev_get_by_index - find a device by its ifindex
513 * @ifindex: index of device
514 *
515 * Search for an interface by index. Returns %NULL if the device
516 * is not found or a pointer to the device. The device has not
517 * had its reference counter increased so the caller must be careful
518 * about locking. The caller must hold either the RTNL semaphore
519 * or @dev_base_lock.
520 */
521
522struct net_device *__dev_get_by_index(int ifindex)
523{
524 struct hlist_node *p;
525
526 hlist_for_each(p, dev_index_hash(ifindex)) {
527 struct net_device *dev
528 = hlist_entry(p, struct net_device, index_hlist);
529 if (dev->ifindex == ifindex)
530 return dev;
531 }
532 return NULL;
533}
534
535
536/**
537 * dev_get_by_index - find a device by its ifindex
538 * @ifindex: index of device
539 *
540 * Search for an interface by index. Returns NULL if the device
541 * is not found or a pointer to the device. The device returned has
542 * had a reference added and the pointer is safe until the user calls
543 * dev_put to indicate they have finished with it.
544 */
545
546struct net_device *dev_get_by_index(int ifindex)
547{
548 struct net_device *dev;
549
550 read_lock(&dev_base_lock);
551 dev = __dev_get_by_index(ifindex);
552 if (dev)
553 dev_hold(dev);
554 read_unlock(&dev_base_lock);
555 return dev;
556}
557
558/**
559 * dev_getbyhwaddr - find a device by its hardware address
560 * @type: media type of device
561 * @ha: hardware address
562 *
563 * Search for an interface by MAC address. Returns NULL if the device
564 * is not found or a pointer to the device. The caller must hold the
565 * rtnl semaphore. The returned device has not had its ref count increased
566 * and the caller must therefore be careful about locking
567 *
568 * BUGS:
569 * If the API was consistent this would be __dev_get_by_hwaddr
570 */
571
572struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
573{
574 struct net_device *dev;
575
576 ASSERT_RTNL();
577
578 for (dev = dev_base; dev; dev = dev->next)
579 if (dev->type == type &&
580 !memcmp(dev->dev_addr, ha, dev->addr_len))
581 break;
582 return dev;
583}
584
Jochen Friedrichcf309e32005-09-22 04:44:55 -0300585EXPORT_SYMBOL(dev_getbyhwaddr);
586
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587struct net_device *dev_getfirstbyhwtype(unsigned short type)
588{
589 struct net_device *dev;
590
591 rtnl_lock();
592 for (dev = dev_base; dev; dev = dev->next) {
593 if (dev->type == type) {
594 dev_hold(dev);
595 break;
596 }
597 }
598 rtnl_unlock();
599 return dev;
600}
601
602EXPORT_SYMBOL(dev_getfirstbyhwtype);
603
604/**
605 * dev_get_by_flags - find any device with given flags
606 * @if_flags: IFF_* values
607 * @mask: bitmask of bits in if_flags to check
608 *
609 * Search for any interface with the given flags. Returns NULL if a device
610 * is not found or a pointer to the device. The device returned has
611 * had a reference added and the pointer is safe until the user calls
612 * dev_put to indicate they have finished with it.
613 */
614
615struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
616{
617 struct net_device *dev;
618
619 read_lock(&dev_base_lock);
620 for (dev = dev_base; dev != NULL; dev = dev->next) {
621 if (((dev->flags ^ if_flags) & mask) == 0) {
622 dev_hold(dev);
623 break;
624 }
625 }
626 read_unlock(&dev_base_lock);
627 return dev;
628}
629
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to allow sysfs
 *	to work: the empty string, "." and ".." are rejected, as is any
 *	name containing a '/'.  Returns 1 for an acceptable name and 0
 *	otherwise.
 */
int dev_valid_name(const char *name)
{
	if (name[0] == '\0')
		return 0;
	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return 0;
	if (strchr(name, '/') != NULL)
		return 0;
	return 1;
}
644
645/**
646 * dev_alloc_name - allocate a name for a device
647 * @dev: device
648 * @name: name format string
649 *
650 * Passed a format string - eg "lt%d" it will try and find a suitable
Stephen Hemminger3041a062006-05-26 13:25:24 -0700651 * id. It scans list of devices to build up a free map, then chooses
652 * the first empty slot. The caller must hold the dev_base or rtnl lock
653 * while allocating the name and adding the device in order to avoid
654 * duplicates.
655 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
656 * Returns the number of the unit assigned or a negative errno code.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 */
658
659int dev_alloc_name(struct net_device *dev, const char *name)
660{
661 int i = 0;
662 char buf[IFNAMSIZ];
663 const char *p;
664 const int max_netdevices = 8*PAGE_SIZE;
665 long *inuse;
666 struct net_device *d;
667
668 p = strnchr(name, IFNAMSIZ-1, '%');
669 if (p) {
670 /*
671 * Verify the string as this thing may have come from
672 * the user. There must be either one "%d" and no other "%"
673 * characters.
674 */
675 if (p[1] != 'd' || strchr(p + 2, '%'))
676 return -EINVAL;
677
678 /* Use one page as a bit array of possible slots */
679 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
680 if (!inuse)
681 return -ENOMEM;
682
683 for (d = dev_base; d; d = d->next) {
684 if (!sscanf(d->name, name, &i))
685 continue;
686 if (i < 0 || i >= max_netdevices)
687 continue;
688
689 /* avoid cases where sscanf is not exact inverse of printf */
690 snprintf(buf, sizeof(buf), name, i);
691 if (!strncmp(buf, d->name, IFNAMSIZ))
692 set_bit(i, inuse);
693 }
694
695 i = find_first_zero_bit(inuse, max_netdevices);
696 free_page((unsigned long) inuse);
697 }
698
699 snprintf(buf, sizeof(buf), name, i);
700 if (!__dev_get_by_name(buf)) {
701 strlcpy(dev->name, buf, IFNAMSIZ);
702 return i;
703 }
704
705 /* It is possible to run out of possible slots
706 * when the name is long and there isn't enough space left
707 * for the digits, or if all bits are used.
708 */
709 return -ENFILE;
710}
711
712
713/**
714 * dev_change_name - change name of a device
715 * @dev: device
716 * @newname: name (or format string) must be at least IFNAMSIZ
717 *
718 * Change name of a device, can pass format strings "eth%d".
719 * for wildcarding.
720 */
721int dev_change_name(struct net_device *dev, char *newname)
722{
723 int err = 0;
724
725 ASSERT_RTNL();
726
727 if (dev->flags & IFF_UP)
728 return -EBUSY;
729
730 if (!dev_valid_name(newname))
731 return -EINVAL;
732
733 if (strchr(newname, '%')) {
734 err = dev_alloc_name(dev, newname);
735 if (err < 0)
736 return err;
737 strcpy(newname, dev->name);
738 }
739 else if (__dev_get_by_name(newname))
740 return -EEXIST;
741 else
742 strlcpy(dev->name, newname, IFNAMSIZ);
743
744 err = class_device_rename(&dev->class_dev, dev->name);
745 if (!err) {
746 hlist_del(&dev->name_hlist);
747 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
Alan Sternf07d5b92006-05-09 15:23:03 -0700748 raw_notifier_call_chain(&netdev_chain,
Alan Sterne041c682006-03-27 01:16:30 -0800749 NETDEV_CHANGENAME, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 }
751
752 return err;
753}
754
755/**
Stephen Hemminger3041a062006-05-26 13:25:24 -0700756 * netdev_features_change - device changes features
Stephen Hemmingerd8a33ac2005-05-29 14:13:47 -0700757 * @dev: device to cause notification
758 *
759 * Called to indicate a device has changed features.
760 */
761void netdev_features_change(struct net_device *dev)
762{
Alan Sternf07d5b92006-05-09 15:23:03 -0700763 raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
Stephen Hemmingerd8a33ac2005-05-29 14:13:47 -0700764}
765EXPORT_SYMBOL(netdev_features_change);
766
767/**
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 * netdev_state_change - device changes state
769 * @dev: device to cause notification
770 *
771 * Called to indicate a device has changed state. This function calls
772 * the notifier chains for netdev_chain and sends a NEWLINK message
773 * to the routing socket.
774 */
775void netdev_state_change(struct net_device *dev)
776{
777 if (dev->flags & IFF_UP) {
Alan Sternf07d5b92006-05-09 15:23:03 -0700778 raw_notifier_call_chain(&netdev_chain,
Alan Sterne041c682006-03-27 01:16:30 -0800779 NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
781 }
782}
783
784/**
785 * dev_load - load a network module
786 * @name: name of interface
787 *
788 * If a network interface is not present and the process has suitable
789 * privileges this function loads the module. If module loading is not
790 * available in this kernel then it becomes a nop.
791 */
792
793void dev_load(const char *name)
794{
795 struct net_device *dev;
796
797 read_lock(&dev_base_lock);
798 dev = __dev_get_by_name(name);
799 read_unlock(&dev_base_lock);
800
801 if (!dev && capable(CAP_SYS_MODULE))
802 request_module("%s", name);
803}
804
805static int default_rebuild_header(struct sk_buff *skb)
806{
807 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
808 skb->dev ? skb->dev->name : "NULL!!!");
809 kfree_skb(skb);
810 return 1;
811}
812
813
814/**
815 * dev_open - prepare an interface for use.
816 * @dev: device to open
817 *
818 * Takes a device from down to up state. The device's private open
819 * function is invoked and then the multicast lists are loaded. Finally
820 * the device is moved into the up state and a %NETDEV_UP message is
821 * sent to the netdev notifier chain.
822 *
823 * Calling this function on an active interface is a nop. On a failure
824 * a negative errno code is returned.
825 */
826int dev_open(struct net_device *dev)
827{
828 int ret = 0;
829
830 /*
831 * Is it already up?
832 */
833
834 if (dev->flags & IFF_UP)
835 return 0;
836
837 /*
838 * Is it even present?
839 */
840 if (!netif_device_present(dev))
841 return -ENODEV;
842
843 /*
844 * Call device private open method
845 */
846 set_bit(__LINK_STATE_START, &dev->state);
847 if (dev->open) {
848 ret = dev->open(dev);
849 if (ret)
850 clear_bit(__LINK_STATE_START, &dev->state);
851 }
852
853 /*
854 * If it went open OK then:
855 */
856
857 if (!ret) {
858 /*
859 * Set the flags.
860 */
861 dev->flags |= IFF_UP;
862
863 /*
864 * Initialize multicasting status
865 */
866 dev_mc_upload(dev);
867
868 /*
869 * Wakeup transmit queue engine
870 */
871 dev_activate(dev);
872
873 /*
874 * ... and announce new interface.
875 */
Alan Sternf07d5b92006-05-09 15:23:03 -0700876 raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877 }
878 return ret;
879}
880
881/**
882 * dev_close - shutdown an interface.
883 * @dev: device to shutdown
884 *
885 * This function moves an active device into down state. A
886 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
887 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
888 * chain.
889 */
890int dev_close(struct net_device *dev)
891{
892 if (!(dev->flags & IFF_UP))
893 return 0;
894
895 /*
896 * Tell people we are going down, so that they can
897 * prepare to death, when device is still operating.
898 */
Alan Sternf07d5b92006-05-09 15:23:03 -0700899 raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900
901 dev_deactivate(dev);
902
903 clear_bit(__LINK_STATE_START, &dev->state);
904
905 /* Synchronize to scheduled poll. We cannot touch poll list,
906 * it can be even on different cpu. So just clear netif_running(),
907 * and wait when poll really will happen. Actually, the best place
908 * for this is inside dev->stop() after device stopped its irq
909 * engine, but this requires more changes in devices. */
910
911 smp_mb__after_clear_bit(); /* Commit netif_running(). */
912 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
913 /* No hurry. */
David S. Miller6192b542005-07-28 12:12:58 -0700914 msleep(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915 }
916
917 /*
918 * Call the device specific close. This cannot fail.
919 * Only if device is UP
920 *
921 * We allow it to be called even after a DETACH hot-plug
922 * event.
923 */
924 if (dev->stop)
925 dev->stop(dev);
926
927 /*
928 * Device is now down.
929 */
930
931 dev->flags &= ~IFF_UP;
932
933 /*
934 * Tell people we are down
935 */
Alan Sternf07d5b92006-05-09 15:23:03 -0700936 raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937
938 return 0;
939}
940
941
942/*
943 * Device change register/unregister. These are not inline or static
944 * as we export them to the world.
945 */
946
947/**
948 * register_netdevice_notifier - register a network notifier block
949 * @nb: notifier
950 *
951 * Register a notifier to be called when network device events occur.
952 * The notifier passed is linked into the kernel structures and must
953 * not be reused until it has been unregistered. A negative errno code
954 * is returned on a failure.
955 *
956 * When registered all registration and up events are replayed
957 * to the new notifier to allow device to have a race free
958 * view of the network device list.
959 */
960
961int register_netdevice_notifier(struct notifier_block *nb)
962{
963 struct net_device *dev;
964 int err;
965
966 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -0700967 err = raw_notifier_chain_register(&netdev_chain, nb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 if (!err) {
969 for (dev = dev_base; dev; dev = dev->next) {
970 nb->notifier_call(nb, NETDEV_REGISTER, dev);
971
972 if (dev->flags & IFF_UP)
973 nb->notifier_call(nb, NETDEV_UP, dev);
974 }
975 }
976 rtnl_unlock();
977 return err;
978}
979
980/**
981 * unregister_netdevice_notifier - unregister a network notifier block
982 * @nb: notifier
983 *
984 * Unregister a notifier previously registered by
985 * register_netdevice_notifier(). The notifier is unlinked into the
986 * kernel structures and may then be reused. A negative errno code
987 * is returned on a failure.
988 */
989
990int unregister_netdevice_notifier(struct notifier_block *nb)
991{
Herbert Xu9f514952006-03-25 01:24:25 -0800992 int err;
993
994 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -0700995 err = raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xu9f514952006-03-25 01:24:25 -0800996 rtnl_unlock();
997 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998}
999
1000/**
1001 * call_netdevice_notifiers - call all network notifier blocks
1002 * @val: value passed unmodified to notifier function
1003 * @v: pointer passed unmodified to notifier function
1004 *
1005 * Call all network notifier blocks. Parameters and return value
Alan Sternf07d5b92006-05-09 15:23:03 -07001006 * are as for raw_notifier_call_chain().
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007 */
1008
1009int call_netdevice_notifiers(unsigned long val, void *v)
1010{
Alan Sternf07d5b92006-05-09 15:23:03 -07001011 return raw_notifier_call_chain(&netdev_chain, val, v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012}
1013
1014/* When > 0 there are consumers of rx skb time stamps */
1015static atomic_t netstamp_needed = ATOMIC_INIT(0);
1016
1017void net_enable_timestamp(void)
1018{
1019 atomic_inc(&netstamp_needed);
1020}
1021
1022void net_disable_timestamp(void)
1023{
1024 atomic_dec(&netstamp_needed);
1025}
1026
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001027void __net_timestamp(struct sk_buff *skb)
1028{
1029 struct timeval tv;
1030
1031 do_gettimeofday(&tv);
1032 skb_set_timestamp(skb, &tv);
1033}
1034EXPORT_SYMBOL(__net_timestamp);
1035
1036static inline void net_timestamp(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037{
1038 if (atomic_read(&netstamp_needed))
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001039 __net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040 else {
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001041 skb->tstamp.off_sec = 0;
1042 skb->tstamp.off_usec = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043 }
1044}
1045
1046/*
1047 * Support routine. Sends outgoing frames to any network
1048 * taps currently in use.
1049 */
1050
1051void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1052{
1053 struct packet_type *ptype;
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001054
1055 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056
1057 rcu_read_lock();
1058 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1059 /* Never send packets back to the socket
1060 * they originated from - MvS (miquels@drinkel.ow.org)
1061 */
1062 if ((ptype->dev == dev || !ptype->dev) &&
1063 (ptype->af_packet_priv == NULL ||
1064 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1065 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1066 if (!skb2)
1067 break;
1068
1069 /* skb->nh should be correctly
1070 set by sender, so that the second statement is
1071 just protection against buggy protocols.
1072 */
1073 skb2->mac.raw = skb2->data;
1074
1075 if (skb2->nh.raw < skb2->data ||
1076 skb2->nh.raw > skb2->tail) {
1077 if (net_ratelimit())
1078 printk(KERN_CRIT "protocol %04x is "
1079 "buggy, dev %s\n",
1080 skb2->protocol, dev->name);
1081 skb2->nh.raw = skb2->data;
1082 }
1083
1084 skb2->h.raw = skb2->nh.raw;
1085 skb2->pkt_type = PACKET_OUTGOING;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001086 ptype->func(skb2, skb->dev, ptype, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087 }
1088 }
1089 rcu_read_unlock();
1090}
1091
Denis Vlasenko56079432006-03-29 15:57:29 -08001092
1093void __netif_schedule(struct net_device *dev)
1094{
1095 if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1096 unsigned long flags;
1097 struct softnet_data *sd;
1098
1099 local_irq_save(flags);
1100 sd = &__get_cpu_var(softnet_data);
1101 dev->next_sched = sd->output_queue;
1102 sd->output_queue = dev;
1103 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1104 local_irq_restore(flags);
1105 }
1106}
1107EXPORT_SYMBOL(__netif_schedule);
1108
1109void __netif_rx_schedule(struct net_device *dev)
1110{
1111 unsigned long flags;
1112
1113 local_irq_save(flags);
1114 dev_hold(dev);
1115 list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
1116 if (dev->quota < 0)
1117 dev->quota += dev->weight;
1118 else
1119 dev->quota = dev->weight;
1120 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1121 local_irq_restore(flags);
1122}
1123EXPORT_SYMBOL(__netif_rx_schedule);
1124
/*
 * Free @skb with whichever helper suits the current context:
 * the _irq variant in hard-irq context or with irqs disabled,
 * the plain one otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (!in_irq() && !irqs_disabled()) {
		dev_kfree_skb(skb);
		return;
	}

	dev_kfree_skb_irq(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1133
1134
1135/* Hot-plugging. */
1136void netif_device_detach(struct net_device *dev)
1137{
1138 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1139 netif_running(dev)) {
1140 netif_stop_queue(dev);
1141 }
1142}
1143EXPORT_SYMBOL(netif_device_detach);
1144
1145void netif_device_attach(struct net_device *dev)
1146{
1147 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1148 netif_running(dev)) {
1149 netif_wake_queue(dev);
1150 __netdev_watchdog_up(dev);
1151 }
1152}
1153EXPORT_SYMBOL(netif_device_attach);
1154
1155
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156/*
1157 * Invalidate hardware checksum when packet is to be mangled, and
1158 * complete checksum manually on outgoing path.
1159 */
1160int skb_checksum_help(struct sk_buff *skb, int inward)
1161{
1162 unsigned int csum;
1163 int ret = 0, offset = skb->h.raw - skb->data;
1164
1165 if (inward) {
1166 skb->ip_summed = CHECKSUM_NONE;
1167 goto out;
1168 }
1169
1170 if (skb_cloned(skb)) {
1171 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1172 if (ret)
1173 goto out;
1174 }
1175
Kris Katterjohn09a62662006-01-08 22:24:28 -08001176 BUG_ON(offset > (int)skb->len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 csum = skb_checksum(skb, offset, skb->len-offset, 0);
1178
1179 offset = skb->tail - skb->h.raw;
Kris Katterjohn09a62662006-01-08 22:24:28 -08001180 BUG_ON(offset <= 0);
1181 BUG_ON(skb->csum + 2 > offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182
1183 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1184 skb->ip_summed = CHECKSUM_NONE;
1185out:
1186 return ret;
1187}
1188
Herbert Xufb286bb2005-11-10 13:01:24 -08001189/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
/* Rate-limited report (message plus stack dump) of a bad hw rx checksum. */
void netdev_rx_csum_fault(struct net_device *dev)
{
	const char *name;

	if (!net_ratelimit())
		return;

	name = dev ? dev->name : "<unknown>";
	printk(KERN_ERR "%s: hw csum failure.\n", name);
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1201
#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

/*
 * Returns 1 when @skb has a page fragment in high memory and @dev
 * did not advertise NETIF_F_HIGHDMA, 0 otherwise.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
	int frag = 0;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	while (frag < skb_shinfo(skb)->nr_frags) {
		if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
			return 1;
		frag++;
	}

	return 0;
}
#else
/* Without highmem no fragment can be out of the device's reach. */
#define illegal_highdma(dev, skb)	(0)
#endif
1224
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225/* Keep head the same: replace data */
Al Virodd0fc662005-10-07 07:46:04 +01001226int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227{
1228 unsigned int size;
1229 u8 *data;
1230 long offset;
1231 struct skb_shared_info *ninfo;
1232 int headerlen = skb->data - skb->head;
1233 int expand = (skb->tail + skb->data_len) - skb->end;
1234
1235 if (skb_shared(skb))
1236 BUG();
1237
1238 if (expand <= 0)
1239 expand = 0;
1240
1241 size = skb->end - skb->head + expand;
1242 size = SKB_DATA_ALIGN(size);
1243 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1244 if (!data)
1245 return -ENOMEM;
1246
1247 /* Copy entire thing */
1248 if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1249 BUG();
1250
1251 /* Set up shinfo */
1252 ninfo = (struct skb_shared_info*)(data + size);
1253 atomic_set(&ninfo->dataref, 1);
1254 ninfo->tso_size = skb_shinfo(skb)->tso_size;
1255 ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1256 ninfo->nr_frags = 0;
1257 ninfo->frag_list = NULL;
1258
1259 /* Offset between the two in bytes */
1260 offset = data - skb->head;
1261
1262 /* Free old data. */
1263 skb_release_data(skb);
1264
1265 skb->head = data;
1266 skb->end = data + size;
1267
1268 /* Set up new pointers */
1269 skb->h.raw += offset;
1270 skb->nh.raw += offset;
1271 skb->mac.raw += offset;
1272 skb->tail += offset;
1273 skb->data += offset;
1274
1275 /* We are no longer a clone, even if we were. */
1276 skb->cloned = 0;
1277
1278 skb->tail += skb->data_len;
1279 skb->data_len = 0;
1280 return 0;
1281}
1282
/*
 * Take / release dev->xmit_lock around a call into the driver's
 * transmit routine, recording which CPU owns it.  Devices that set
 * NETIF_F_LLTX are skipped entirely.
 *
 * Wrapped in do { } while (0) so that each macro expands to exactly
 * one statement and stays safe in an unbraced if/else; arguments are
 * parenthesized so any pointer expression may be passed.  Note that
 * @dev is evaluated more than once.
 */
#define HARD_TX_LOCK(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		spin_lock(&(dev)->xmit_lock);			\
		(dev)->xmit_lock_owner = (cpu);			\
	}							\
} while (0)

#define HARD_TX_UNLOCK(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		(dev)->xmit_lock_owner = -1;			\
		spin_unlock(&(dev)->xmit_lock);			\
	}							\
} while (0)
1296
1297/**
1298 * dev_queue_xmit - transmit a buffer
1299 * @skb: buffer to transmit
1300 *
1301 * Queue a buffer for transmission to a network device. The caller must
1302 * have set the device and priority and built the buffer before calling
1303 * this function. The function can be called from an interrupt.
1304 *
1305 * A negative errno code is returned on a failure. A success does not
1306 * guarantee the frame will be transmitted as it may be dropped due
1307 * to congestion or traffic shaping.
Ben Greearaf191362005-04-24 20:12:36 -07001308 *
1309 * -----------------------------------------------------------------------------------
1310 * I notice this method can also return errors from the queue disciplines,
1311 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1312 * be positive.
1313 *
1314 * Regardless of the return value, the skb is consumed, so it is currently
1315 * difficult to retry a send to this method. (You can bump the ref count
1316 * before sending to hold a reference for retry if you are careful.)
1317 *
1318 * When calling this method, interrupts MUST be enabled. This is because
1319 * the BH enable code must have IRQs enabled so that it will not deadlock.
1320 * --BLG
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321 */
1322
1323int dev_queue_xmit(struct sk_buff *skb)
1324{
1325 struct net_device *dev = skb->dev;
1326 struct Qdisc *q;
1327 int rc = -ENOMEM;
1328
1329 if (skb_shinfo(skb)->frag_list &&
1330 !(dev->features & NETIF_F_FRAGLIST) &&
1331 __skb_linearize(skb, GFP_ATOMIC))
1332 goto out_kfree_skb;
1333
1334 /* Fragmented skb is linearized if device does not support SG,
1335 * or if at least one of fragments is in highmem and device
1336 * does not support DMA from it.
1337 */
1338 if (skb_shinfo(skb)->nr_frags &&
1339 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1340 __skb_linearize(skb, GFP_ATOMIC))
1341 goto out_kfree_skb;
1342
1343 /* If packet is not checksummed and device does not support
1344 * checksumming for this protocol, complete checksumming here.
1345 */
1346 if (skb->ip_summed == CHECKSUM_HW &&
1347 (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1348 (!(dev->features & NETIF_F_IP_CSUM) ||
1349 skb->protocol != htons(ETH_P_IP))))
1350 if (skb_checksum_help(skb, 0))
1351 goto out_kfree_skb;
1352
Eric Dumazet2d7ceec2005-09-27 15:22:58 -07001353 spin_lock_prefetch(&dev->queue_lock);
1354
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355 /* Disable soft irqs for various locks below. Also
1356 * stops preemption for RCU.
1357 */
1358 local_bh_disable();
1359
1360 /* Updates of qdisc are serialized by queue_lock.
1361 * The struct Qdisc which is pointed to by qdisc is now a
1362 * rcu structure - it may be accessed without acquiring
1363 * a lock (but the structure may be stale.) The freeing of the
1364 * qdisc will be deferred until it's known that there are no
1365 * more references to it.
1366 *
1367 * If the qdisc has an enqueue function, we still need to
1368 * hold the queue_lock before calling it, since queue_lock
1369 * also serializes access to the device queue.
1370 */
1371
1372 q = rcu_dereference(dev->qdisc);
1373#ifdef CONFIG_NET_CLS_ACT
1374 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1375#endif
1376 if (q->enqueue) {
1377 /* Grab device queue */
1378 spin_lock(&dev->queue_lock);
1379
1380 rc = q->enqueue(skb, q);
1381
1382 qdisc_run(dev);
1383
1384 spin_unlock(&dev->queue_lock);
1385 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1386 goto out;
1387 }
1388
1389 /* The device has no queue. Common case for software devices:
1390 loopback, all the sorts of tunnels...
1391
1392 Really, it is unlikely that xmit_lock protection is necessary here.
1393 (f.e. loopback and IP tunnels are clean ignoring statistics
1394 counters.)
1395 However, it is possible, that they rely on protection
1396 made by us here.
1397
1398 Check this and shot the lock. It is not prone from deadlocks.
1399 Either shot noqueue qdisc, it is even simpler 8)
1400 */
1401 if (dev->flags & IFF_UP) {
1402 int cpu = smp_processor_id(); /* ok because BHs are off */
1403
1404 if (dev->xmit_lock_owner != cpu) {
1405
1406 HARD_TX_LOCK(dev, cpu);
1407
1408 if (!netif_queue_stopped(dev)) {
1409 if (netdev_nit)
1410 dev_queue_xmit_nit(skb, dev);
1411
1412 rc = 0;
1413 if (!dev->hard_start_xmit(skb, dev)) {
1414 HARD_TX_UNLOCK(dev);
1415 goto out;
1416 }
1417 }
1418 HARD_TX_UNLOCK(dev);
1419 if (net_ratelimit())
1420 printk(KERN_CRIT "Virtual device %s asks to "
1421 "queue packet!\n", dev->name);
1422 } else {
1423 /* Recursion is detected! It is possible,
1424 * unfortunately */
1425 if (net_ratelimit())
1426 printk(KERN_CRIT "Dead loop on virtual device "
1427 "%s, fix it urgently!\n", dev->name);
1428 }
1429 }
1430
1431 rc = -ENETDOWN;
1432 local_bh_enable();
1433
1434out_kfree_skb:
1435 kfree_skb(skb);
1436 return rc;
1437out:
1438 local_bh_enable();
1439 return rc;
1440}
1441
1442
1443/*=======================================================================
1444 Receiver routines
1445 =======================================================================*/
1446
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07001447int netdev_max_backlog = 1000;
1448int netdev_budget = 300;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449int weight_p = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450
1451DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1452
1453
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454/**
1455 * netif_rx - post buffer to the network code
1456 * @skb: buffer to post
1457 *
1458 * This function receives a packet from a device driver and queues it for
1459 * the upper (protocol) levels to process. It always succeeds. The buffer
1460 * may be dropped during processing for congestion control or by the
1461 * protocol layers.
1462 *
1463 * return values:
1464 * NET_RX_SUCCESS (no congestion)
1465 * NET_RX_CN_LOW (low congestion)
1466 * NET_RX_CN_MOD (moderate congestion)
1467 * NET_RX_CN_HIGH (high congestion)
1468 * NET_RX_DROP (packet was dropped)
1469 *
1470 */
1471
1472int netif_rx(struct sk_buff *skb)
1473{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474 struct softnet_data *queue;
1475 unsigned long flags;
1476
1477 /* if netpoll wants it, pretend we never saw it */
1478 if (netpoll_rx(skb))
1479 return NET_RX_DROP;
1480
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001481 if (!skb->tstamp.off_sec)
1482 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483
1484 /*
1485 * The code is rearranged so that the path is the most
1486 * short when CPU is congested, but is still operating.
1487 */
1488 local_irq_save(flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489 queue = &__get_cpu_var(softnet_data);
1490
1491 __get_cpu_var(netdev_rx_stat).total++;
1492 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1493 if (queue->input_pkt_queue.qlen) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494enqueue:
1495 dev_hold(skb->dev);
1496 __skb_queue_tail(&queue->input_pkt_queue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497 local_irq_restore(flags);
Stephen Hemminger34008d82005-06-23 20:10:00 -07001498 return NET_RX_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 }
1500
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 netif_rx_schedule(&queue->backlog_dev);
1502 goto enqueue;
1503 }
1504
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505 __get_cpu_var(netdev_rx_stat).dropped++;
1506 local_irq_restore(flags);
1507
1508 kfree_skb(skb);
1509 return NET_RX_DROP;
1510}
1511
/*
 * netif_rx() followed by running any pending softirqs, all with
 * preemption disabled.  Returns whatever netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int verdict;

	preempt_disable();
	verdict = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return verdict;
}

EXPORT_SYMBOL(netif_rx_ni);
1526
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001527static inline struct net_device *skb_bond(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528{
1529 struct net_device *dev = skb->dev;
1530
Jay Vosburgh8f903c72006-02-21 16:36:44 -08001531 if (dev->master) {
1532 /*
1533 * On bonding slaves other than the currently active
1534 * slave, suppress duplicates except for 802.3ad
1535 * ETH_P_SLOW and alb non-mcast/bcast.
1536 */
1537 if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
1538 if (dev->master->priv_flags & IFF_MASTER_ALB) {
1539 if (skb->pkt_type != PACKET_BROADCAST &&
1540 skb->pkt_type != PACKET_MULTICAST)
1541 goto keep;
1542 }
1543
1544 if (dev->master->priv_flags & IFF_MASTER_8023AD &&
1545 skb->protocol == __constant_htons(ETH_P_SLOW))
1546 goto keep;
1547
1548 kfree_skb(skb);
1549 return NULL;
1550 }
1551keep:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001552 skb->dev = dev->master;
Jay Vosburgh8f903c72006-02-21 16:36:44 -08001553 }
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001554
1555 return dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001556}
1557
1558static void net_tx_action(struct softirq_action *h)
1559{
1560 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1561
1562 if (sd->completion_queue) {
1563 struct sk_buff *clist;
1564
1565 local_irq_disable();
1566 clist = sd->completion_queue;
1567 sd->completion_queue = NULL;
1568 local_irq_enable();
1569
1570 while (clist) {
1571 struct sk_buff *skb = clist;
1572 clist = clist->next;
1573
1574 BUG_TRAP(!atomic_read(&skb->users));
1575 __kfree_skb(skb);
1576 }
1577 }
1578
1579 if (sd->output_queue) {
1580 struct net_device *head;
1581
1582 local_irq_disable();
1583 head = sd->output_queue;
1584 sd->output_queue = NULL;
1585 local_irq_enable();
1586
1587 while (head) {
1588 struct net_device *dev = head;
1589 head = head->next_sched;
1590
1591 smp_mb__before_clear_bit();
1592 clear_bit(__LINK_STATE_SCHED, &dev->state);
1593
1594 if (spin_trylock(&dev->queue_lock)) {
1595 qdisc_run(dev);
1596 spin_unlock(&dev->queue_lock);
1597 } else {
1598 netif_schedule(dev);
1599 }
1600 }
1601 }
1602}
1603
1604static __inline__ int deliver_skb(struct sk_buff *skb,
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001605 struct packet_type *pt_prev,
1606 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607{
1608 atomic_inc(&skb->users);
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001609 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610}
1611
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* Hook pointers; nothing in this block assigns them. */
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

/*
 * Give the frame to the bridge hook if the receiving device is a
 * bridge port.  A handler still pending in *pt_prev is delivered
 * first.  Returns 0 for loopback packets and non-port devices,
 * otherwise the hook's return value.
 */
static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret,
				    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if ((*pskb)->pkt_type == PACKET_LOOPBACK)
		return 0;

	port = rcu_dereference((*pskb)->dev->br_port);
	if (port == NULL)
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
#endif
1639
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
 *
 */

/*
 * Run @skb through the ingress qdisc of its device, if there is one.
 * Returns the qdisc's enqueue result, TC_ACT_OK when the device has
 * no ingress qdisc, or TC_ACT_SHOT when the redirect counter in
 * tc_verd exceeds MAX_RED_LOOP.
 */
static int ing_filter(struct sk_buff *skb)
{
	struct Qdisc *q;
	struct net_device *dev = skb->dev;
	int result = TC_ACT_OK;
	__u32 ttl;

	if (!dev->qdisc_ingress)
		return result;

	ttl = (__u32) G_TC_RTTL(skb->tc_verd);
	if (MAX_RED_LOOP < ttl++) {
		/*
		 * This fires once per offending packet, so give the
		 * message a log level and rate-limit it like the other
		 * per-packet messages in this file.
		 */
		if (net_ratelimit())
			printk(KERN_WARNING
			       "Redir loop detected Dropping packet (%s->%s)\n",
			       skb->input_dev->name, skb->dev->name);
		return TC_ACT_SHOT;
	}

	skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);

	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

	/* Re-read the qdisc pointer now that ingress_lock is held. */
	spin_lock(&dev->ingress_lock);
	if ((q = dev->qdisc_ingress) != NULL)
		result = q->enqueue(skb, q);
	spin_unlock(&dev->ingress_lock);

	return result;
}
#endif
1677
1678int netif_receive_skb(struct sk_buff *skb)
1679{
1680 struct packet_type *ptype, *pt_prev;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001681 struct net_device *orig_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 int ret = NET_RX_DROP;
1683 unsigned short type;
1684
1685 /* if we've gotten here through NAPI, check netpoll */
1686 if (skb->dev->poll && netpoll_rx(skb))
1687 return NET_RX_DROP;
1688
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001689 if (!skb->tstamp.off_sec)
1690 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691
David S. Miller86e65da2005-08-09 19:36:29 -07001692 if (!skb->input_dev)
1693 skb->input_dev = skb->dev;
1694
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001695 orig_dev = skb_bond(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696
Jay Vosburgh8f903c72006-02-21 16:36:44 -08001697 if (!orig_dev)
1698 return NET_RX_DROP;
1699
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 __get_cpu_var(netdev_rx_stat).total++;
1701
1702 skb->h.raw = skb->nh.raw = skb->data;
1703 skb->mac_len = skb->nh.raw - skb->mac.raw;
1704
1705 pt_prev = NULL;
1706
1707 rcu_read_lock();
1708
1709#ifdef CONFIG_NET_CLS_ACT
1710 if (skb->tc_verd & TC_NCLS) {
1711 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1712 goto ncls;
1713 }
1714#endif
1715
1716 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1717 if (!ptype->dev || ptype->dev == skb->dev) {
1718 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001719 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720 pt_prev = ptype;
1721 }
1722 }
1723
1724#ifdef CONFIG_NET_CLS_ACT
1725 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001726 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727 pt_prev = NULL; /* noone else should process this after*/
1728 } else {
1729 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1730 }
1731
1732 ret = ing_filter(skb);
1733
1734 if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1735 kfree_skb(skb);
1736 goto out;
1737 }
1738
1739 skb->tc_verd = 0;
1740ncls:
1741#endif
1742
1743 handle_diverter(skb);
1744
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001745 if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 goto out;
1747
1748 type = skb->protocol;
1749 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1750 if (ptype->type == type &&
1751 (!ptype->dev || ptype->dev == skb->dev)) {
1752 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001753 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754 pt_prev = ptype;
1755 }
1756 }
1757
1758 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001759 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760 } else {
1761 kfree_skb(skb);
1762 /* Jamal, now you will not able to escape explaining
1763 * me how you were going to use this. :-)
1764 */
1765 ret = NET_RX_DROP;
1766 }
1767
1768out:
1769 rcu_read_unlock();
1770 return ret;
1771}
1772
1773static int process_backlog(struct net_device *backlog_dev, int *budget)
1774{
1775 int work = 0;
1776 int quota = min(backlog_dev->quota, *budget);
1777 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1778 unsigned long start_time = jiffies;
1779
Stephen Hemmingere3876602005-06-08 14:56:01 -07001780 backlog_dev->weight = weight_p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 for (;;) {
1782 struct sk_buff *skb;
1783 struct net_device *dev;
1784
1785 local_irq_disable();
1786 skb = __skb_dequeue(&queue->input_pkt_queue);
1787 if (!skb)
1788 goto job_done;
1789 local_irq_enable();
1790
1791 dev = skb->dev;
1792
1793 netif_receive_skb(skb);
1794
1795 dev_put(dev);
1796
1797 work++;
1798
1799 if (work >= quota || jiffies - start_time > 1)
1800 break;
1801
1802 }
1803
1804 backlog_dev->quota -= work;
1805 *budget -= work;
1806 return -1;
1807
1808job_done:
1809 backlog_dev->quota -= work;
1810 *budget -= work;
1811
1812 list_del(&backlog_dev->poll_list);
1813 smp_mb__before_clear_bit();
1814 netif_poll_enable(backlog_dev);
1815
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816 local_irq_enable();
1817 return 0;
1818}
1819
1820static void net_rx_action(struct softirq_action *h)
1821{
1822 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1823 unsigned long start_time = jiffies;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07001824 int budget = netdev_budget;
Matt Mackall53fb95d2005-08-11 19:27:43 -07001825 void *have;
1826
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 local_irq_disable();
1828
1829 while (!list_empty(&queue->poll_list)) {
1830 struct net_device *dev;
1831
1832 if (budget <= 0 || jiffies - start_time > 1)
1833 goto softnet_break;
1834
1835 local_irq_enable();
1836
1837 dev = list_entry(queue->poll_list.next,
1838 struct net_device, poll_list);
Matt Mackall53fb95d2005-08-11 19:27:43 -07001839 have = netpoll_poll_lock(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840
1841 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
Matt Mackall53fb95d2005-08-11 19:27:43 -07001842 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843 local_irq_disable();
Stephen Hemminger8aca8a22006-03-20 22:26:39 -08001844 list_move_tail(&dev->poll_list, &queue->poll_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001845 if (dev->quota < 0)
1846 dev->quota += dev->weight;
1847 else
1848 dev->quota = dev->weight;
1849 } else {
Matt Mackall53fb95d2005-08-11 19:27:43 -07001850 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851 dev_put(dev);
1852 local_irq_disable();
1853 }
1854 }
1855out:
Chris Leechdb217332006-06-17 21:24:58 -07001856#ifdef CONFIG_NET_DMA
1857 /*
1858 * There may not be any more sk_buffs coming right now, so push
1859 * any pending DMA copies to hardware
1860 */
1861 if (net_dma_client) {
1862 struct dma_chan *chan;
1863 rcu_read_lock();
1864 list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
1865 dma_async_memcpy_issue_pending(chan);
1866 rcu_read_unlock();
1867 }
1868#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 local_irq_enable();
1870 return;
1871
1872softnet_break:
1873 __get_cpu_var(netdev_rx_stat).time_squeeze++;
1874 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1875 goto out;
1876}
1877
1878static gifconf_func_t * gifconf_list [NPROTO];
1879
1880/**
1881 * register_gifconf - register a SIOCGIF handler
1882 * @family: Address family
1883 * @gifconf: Function handler
1884 *
1885 * Register protocol dependent address dumping routines. The handler
1886 * that is passed must not be freed or reused until it has been replaced
1887 * by another handler.
1888 */
1889int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1890{
1891 if (family >= NPROTO)
1892 return -EINVAL;
1893 gifconf_list[family] = gifconf;
1894 return 0;
1895}
1896
1897
1898/*
1899 * Map an interface index to its name (SIOCGIFNAME)
1900 */
1901
1902/*
1903 * We need this ioctl for efficient implementation of the
1904 * if_indextoname() function required by the IPv6 API. Without
1905 * it, we would have to search all the interfaces to find a
1906 * match. --pb
1907 */
1908
1909static int dev_ifname(struct ifreq __user *arg)
1910{
1911 struct net_device *dev;
1912 struct ifreq ifr;
1913
1914 /*
1915 * Fetch the caller's info block.
1916 */
1917
1918 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1919 return -EFAULT;
1920
1921 read_lock(&dev_base_lock);
1922 dev = __dev_get_by_index(ifr.ifr_ifindex);
1923 if (!dev) {
1924 read_unlock(&dev_base_lock);
1925 return -ENODEV;
1926 }
1927
1928 strcpy(ifr.ifr_name, dev->name);
1929 read_unlock(&dev_base_lock);
1930
1931 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1932 return -EFAULT;
1933 return 0;
1934}
1935
1936/*
1937 * Perform a SIOCGIFCONF call. This structure will change
1938 * size eventually, and there is nothing I can do about it.
1939 * Thus we will need a 'compatibility mode'.
1940 */
1941
1942static int dev_ifconf(char __user *arg)
1943{
1944 struct ifconf ifc;
1945 struct net_device *dev;
1946 char __user *pos;
1947 int len;
1948 int total;
1949 int i;
1950
1951 /*
1952 * Fetch the caller's info block.
1953 */
1954
1955 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1956 return -EFAULT;
1957
1958 pos = ifc.ifc_buf;
1959 len = ifc.ifc_len;
1960
1961 /*
1962 * Loop over the interfaces, and write an info block for each.
1963 */
1964
1965 total = 0;
1966 for (dev = dev_base; dev; dev = dev->next) {
1967 for (i = 0; i < NPROTO; i++) {
1968 if (gifconf_list[i]) {
1969 int done;
1970 if (!pos)
1971 done = gifconf_list[i](dev, NULL, 0);
1972 else
1973 done = gifconf_list[i](dev, pos + total,
1974 len - total);
1975 if (done < 0)
1976 return -EFAULT;
1977 total += done;
1978 }
1979 }
1980 }
1981
1982 /*
1983 * All done. Write the updated control block back to the caller.
1984 */
1985 ifc.ifc_len = total;
1986
1987 /*
1988 * Both BSD and Solaris return 0 here, so we do too.
1989 */
1990 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1991}
1992
1993#ifdef CONFIG_PROC_FS
1994/*
1995 * This is invoked by the /proc filesystem handler to display a device
1996 * in detail.
1997 */
1998static __inline__ struct net_device *dev_get_idx(loff_t pos)
1999{
2000 struct net_device *dev;
2001 loff_t i;
2002
2003 for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
2004
2005 return i == pos ? dev : NULL;
2006}
2007
2008void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2009{
2010 read_lock(&dev_base_lock);
2011 return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
2012}
2013
2014void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2015{
2016 ++*pos;
2017 return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
2018}
2019
2020void dev_seq_stop(struct seq_file *seq, void *v)
2021{
2022 read_unlock(&dev_base_lock);
2023}
2024
2025static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2026{
2027 if (dev->get_stats) {
2028 struct net_device_stats *stats = dev->get_stats(dev);
2029
2030 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2031 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2032 dev->name, stats->rx_bytes, stats->rx_packets,
2033 stats->rx_errors,
2034 stats->rx_dropped + stats->rx_missed_errors,
2035 stats->rx_fifo_errors,
2036 stats->rx_length_errors + stats->rx_over_errors +
2037 stats->rx_crc_errors + stats->rx_frame_errors,
2038 stats->rx_compressed, stats->multicast,
2039 stats->tx_bytes, stats->tx_packets,
2040 stats->tx_errors, stats->tx_dropped,
2041 stats->tx_fifo_errors, stats->collisions,
2042 stats->tx_carrier_errors +
2043 stats->tx_aborted_errors +
2044 stats->tx_window_errors +
2045 stats->tx_heartbeat_errors,
2046 stats->tx_compressed);
2047 } else
2048 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
2049}
2050
2051/*
2052 * Called from the PROCfs module. This now uses the new arbitrary sized
2053 * /proc/net interface to create /proc/net/dev
2054 */
2055static int dev_seq_show(struct seq_file *seq, void *v)
2056{
2057 if (v == SEQ_START_TOKEN)
2058 seq_puts(seq, "Inter-| Receive "
2059 " | Transmit\n"
2060 " face |bytes packets errs drop fifo frame "
2061 "compressed multicast|bytes packets errs "
2062 "drop fifo colls carrier compressed\n");
2063 else
2064 dev_seq_printf_stats(seq, v);
2065 return 0;
2066}
2067
2068static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2069{
2070 struct netif_rx_stats *rc = NULL;
2071
2072 while (*pos < NR_CPUS)
2073 if (cpu_online(*pos)) {
2074 rc = &per_cpu(netdev_rx_stat, *pos);
2075 break;
2076 } else
2077 ++*pos;
2078 return rc;
2079}
2080
/*
 *	seq_file ->start for softnet_stat: resume at the first online CPU
 *	at or after *pos.  No lock is taken here.
 */
static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
{
	return softnet_get_online(pos);
}
2085
/*
 *	seq_file ->next for softnet_stat: step past the current CPU and
 *	return the statistics of the next online one, or NULL at the end.
 */
static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return softnet_get_online(pos);
}
2091
/*
 *	seq_file ->stop for softnet_stat: nothing to undo, since
 *	softnet_seq_start() acquires nothing.
 */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2095
2096static int softnet_seq_show(struct seq_file *seq, void *v)
2097{
2098 struct netif_rx_stats *s = v;
2099
2100 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
Stephen Hemminger31aa02c2005-06-23 20:12:48 -07002101 s->total, s->dropped, s->time_squeeze, 0,
Stephen Hemmingerc1ebcdb2005-06-23 20:08:59 -07002102 0, 0, 0, 0, /* was fastroute */
2103 s->cpu_collision );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 return 0;
2105}
2106
/* seq_file iterator callbacks used by dev_seq_open() (the "dev" proc entry). */
static struct seq_operations dev_seq_ops = {
	.start = dev_seq_start,
	.next  = dev_seq_next,
	.stop  = dev_seq_stop,
	.show  = dev_seq_show,
};
2113
/* ->open for the "dev" proc entry: attach the dev_seq_ops iterator to @file. */
static int dev_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &dev_seq_ops);
}
2118
/*
 * File operations registered for the "dev" proc entry by dev_proc_init();
 * everything except ->open is the generic seq_file implementation.
 */
static struct file_operations dev_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = dev_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2126
/* seq_file iterator callbacks used by softnet_seq_open() (the "softnet_stat" proc entry). */
static struct seq_operations softnet_seq_ops = {
	.start = softnet_seq_start,
	.next  = softnet_seq_next,
	.stop  = softnet_seq_stop,
	.show  = softnet_seq_show,
};
2133
/* ->open for the "softnet_stat" proc entry: attach the softnet_seq_ops iterator to @file. */
static int softnet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &softnet_seq_ops);
}
2138
/*
 * File operations registered for the "softnet_stat" proc entry by
 * dev_proc_init(); everything except ->open is the generic seq_file
 * implementation.
 */
static struct file_operations softnet_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = softnet_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};
2146
/*
 * wireless_proc_init() is only a real function when wireless extensions
 * are configured in; otherwise it collapses to a constant 0 ("success")
 * so that dev_proc_init() needs no conditional code.
 */
#ifndef CONFIG_WIRELESS_EXT
#define wireless_proc_init() 0
#else
extern int wireless_proc_init(void);
#endif
2152
2153static int __init dev_proc_init(void)
2154{
2155 int rc = -ENOMEM;
2156
2157 if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2158 goto out;
2159 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2160 goto out_dev;
2161 if (wireless_proc_init())
2162 goto out_softnet;
2163 rc = 0;
2164out:
2165 return rc;
2166out_softnet:
2167 proc_net_remove("softnet_stat");
2168out_dev:
2169 proc_net_remove("dev");
2170 goto out;
2171}
2172#else
2173#define dev_proc_init() 0
2174#endif /* CONFIG_PROC_FS */
2175
2176
2177/**
2178 * netdev_set_master - set up master/slave pair
2179 * @slave: slave device
2180 * @master: new master device
2181 *
2182 * Changes the master device of the slave. Pass %NULL to break the
2183 * bonding. The caller must hold the RTNL semaphore. On a failure
2184 * a negative errno code is returned. On success the reference counts
2185 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2186 * function returns zero.
2187 */
2188int netdev_set_master(struct net_device *slave, struct net_device *master)
2189{
2190 struct net_device *old = slave->master;
2191
2192 ASSERT_RTNL();
2193
2194 if (master) {
2195 if (old)
2196 return -EBUSY;
2197 dev_hold(master);
2198 }
2199
2200 slave->master = master;
2201
2202 synchronize_net();
2203
2204 if (old)
2205 dev_put(old);
2206
2207 if (master)
2208 slave->flags |= IFF_SLAVE;
2209 else
2210 slave->flags &= ~IFF_SLAVE;
2211
2212 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2213 return 0;
2214}
2215
2216/**
2217 * dev_set_promiscuity - update promiscuity count on a device
2218 * @dev: device
2219 * @inc: modifier
2220 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07002221 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222 * remains above zero the interface remains promiscuous. Once it hits zero
2223 * the device reverts back to normal filtering operation. A negative inc
2224 * value is used to drop promiscuity on the device.
2225 */
2226void dev_set_promiscuity(struct net_device *dev, int inc)
2227{
2228 unsigned short old_flags = dev->flags;
2229
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 if ((dev->promiscuity += inc) == 0)
2231 dev->flags &= ~IFF_PROMISC;
David Chau52609c02005-07-05 15:11:06 -07002232 else
2233 dev->flags |= IFF_PROMISC;
2234 if (dev->flags != old_flags) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235 dev_mc_upload(dev);
2236 printk(KERN_INFO "device %s %s promiscuous mode\n",
2237 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2238 "left");
Steve Grubb5bdb9882005-12-03 08:39:35 -05002239 audit_log(current->audit_context, GFP_ATOMIC,
2240 AUDIT_ANOM_PROMISCUOUS,
2241 "dev=%s prom=%d old_prom=%d auid=%u",
2242 dev->name, (dev->flags & IFF_PROMISC),
2243 (old_flags & IFF_PROMISC),
2244 audit_get_loginuid(current->audit_context));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 }
2246}
2247
2248/**
2249 * dev_set_allmulti - update allmulti count on a device
2250 * @dev: device
2251 * @inc: modifier
2252 *
2253 * Add or remove reception of all multicast frames to a device. While the
2254 * count in the device remains above zero the interface remains listening
2255 * to all interfaces. Once it hits zero the device reverts back to normal
2256 * filtering operation. A negative @inc value is used to drop the counter
2257 * when releasing a resource needing all multicasts.
2258 */
2259
2260void dev_set_allmulti(struct net_device *dev, int inc)
2261{
2262 unsigned short old_flags = dev->flags;
2263
2264 dev->flags |= IFF_ALLMULTI;
2265 if ((dev->allmulti += inc) == 0)
2266 dev->flags &= ~IFF_ALLMULTI;
2267 if (dev->flags ^ old_flags)
2268 dev_mc_upload(dev);
2269}
2270
2271unsigned dev_get_flags(const struct net_device *dev)
2272{
2273 unsigned flags;
2274
2275 flags = (dev->flags & ~(IFF_PROMISC |
2276 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08002277 IFF_RUNNING |
2278 IFF_LOWER_UP |
2279 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280 (dev->gflags & (IFF_PROMISC |
2281 IFF_ALLMULTI));
2282
Stefan Rompfb00055a2006-03-20 17:09:11 -08002283 if (netif_running(dev)) {
2284 if (netif_oper_up(dev))
2285 flags |= IFF_RUNNING;
2286 if (netif_carrier_ok(dev))
2287 flags |= IFF_LOWER_UP;
2288 if (netif_dormant(dev))
2289 flags |= IFF_DORMANT;
2290 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291
2292 return flags;
2293}
2294
2295int dev_change_flags(struct net_device *dev, unsigned flags)
2296{
2297 int ret;
2298 int old_flags = dev->flags;
2299
2300 /*
2301 * Set the flags on our device.
2302 */
2303
2304 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2305 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2306 IFF_AUTOMEDIA)) |
2307 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2308 IFF_ALLMULTI));
2309
2310 /*
2311 * Load in the correct multicast list now the flags have changed.
2312 */
2313
2314 dev_mc_upload(dev);
2315
2316 /*
2317 * Have we downed the interface. We handle IFF_UP ourselves
2318 * according to user attempts to set it, rather than blindly
2319 * setting it.
2320 */
2321
2322 ret = 0;
2323 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
2324 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2325
2326 if (!ret)
2327 dev_mc_upload(dev);
2328 }
2329
2330 if (dev->flags & IFF_UP &&
2331 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2332 IFF_VOLATILE)))
Alan Sternf07d5b92006-05-09 15:23:03 -07002333 raw_notifier_call_chain(&netdev_chain,
Alan Sterne041c682006-03-27 01:16:30 -08002334 NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335
2336 if ((flags ^ dev->gflags) & IFF_PROMISC) {
2337 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2338 dev->gflags ^= IFF_PROMISC;
2339 dev_set_promiscuity(dev, inc);
2340 }
2341
2342 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2343 is important. Some (broken) drivers set IFF_PROMISC, when
2344 IFF_ALLMULTI is requested not asking us and not reporting.
2345 */
2346 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2347 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2348 dev->gflags ^= IFF_ALLMULTI;
2349 dev_set_allmulti(dev, inc);
2350 }
2351
2352 if (old_flags ^ dev->flags)
2353 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2354
2355 return ret;
2356}
2357
2358int dev_set_mtu(struct net_device *dev, int new_mtu)
2359{
2360 int err;
2361
2362 if (new_mtu == dev->mtu)
2363 return 0;
2364
2365 /* MTU must be positive. */
2366 if (new_mtu < 0)
2367 return -EINVAL;
2368
2369 if (!netif_device_present(dev))
2370 return -ENODEV;
2371
2372 err = 0;
2373 if (dev->change_mtu)
2374 err = dev->change_mtu(dev, new_mtu);
2375 else
2376 dev->mtu = new_mtu;
2377 if (!err && dev->flags & IFF_UP)
Alan Sternf07d5b92006-05-09 15:23:03 -07002378 raw_notifier_call_chain(&netdev_chain,
Alan Sterne041c682006-03-27 01:16:30 -08002379 NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002380 return err;
2381}
2382
2383int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2384{
2385 int err;
2386
2387 if (!dev->set_mac_address)
2388 return -EOPNOTSUPP;
2389 if (sa->sa_family != dev->type)
2390 return -EINVAL;
2391 if (!netif_device_present(dev))
2392 return -ENODEV;
2393 err = dev->set_mac_address(dev, sa);
2394 if (!err)
Alan Sternf07d5b92006-05-09 15:23:03 -07002395 raw_notifier_call_chain(&netdev_chain,
Alan Sterne041c682006-03-27 01:16:30 -08002396 NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002397 return err;
2398}
2399
2400/*
2401 * Perform the SIOCxIFxxx calls.
2402 */
2403static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2404{
2405 int err;
2406 struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2407
2408 if (!dev)
2409 return -ENODEV;
2410
2411 switch (cmd) {
2412 case SIOCGIFFLAGS: /* Get interface flags */
2413 ifr->ifr_flags = dev_get_flags(dev);
2414 return 0;
2415
2416 case SIOCSIFFLAGS: /* Set interface flags */
2417 return dev_change_flags(dev, ifr->ifr_flags);
2418
2419 case SIOCGIFMETRIC: /* Get the metric on the interface
2420 (currently unused) */
2421 ifr->ifr_metric = 0;
2422 return 0;
2423
2424 case SIOCSIFMETRIC: /* Set the metric on the interface
2425 (currently unused) */
2426 return -EOPNOTSUPP;
2427
2428 case SIOCGIFMTU: /* Get the MTU of a device */
2429 ifr->ifr_mtu = dev->mtu;
2430 return 0;
2431
2432 case SIOCSIFMTU: /* Set the MTU of a device */
2433 return dev_set_mtu(dev, ifr->ifr_mtu);
2434
2435 case SIOCGIFHWADDR:
2436 if (!dev->addr_len)
2437 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2438 else
2439 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2440 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2441 ifr->ifr_hwaddr.sa_family = dev->type;
2442 return 0;
2443
2444 case SIOCSIFHWADDR:
2445 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2446
2447 case SIOCSIFHWBROADCAST:
2448 if (ifr->ifr_hwaddr.sa_family != dev->type)
2449 return -EINVAL;
2450 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2451 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
Alan Sternf07d5b92006-05-09 15:23:03 -07002452 raw_notifier_call_chain(&netdev_chain,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002453 NETDEV_CHANGEADDR, dev);
2454 return 0;
2455
2456 case SIOCGIFMAP:
2457 ifr->ifr_map.mem_start = dev->mem_start;
2458 ifr->ifr_map.mem_end = dev->mem_end;
2459 ifr->ifr_map.base_addr = dev->base_addr;
2460 ifr->ifr_map.irq = dev->irq;
2461 ifr->ifr_map.dma = dev->dma;
2462 ifr->ifr_map.port = dev->if_port;
2463 return 0;
2464
2465 case SIOCSIFMAP:
2466 if (dev->set_config) {
2467 if (!netif_device_present(dev))
2468 return -ENODEV;
2469 return dev->set_config(dev, &ifr->ifr_map);
2470 }
2471 return -EOPNOTSUPP;
2472
2473 case SIOCADDMULTI:
2474 if (!dev->set_multicast_list ||
2475 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2476 return -EINVAL;
2477 if (!netif_device_present(dev))
2478 return -ENODEV;
2479 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2480 dev->addr_len, 1);
2481
2482 case SIOCDELMULTI:
2483 if (!dev->set_multicast_list ||
2484 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2485 return -EINVAL;
2486 if (!netif_device_present(dev))
2487 return -ENODEV;
2488 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2489 dev->addr_len, 1);
2490
2491 case SIOCGIFINDEX:
2492 ifr->ifr_ifindex = dev->ifindex;
2493 return 0;
2494
2495 case SIOCGIFTXQLEN:
2496 ifr->ifr_qlen = dev->tx_queue_len;
2497 return 0;
2498
2499 case SIOCSIFTXQLEN:
2500 if (ifr->ifr_qlen < 0)
2501 return -EINVAL;
2502 dev->tx_queue_len = ifr->ifr_qlen;
2503 return 0;
2504
2505 case SIOCSIFNAME:
2506 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2507 return dev_change_name(dev, ifr->ifr_newname);
2508
2509 /*
2510 * Unknown or private ioctl
2511 */
2512
2513 default:
2514 if ((cmd >= SIOCDEVPRIVATE &&
2515 cmd <= SIOCDEVPRIVATE + 15) ||
2516 cmd == SIOCBONDENSLAVE ||
2517 cmd == SIOCBONDRELEASE ||
2518 cmd == SIOCBONDSETHWADDR ||
2519 cmd == SIOCBONDSLAVEINFOQUERY ||
2520 cmd == SIOCBONDINFOQUERY ||
2521 cmd == SIOCBONDCHANGEACTIVE ||
2522 cmd == SIOCGMIIPHY ||
2523 cmd == SIOCGMIIREG ||
2524 cmd == SIOCSMIIREG ||
2525 cmd == SIOCBRADDIF ||
2526 cmd == SIOCBRDELIF ||
2527 cmd == SIOCWANDEV) {
2528 err = -EOPNOTSUPP;
2529 if (dev->do_ioctl) {
2530 if (netif_device_present(dev))
2531 err = dev->do_ioctl(dev, ifr,
2532 cmd);
2533 else
2534 err = -ENODEV;
2535 }
2536 } else
2537 err = -EINVAL;
2538
2539 }
2540 return err;
2541}
2542
/*
 *	This function handles all "interface"-type I/O control requests. The actual
 *	'doing' part of this is dev_ifsioc above.
 */

/**
 *	dev_ioctl	-	network device ioctl
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 *
 *	The request is copied in from user space, any ":alias" suffix in
 *	the interface name is cut off for the lookup, and the command is
 *	then dispatched with the lock and capability check its group
 *	requires. Groups that return data copy the ifreq back to @arg.
 */

int dev_ioctl(unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes ifconf argument
	   and requires shared lock, because it sleeps writing
	   to user space.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_lock();
		ret = dev_ifconf((char __user *) arg);
		rtnl_unlock();
		return ret;
	}
	/* SIOCGIFNAME is handled entirely by dev_ifname(). */
	if (cmd == SIOCGIFNAME)
		return dev_ifname((struct ifreq __user *)arg);

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	/* Never trust user space to have terminated the name. */
	ifr.ifr_name[IFNAMSIZ-1] = 0;

	/*
	 * Temporarily cut the name at the first ':' for the device lookup;
	 * most paths that copy the ifreq back restore the ':' first.
	 */
	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */

	switch (cmd) {
		/*
		 *	These ioctl calls:
		 *	- can be done by all.
		 *	- atomic and do not require locking.
		 *	- return a value
		 */
		case SIOCGIFFLAGS:
		case SIOCGIFMETRIC:
		case SIOCGIFMTU:
		case SIOCGIFHWADDR:
		case SIOCGIFSLAVE:
		case SIOCGIFMAP:
		case SIOCGIFINDEX:
		case SIOCGIFTXQLEN:
			dev_load(ifr.ifr_name);
			/* Read-only queries: only dev_base_lock is held, not RTNL. */
			read_lock(&dev_base_lock);
			ret = dev_ifsioc(&ifr, cmd);
			read_unlock(&dev_base_lock);
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/* ethtool requests go to dev_ethtool() under RTNL. */
		case SIOCETHTOOL:
			dev_load(ifr.ifr_name);
			rtnl_lock();
			ret = dev_ethtool(&ifr);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- return a value
		 */
		case SIOCGMIIPHY:
		case SIOCGMIIREG:
		case SIOCSIFNAME:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			dev_load(ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(&ifr, cmd);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- do not return a value
		 */
		case SIOCSIFFLAGS:
		case SIOCSIFMETRIC:
		case SIOCSIFMTU:
		case SIOCSIFMAP:
		case SIOCSIFHWADDR:
		case SIOCSIFSLAVE:
		case SIOCADDMULTI:
		case SIOCDELMULTI:
		case SIOCSIFHWBROADCAST:
		case SIOCSIFTXQLEN:
		case SIOCSMIIREG:
		case SIOCBONDENSLAVE:
		case SIOCBONDRELEASE:
		case SIOCBONDSETHWADDR:
		case SIOCBONDCHANGEACTIVE:
		case SIOCBRADDIF:
		case SIOCBRDELIF:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			/* fall through */
		/*
		 * The two bonding queries share the code below but skip
		 * the capability check above.  Nothing is copied back to
		 * @arg by this function on this path.
		 */
		case SIOCBONDSLAVEINFOQUERY:
		case SIOCBONDINFOQUERY:
			dev_load(ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(&ifr, cmd);
			rtnl_unlock();
			return ret;

		case SIOCGIFMEM:
			/* Get the per device memory space. We can add this but
			 * currently do not support it */
		case SIOCSIFMEM:
			/* Set the per device memory buffer space.
			 * Not applicable in our case */
		case SIOCSIFLINK:
			return -EINVAL;

		/*
		 *	Unknown or private ioctl.
		 */
		default:
			if (cmd == SIOCWANDEV ||
			    (cmd >= SIOCDEVPRIVATE &&
			     cmd <= SIOCDEVPRIVATE + 15)) {
				dev_load(ifr.ifr_name);
				rtnl_lock();
				ret = dev_ifsioc(&ifr, cmd);
				rtnl_unlock();
				/* Note: the ':' is not restored on this path. */
				if (!ret && copy_to_user(arg, &ifr,
							 sizeof(struct ifreq)))
					ret = -EFAULT;
				return ret;
			}
#ifdef CONFIG_WIRELESS_EXT
			/* Take care of Wireless Extensions */
			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
				/* If command is `set a parameter', or
				 * `get the encoding parameters', check if
				 * the user has the right to do it */
				if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
				    || cmd == SIOCGIWENCODEEXT) {
					if (!capable(CAP_NET_ADMIN))
						return -EPERM;
				}
				dev_load(ifr.ifr_name);
				rtnl_lock();
				/* Follow me in net/core/wireless.c */
				ret = wireless_process_ioctl(&ifr, cmd);
				rtnl_unlock();
				/* GET commands copy back regardless of ret. */
				if (IW_IS_GET(cmd) &&
				    copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
				return ret;
			}
#endif	/* CONFIG_WIRELESS_EXT */
			return -EINVAL;
	}
}
2743
2744
/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Returns the next positive interface index not currently in use by a
 *	registered device.  Candidates come from a static counter that
 *	restarts at 1 once it is no longer positive.  The caller must hold
 *	the rtnl semaphore or the dev_base_lock to be sure the value remains
 *	unique.
 */
static int dev_new_index(void)
{
	static int ifindex;

	do {
		if (++ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(ifindex));

	return ifindex;
}
2762
/* Starts at 1; register_netdevice() BUG()s while it is still non-zero. */
static int dev_boot_phase = 1;

/*
 * Delayed registration/unregistration: devices queued by net_set_todo()
 * and processed later by netdev_run_todo().  net_todo_list_lock guards
 * the list.
 */
static DEFINE_SPINLOCK(net_todo_list_lock);
static LIST_HEAD(net_todo_list);
2768
/*
 * Append @dev to the tail of net_todo_list under net_todo_list_lock so
 * that a later netdev_run_todo() picks it up.
 */
static inline void net_set_todo(struct net_device *dev)
{
	spin_lock(&net_todo_list_lock);
	list_add_tail(&dev->todo_list, &net_todo_list);
	spin_unlock(&net_todo_list_lock);
}
2775
2776/**
2777 * register_netdevice - register a network device
2778 * @dev: device to register
2779 *
2780 * Take a completed network device structure and add it to the kernel
2781 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2782 * chain. 0 is returned on success. A negative errno code is returned
2783 * on a failure to set up the device, or if the name is a duplicate.
2784 *
2785 * Callers must hold the rtnl semaphore. You may want
2786 * register_netdev() instead of this.
2787 *
2788 * BUGS:
2789 * The locking appears insufficient to guarantee two parallel registers
2790 * will not get the same name.
2791 */
2792
2793int register_netdevice(struct net_device *dev)
2794{
2795 struct hlist_head *head;
2796 struct hlist_node *p;
2797 int ret;
2798
2799 BUG_ON(dev_boot_phase);
2800 ASSERT_RTNL();
2801
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07002802 might_sleep();
2803
Linus Torvalds1da177e2005-04-16 15:20:36 -07002804 /* When net_device's are persistent, this will be fatal. */
2805 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2806
2807 spin_lock_init(&dev->queue_lock);
2808 spin_lock_init(&dev->xmit_lock);
2809 dev->xmit_lock_owner = -1;
2810#ifdef CONFIG_NET_CLS_ACT
2811 spin_lock_init(&dev->ingress_lock);
2812#endif
2813
2814 ret = alloc_divert_blk(dev);
2815 if (ret)
2816 goto out;
2817
2818 dev->iflink = -1;
2819
2820 /* Init, if this function is available */
2821 if (dev->init) {
2822 ret = dev->init(dev);
2823 if (ret) {
2824 if (ret > 0)
2825 ret = -EIO;
2826 goto out_err;
2827 }
2828 }
2829
2830 if (!dev_valid_name(dev->name)) {
2831 ret = -EINVAL;
2832 goto out_err;
2833 }
2834
2835 dev->ifindex = dev_new_index();
2836 if (dev->iflink == -1)
2837 dev->iflink = dev->ifindex;
2838
2839 /* Check for existence of name */
2840 head = dev_name_hash(dev->name);
2841 hlist_for_each(p, head) {
2842 struct net_device *d
2843 = hlist_entry(p, struct net_device, name_hlist);
2844 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2845 ret = -EEXIST;
2846 goto out_err;
2847 }
2848 }
2849
2850 /* Fix illegal SG+CSUM combinations. */
2851 if ((dev->features & NETIF_F_SG) &&
2852 !(dev->features & (NETIF_F_IP_CSUM |
2853 NETIF_F_NO_CSUM |
2854 NETIF_F_HW_CSUM))) {
2855 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2856 dev->name);
2857 dev->features &= ~NETIF_F_SG;
2858 }
2859
2860 /* TSO requires that SG is present as well. */
2861 if ((dev->features & NETIF_F_TSO) &&
2862 !(dev->features & NETIF_F_SG)) {
2863 printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2864 dev->name);
2865 dev->features &= ~NETIF_F_TSO;
2866 }
Ananda Rajue89e9cf2005-10-18 15:46:41 -07002867 if (dev->features & NETIF_F_UFO) {
2868 if (!(dev->features & NETIF_F_HW_CSUM)) {
2869 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2870 "NETIF_F_HW_CSUM feature.\n",
2871 dev->name);
2872 dev->features &= ~NETIF_F_UFO;
2873 }
2874 if (!(dev->features & NETIF_F_SG)) {
2875 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2876 "NETIF_F_SG feature.\n",
2877 dev->name);
2878 dev->features &= ~NETIF_F_UFO;
2879 }
2880 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002881
2882 /*
2883 * nil rebuild_header routine,
2884 * that should be never called and used as just bug trap.
2885 */
2886
2887 if (!dev->rebuild_header)
2888 dev->rebuild_header = default_rebuild_header;
2889
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07002890 ret = netdev_register_sysfs(dev);
2891 if (ret)
2892 goto out_err;
2893 dev->reg_state = NETREG_REGISTERED;
2894
Linus Torvalds1da177e2005-04-16 15:20:36 -07002895 /*
2896 * Default initial state at registry is that the
2897 * device is present.
2898 */
2899
2900 set_bit(__LINK_STATE_PRESENT, &dev->state);
2901
2902 dev->next = NULL;
2903 dev_init_scheduler(dev);
2904 write_lock_bh(&dev_base_lock);
2905 *dev_tail = dev;
2906 dev_tail = &dev->next;
2907 hlist_add_head(&dev->name_hlist, head);
2908 hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2909 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002910 write_unlock_bh(&dev_base_lock);
2911
2912 /* Notify protocols, that a new device appeared. */
Alan Sternf07d5b92006-05-09 15:23:03 -07002913 raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002914
Linus Torvalds1da177e2005-04-16 15:20:36 -07002915 ret = 0;
2916
2917out:
2918 return ret;
2919out_err:
2920 free_divert_blk(dev);
2921 goto out;
2922}
2923
2924/**
2925 * register_netdev - register a network device
2926 * @dev: device to register
2927 *
2928 * Take a completed network device structure and add it to the kernel
2929 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2930 * chain. 0 is returned on success. A negative errno code is returned
2931 * on a failure to set up the device, or if the name is a duplicate.
2932 *
2933 * This is a wrapper around register_netdev that takes the rtnl semaphore
2934 * and expands the device name if you passed a format string to
2935 * alloc_netdev.
2936 */
2937int register_netdev(struct net_device *dev)
2938{
2939 int err;
2940
2941 rtnl_lock();
2942
2943 /*
2944 * If the name is a format string the caller wants us to do a
2945 * name allocation.
2946 */
2947 if (strchr(dev->name, '%')) {
2948 err = dev_alloc_name(dev, dev->name);
2949 if (err < 0)
2950 goto out;
2951 }
2952
2953 /*
2954 * Back compatibility hook. Kill this one in 2.5
2955 */
2956 if (dev->name[0] == 0 || dev->name[0] == ' ') {
2957 err = dev_alloc_name(dev, "eth%d");
2958 if (err < 0)
2959 goto out;
2960 }
2961
2962 err = register_netdevice(dev);
2963out:
2964 rtnl_unlock();
2965 return err;
2966}
2967EXPORT_SYMBOL(register_netdev);
2968
2969/*
2970 * netdev_wait_allrefs - wait until all references are gone.
2971 *
2972 * This is called when unregistering network devices.
2973 *
2974 * Any protocol or device that holds a reference should register
2975 * for netdevice notification, and cleanup and put back the
2976 * reference if they receive an UNREGISTER event.
2977 * We can get stuck here if buggy protocols don't correctly
2978 * call dev_put.
2979 */
2980static void netdev_wait_allrefs(struct net_device *dev)
2981{
2982 unsigned long rebroadcast_time, warning_time;
2983
2984 rebroadcast_time = warning_time = jiffies;
2985 while (atomic_read(&dev->refcnt) != 0) {
2986 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08002987 rtnl_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002988
2989 /* Rebroadcast unregister notification */
Alan Sternf07d5b92006-05-09 15:23:03 -07002990 raw_notifier_call_chain(&netdev_chain,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002991 NETDEV_UNREGISTER, dev);
2992
2993 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2994 &dev->state)) {
2995 /* We must not have linkwatch events
2996 * pending on unregister. If this
2997 * happens, we simply run the queue
2998 * unscheduled, resulting in a noop
2999 * for this device.
3000 */
3001 linkwatch_run_queue();
3002 }
3003
Stephen Hemminger6756ae42006-03-20 22:23:58 -08003004 __rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003005
3006 rebroadcast_time = jiffies;
3007 }
3008
3009 msleep(250);
3010
3011 if (time_after(jiffies, warning_time + 10 * HZ)) {
3012 printk(KERN_EMERG "unregister_netdevice: "
3013 "waiting for %s to become free. Usage "
3014 "count = %d\n",
3015 dev->name, atomic_read(&dev->refcnt));
3016 warning_time = jiffies;
3017 }
3018 }
3019}
3020
3021/* The sequence is:
3022 *
3023 * rtnl_lock();
3024 * ...
3025 * register_netdevice(x1);
3026 * register_netdevice(x2);
3027 * ...
3028 * unregister_netdevice(y1);
3029 * unregister_netdevice(y2);
3030 * ...
3031 * rtnl_unlock();
3032 * free_netdev(y1);
3033 * free_netdev(y2);
3034 *
3035 * We are invoked by rtnl_unlock() after it drops the semaphore.
3036 * This allows us to deal with problems:
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07003037 * 1) We can delete sysfs objects which invoke hotplug
Linus Torvalds1da177e2005-04-16 15:20:36 -07003038 * without deadlocking with linkwatch via keventd.
3039 * 2) Since we run with the RTNL semaphore not held, we can sleep
3040 * safely in order to wait for the netdev refcnt to drop to zero.
3041 */
Arjan van de Ven4a3e2f72006-03-20 22:33:17 -08003042static DEFINE_MUTEX(net_todo_run_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003043void netdev_run_todo(void)
3044{
3045 struct list_head list = LIST_HEAD_INIT(list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003046
3047 /* Need to guard against multiple cpu's getting out of order. */
Arjan van de Ven4a3e2f72006-03-20 22:33:17 -08003048 mutex_lock(&net_todo_run_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003049
3050 /* Not safe to do outside the semaphore. We must not return
3051 * until all unregister events invoked by the local processor
3052 * have been completed (either by this todo run, or one on
3053 * another cpu).
3054 */
3055 if (list_empty(&net_todo_list))
3056 goto out;
3057
3058 /* Snapshot list, allow later requests */
3059 spin_lock(&net_todo_list_lock);
3060 list_splice_init(&net_todo_list, &list);
3061 spin_unlock(&net_todo_list_lock);
3062
3063 while (!list_empty(&list)) {
3064 struct net_device *dev
3065 = list_entry(list.next, struct net_device, todo_list);
3066 list_del(&dev->todo_list);
3067
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07003068 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003069 printk(KERN_ERR "network todo '%s' but state %d\n",
3070 dev->name, dev->reg_state);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07003071 dump_stack();
3072 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003073 }
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07003074
3075 netdev_unregister_sysfs(dev);
3076 dev->reg_state = NETREG_UNREGISTERED;
3077
3078 netdev_wait_allrefs(dev);
3079
3080 /* paranoia */
3081 BUG_ON(atomic_read(&dev->refcnt));
3082 BUG_TRAP(!dev->ip_ptr);
3083 BUG_TRAP(!dev->ip6_ptr);
3084 BUG_TRAP(!dev->dn_ptr);
3085
3086 /* It must be the very last action,
3087 * after this 'dev' may point to freed up memory.
3088 */
3089 if (dev->destructor)
3090 dev->destructor(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003091 }
3092
3093out:
Arjan van de Ven4a3e2f72006-03-20 22:33:17 -08003094 mutex_unlock(&net_todo_run_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003095}
3096
3097/**
3098 * alloc_netdev - allocate network device
3099 * @sizeof_priv: size of private data to allocate space for
3100 * @name: device name format string
3101 * @setup: callback to initialize device
3102 *
3103 * Allocates a struct net_device with private data area for driver use
3104 * and performs basic initialization.
3105 */
3106struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3107 void (*setup)(struct net_device *))
3108{
3109 void *p;
3110 struct net_device *dev;
3111 int alloc_size;
3112
3113 /* ensure 32-byte alignment of both the device and private area */
3114 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3115 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3116
Paolo 'Blaisorblade' Giarrusso31380de2006-04-06 22:38:28 -07003117 p = kzalloc(alloc_size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003118 if (!p) {
3119 printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
3120 return NULL;
3121 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003122
3123 dev = (struct net_device *)
3124 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3125 dev->padded = (char *)dev - (char *)p;
3126
3127 if (sizeof_priv)
3128 dev->priv = netdev_priv(dev);
3129
3130 setup(dev);
3131 strcpy(dev->name, name);
3132 return dev;
3133}
3134EXPORT_SYMBOL(alloc_netdev);
3135
3136/**
3137 * free_netdev - free network device
3138 * @dev: device
3139 *
3140 * This function does the last stage of destroying an allocated device
3141 * interface. The reference to the device object is released.
3142 * If this is the last reference then it will be freed.
3143 */
3144void free_netdev(struct net_device *dev)
3145{
3146#ifdef CONFIG_SYSFS
Stephen Hemminger3041a062006-05-26 13:25:24 -07003147 /* Compatibility with error handling in drivers */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003148 if (dev->reg_state == NETREG_UNINITIALIZED) {
3149 kfree((char *)dev - dev->padded);
3150 return;
3151 }
3152
3153 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3154 dev->reg_state = NETREG_RELEASED;
3155
3156 /* will free via class release */
3157 class_device_put(&dev->class_dev);
3158#else
3159 kfree((char *)dev - dev->padded);
3160#endif
3161}
3162
/**
 *	synchronize_net - synchronize with packet receive processing
 *
 *	Annotated with might_sleep() and then calls synchronize_rcu(), so
 *	it must only be used from a context that is allowed to sleep.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
3169
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	Returns 0 on success.  Returns -ENODEV if @dev was never
 *	registered (reg_state is NETREG_UNINITIALIZED) or if it cannot be
 *	found on the dev_base chain.  Any reg_state other than
 *	NETREG_UNINITIALIZED or NETREG_REGISTERED is a BUG.
 *
 *	Callers must hold the rtnl semaphore (checked with ASSERT_RTNL()).
 *	You may want unregister_netdev() instead of this.
 */

int unregister_netdevice(struct net_device *dev)
{
	struct net_device *d, **dp;

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	/* Some devices call without registering for initialization unwind. */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
				  "was registered\n", dev->name, dev);
		return -ENODEV;
	}

	BUG_ON(dev->reg_state != NETREG_REGISTERED);

	/*
	 * If device is running, close it first.  Note that this happens
	 * before the chain lookup below, i.e. even on the "not found" path.
	 */
	if (dev->flags & IFF_UP)
		dev_close(dev);

	/*
	 * And unlink it from device chain.  'dp' always points at the link
	 * that references 'd', so the entry can be spliced out in place;
	 * only the actual modification is done under dev_base_lock.
	 */
	for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
		if (d == dev) {
			write_lock_bh(&dev_base_lock);
			hlist_del(&dev->name_hlist);
			hlist_del(&dev->index_hlist);
			/* Removing the last entry: pull the tail pointer back. */
			if (dev_tail == &dev->next)
				dev_tail = dp;
			*dp = d->next;
			write_unlock_bh(&dev_base_lock);
			break;
		}
	}
	/* The loop ran off the end of the chain without finding @dev. */
	if (!d) {
		printk(KERN_ERR "unregister net_device: '%s' not found\n",
		       dev->name);
		return -ENODEV;
	}

	dev->reg_state = NETREG_UNREGISTERING;

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);


	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the multicast chain
	 */
	dev_mc_discard(dev);

	/* Give the driver its optional uninit hook. */
	if (dev->uninit)
		dev->uninit(dev);

	/* Notifier chain MUST detach us from master device. */
	BUG_TRAP(!dev->master);

	free_divert_blk(dev);

	/* Finish processing unregister after unlock */
	net_set_todo(dev);

	synchronize_net();

	/* NOTE(review): presumably drops the registration reference - confirm. */
	dev_put(dev);
	return 0;
}
3255
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore around the call.  In general you want to use
 *	this and not unregister_netdevice.
 *
 *	Nothing is returned: the result of unregister_netdevice() is
 *	discarded, so callers cannot observe a failure through this
 *	interface.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}
3274
3275EXPORT_SYMBOL(unregister_netdev);
3276
3277#ifdef CONFIG_HOTPLUG_CPU
3278static int dev_cpu_callback(struct notifier_block *nfb,
3279 unsigned long action,
3280 void *ocpu)
3281{
3282 struct sk_buff **list_skb;
3283 struct net_device **list_net;
3284 struct sk_buff *skb;
3285 unsigned int cpu, oldcpu = (unsigned long)ocpu;
3286 struct softnet_data *sd, *oldsd;
3287
3288 if (action != CPU_DEAD)
3289 return NOTIFY_OK;
3290
3291 local_irq_disable();
3292 cpu = smp_processor_id();
3293 sd = &per_cpu(softnet_data, cpu);
3294 oldsd = &per_cpu(softnet_data, oldcpu);
3295
3296 /* Find end of our completion_queue. */
3297 list_skb = &sd->completion_queue;
3298 while (*list_skb)
3299 list_skb = &(*list_skb)->next;
3300 /* Append completion queue from offline CPU. */
3301 *list_skb = oldsd->completion_queue;
3302 oldsd->completion_queue = NULL;
3303
3304 /* Find end of our output_queue. */
3305 list_net = &sd->output_queue;
3306 while (*list_net)
3307 list_net = &(*list_net)->next_sched;
3308 /* Append output queue from offline CPU. */
3309 *list_net = oldsd->output_queue;
3310 oldsd->output_queue = NULL;
3311
3312 raise_softirq_irqoff(NET_TX_SOFTIRQ);
3313 local_irq_enable();
3314
3315 /* Process offline CPU's input_pkt_queue */
3316 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3317 netif_rx(skb);
3318
3319 return NOTIFY_OK;
3320}
3321#endif /* CONFIG_HOTPLUG_CPU */
3322
Chris Leechdb217332006-06-17 21:24:58 -07003323#ifdef CONFIG_NET_DMA
3324/**
3325 * net_dma_rebalance -
3326 * This is called when the number of channels allocated to the net_dma_client
3327 * changes. The net_dma_client tries to have one DMA channel per CPU.
3328 */
3329static void net_dma_rebalance(void)
3330{
3331 unsigned int cpu, i, n;
3332 struct dma_chan *chan;
3333
3334 lock_cpu_hotplug();
3335
3336 if (net_dma_count == 0) {
3337 for_each_online_cpu(cpu)
3338 rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL);
3339 unlock_cpu_hotplug();
3340 return;
3341 }
3342
3343 i = 0;
3344 cpu = first_cpu(cpu_online_map);
3345
3346 rcu_read_lock();
3347 list_for_each_entry(chan, &net_dma_client->channels, client_node) {
3348 n = ((num_online_cpus() / net_dma_count)
3349 + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
3350
3351 while(n) {
3352 per_cpu(softnet_data.net_dma, cpu) = chan;
3353 cpu = next_cpu(cpu, cpu_online_map);
3354 n--;
3355 }
3356 i++;
3357 }
3358 rcu_read_unlock();
3359
3360 unlock_cpu_hotplug();
3361}
3362
3363/**
3364 * netdev_dma_event - event callback for the net_dma_client
3365 * @client: should always be net_dma_client
3366 * @chan:
3367 * @event:
3368 */
3369static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3370 enum dma_event event)
3371{
3372 spin_lock(&net_dma_event_lock);
3373 switch (event) {
3374 case DMA_RESOURCE_ADDED:
3375 net_dma_count++;
3376 net_dma_rebalance();
3377 break;
3378 case DMA_RESOURCE_REMOVED:
3379 net_dma_count--;
3380 net_dma_rebalance();
3381 break;
3382 default:
3383 break;
3384 }
3385 spin_unlock(&net_dma_event_lock);
3386}
3387
3388/**
3389 * netdev_dma_regiser - register the networking subsystem as a DMA client
3390 */
3391static int __init netdev_dma_register(void)
3392{
3393 spin_lock_init(&net_dma_event_lock);
3394 net_dma_client = dma_async_client_register(netdev_dma_event);
3395 if (net_dma_client == NULL)
3396 return -ENOMEM;
3397
3398 dma_async_client_chan_request(net_dma_client, num_online_cpus());
3399 return 0;
3400}
3401
3402#else
3403static int __init netdev_dma_register(void) { return -ENODEV; }
3404#endif /* CONFIG_NET_DMA */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003405
3406/*
3407 * Initialize the DEV module. At boot time this walks the device list and
3408 * unhooks any devices that fail to initialise (normally hardware not
3409 * present) and leaves us with a valid list of present and active devices.
3410 *
3411 */
3412
3413/*
3414 * This is called single threaded during boot, so no need
3415 * to take the rtnl semaphore.
3416 */
3417static int __init net_dev_init(void)
3418{
3419 int i, rc = -ENOMEM;
3420
3421 BUG_ON(!dev_boot_phase);
3422
3423 net_random_init();
3424
3425 if (dev_proc_init())
3426 goto out;
3427
3428 if (netdev_sysfs_init())
3429 goto out;
3430
3431 INIT_LIST_HEAD(&ptype_all);
3432 for (i = 0; i < 16; i++)
3433 INIT_LIST_HEAD(&ptype_base[i]);
3434
3435 for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3436 INIT_HLIST_HEAD(&dev_name_head[i]);
3437
3438 for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3439 INIT_HLIST_HEAD(&dev_index_head[i]);
3440
3441 /*
3442 * Initialise the packet receive queues.
3443 */
3444
KAMEZAWA Hiroyuki6f912042006-04-10 22:52:50 -07003445 for_each_possible_cpu(i) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003446 struct softnet_data *queue;
3447
3448 queue = &per_cpu(softnet_data, i);
3449 skb_queue_head_init(&queue->input_pkt_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003450 queue->completion_queue = NULL;
3451 INIT_LIST_HEAD(&queue->poll_list);
3452 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3453 queue->backlog_dev.weight = weight_p;
3454 queue->backlog_dev.poll = process_backlog;
3455 atomic_set(&queue->backlog_dev.refcnt, 1);
3456 }
3457
Chris Leechdb217332006-06-17 21:24:58 -07003458 netdev_dma_register();
3459
Linus Torvalds1da177e2005-04-16 15:20:36 -07003460 dev_boot_phase = 0;
3461
3462 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3463 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3464
3465 hotcpu_notifier(dev_cpu_callback, 0);
3466 dst_init();
3467 dev_mcast_init();
3468 rc = 0;
3469out:
3470 return rc;
3471}
3472
3473subsys_initcall(net_dev_init);
3474
3475EXPORT_SYMBOL(__dev_get_by_index);
3476EXPORT_SYMBOL(__dev_get_by_name);
3477EXPORT_SYMBOL(__dev_remove_pack);
3478EXPORT_SYMBOL(__skb_linearize);
Mitch Williamsc2373ee2005-11-09 10:34:45 -08003479EXPORT_SYMBOL(dev_valid_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003480EXPORT_SYMBOL(dev_add_pack);
3481EXPORT_SYMBOL(dev_alloc_name);
3482EXPORT_SYMBOL(dev_close);
3483EXPORT_SYMBOL(dev_get_by_flags);
3484EXPORT_SYMBOL(dev_get_by_index);
3485EXPORT_SYMBOL(dev_get_by_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003486EXPORT_SYMBOL(dev_open);
3487EXPORT_SYMBOL(dev_queue_xmit);
3488EXPORT_SYMBOL(dev_remove_pack);
3489EXPORT_SYMBOL(dev_set_allmulti);
3490EXPORT_SYMBOL(dev_set_promiscuity);
3491EXPORT_SYMBOL(dev_change_flags);
3492EXPORT_SYMBOL(dev_set_mtu);
3493EXPORT_SYMBOL(dev_set_mac_address);
3494EXPORT_SYMBOL(free_netdev);
3495EXPORT_SYMBOL(netdev_boot_setup_check);
3496EXPORT_SYMBOL(netdev_set_master);
3497EXPORT_SYMBOL(netdev_state_change);
3498EXPORT_SYMBOL(netif_receive_skb);
3499EXPORT_SYMBOL(netif_rx);
3500EXPORT_SYMBOL(register_gifconf);
3501EXPORT_SYMBOL(register_netdevice);
3502EXPORT_SYMBOL(register_netdevice_notifier);
3503EXPORT_SYMBOL(skb_checksum_help);
3504EXPORT_SYMBOL(synchronize_net);
3505EXPORT_SYMBOL(unregister_netdevice);
3506EXPORT_SYMBOL(unregister_netdevice_notifier);
3507EXPORT_SYMBOL(net_enable_timestamp);
3508EXPORT_SYMBOL(net_disable_timestamp);
3509EXPORT_SYMBOL(dev_get_flags);
3510
3511#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3512EXPORT_SYMBOL(br_handle_frame_hook);
3513EXPORT_SYMBOL(br_fdb_get_hook);
3514EXPORT_SYMBOL(br_fdb_put_hook);
3515#endif
3516
3517#ifdef CONFIG_KMOD
3518EXPORT_SYMBOL(dev_load);
3519#endif
3520
3521EXPORT_PER_CPU_SYMBOL(softnet_data);