/*
 *	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *	Authors:	Ross Biro
 *			Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *			Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain:	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell :	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16? Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *	       sure which should go first, but I bet it won't make much
 *	       difference if we are running VLANs.  The good news is that
 *	       this protocol won't be in the list unless compiled in, so
 *	       the average user (w/out VLANs) will not be adversely affected.
 *	       --BLG
 *
 *		0800	IP
 *		8100	802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;	/* Taps */

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock().
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}

/* Device list removal
 * caller must respect an RCU grace period before freeing/reusing dev
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	hlist_del_rcu(&dev->name_hlist);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
	 ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
	 "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep, therefore it cannot guarantee that
 *	all CPUs that are in the middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);

/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPUs have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);

/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);

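/*
 * A hedged sketch (not part of this file) of how a module uses the
 * handler registration above; the "example_" names are hypothetical.
 * A handler owns the skb it is handed and must free or otherwise
 * consume it.
 *
 *	static int example_rcv(struct sk_buff *skb, struct net_device *dev,
 *			       struct packet_type *pt,
 *			       struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type example_pt __read_mostly = {
 *		.type = htons(ETH_P_ALL),
 *		.func = example_rcv,
 *	};
 *
 * The module calls dev_add_pack(&example_pt) at init time and
 * dev_remove_pack(&example_pt) at exit; the latter may sleep because
 * of synchronize_net().
 */
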
/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add - add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine for
 *	all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check - check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings are found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq	= s[i].map.irq;
			dev->base_addr	= s[i].map.base_addr;
			dev->mem_start	= s[i].map.mem_start;
			dev->mem_end	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);

/**
 *	netdev_boot_base - get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

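/*
 * A worked example of the "netdev=" parsing above (the values are made
 * up):
 *
 *	netdev=5,0x300,0,0,eth0
 *
 * get_options() consumes up to four integers, taken in order as irq,
 * base_addr, mem_start and mem_end, and leaves str pointing at "eth0",
 * which becomes the entry's name; netdev_boot_setup_check() applies the
 * stored map to the device later during probing.
 */
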
/*******************************************************************************

		Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

/**
 *	dev_get_by_name_rcu - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.
 *	If the name is found a pointer to the device is returned.
 *	If the name is not found then %NULL is returned.
 *	The reference counters are not incremented so the caller must be
 *	careful with locks. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);

/**
 *	dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);

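/*
 * A hedged usage sketch for the three name-lookup flavours above:
 * __dev_get_by_name() needs RTNL or dev_base_lock, dev_get_by_name_rcu()
 * needs rcu_read_lock(), and dev_get_by_name() may be called from any
 * context but the reference it takes must be dropped:
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *	if (dev) {
 *		... dev cannot go away while we hold the reference ...
 *		dev_put(dev);
 *	}
 */
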
/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);

/**
 *	dev_get_by_index_rcu - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold RCU lock.
 */

struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);

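/*
 * Illustrative RCU-side lookup using the helper above; the returned
 * pointer is only guaranteed valid inside the read-side section unless
 * dev_hold() is called on it before rcu_read_unlock():
 *
 *	rcu_read_lock();
 *	dev = dev_get_by_index_rcu(net, ifindex);
 *	if (dev)
 *		dev_hold(dev);
 *	rcu_read_unlock();
 */
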
/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking.
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
				    unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(dev_get_by_flags);

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}
EXPORT_SYMBOL(dev_valid_name);

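/*
 * What the checks above accept and reject, worked through (illustrative):
 *
 *	dev_valid_name("eth0")  -> 1
 *	dev_valid_name("")      -> 0	empty string
 *	dev_valid_name("..")    -> 0	would collide with sysfs paths
 *	dev_valid_name("a/b")   -> 0	'/' is not allowed
 *	dev_valid_name("a b")   -> 0	whitespace is not allowed
 *
 * Any name of IFNAMSIZ or more characters is also rejected.
 */
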
/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" - it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" - it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
EXPORT_SYMBOL(dev_alloc_name);

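/*
 * Worked example (illustrative) of the "%d" scan above: with eth0 and
 * eth2 already registered, __dev_alloc_name(net, "eth%d", buf) sets bits
 * 0 and 2 in the in-use page, find_first_zero_bit() yields 1, buf becomes
 * "eth1", and 1 is returned as the assigned unit number.
 */
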
/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device; can pass format strings "eth%d"
 *	for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;

	memcpy(oldname, dev->name, IFNAMSIZ);

	if (strchr(newname, '%')) {
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
	} else if (__dev_get_by_name(net, newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

rollback:
	/* For now only devices in the initial network namespace
	 * are in sysfs.
	 */
	if (net == &init_net) {
		ret = device_rename(&dev->dev, dev->name);
		if (ret) {
			memcpy(dev->name, oldname, IFNAMSIZ);
			return ret;
		}
	}

	write_lock_bh(&dev_base_lock);
	hlist_del(&dev->name_hlist);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
		}
	}

	return err;
}

/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from @alias
 *
 *	Set ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		if (dev->ifalias) {
			kfree(dev->ifalias);
			dev->ifalias = NULL;
		}
		return 0;
	}

	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!dev->ifalias)
		return -ENOMEM;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}

/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
EXPORT_SYMBOL(netdev_state_change);

void netdev_bonding_change(struct net_device *dev, unsigned long event)
{
	call_netdevice_notifiers(event, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);

/**
 *	dev_load - load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	rcu_read_unlock();

	if (!dev && capable(CAP_NET_ADMIN))
		request_module("%s", name);
}
EXPORT_SYMBOL(dev_load);

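/*
 * Illustrative call (hedged): an ioctl path that wants "tunl0" to exist
 * can issue
 *
 *	dev_load(net, "tunl0");
 *
 * If no such device is registered and the caller has CAP_NET_ADMIN, this
 * asks kmod to modprobe a module by that name; a driver that wants to be
 * found this way provides a matching module alias.
 */
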
/**
 *	dev_open - prepare an interface for use.
 *	@dev: device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		return ret;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	/*
	 *	If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Enable NET_DMA
		 */
		net_dmaengine_get();

		/*
		 *	Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return ret;
}
EXPORT_SYMBOL(dev_open);

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for death while the device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can even be on a different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	/*
	 *	Shutdown NET_DMA
	 */
	net_dmaengine_put();

	return 0;
}
EXPORT_SYMBOL(dev_close);

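/*
 * A hedged sketch of the canonical caller pattern for dev_open() and
 * dev_close() above (illustrative only; in-tree callers include
 * dev_change_flags()):
 *
 *	rtnl_lock();
 *	dev = __dev_get_by_name(net, "eth0");
 *	if (dev && !(dev->flags & IFF_UP))
 *		err = dev_open(dev);
 *	...
 *	if (dev && (dev->flags & IFF_UP))
 *		dev_close(dev);
 *	rtnl_unlock();
 *
 * dev_open() fires NETDEV_PRE_UP then NETDEV_UP; dev_close() fires
 * NETDEV_GOING_DOWN, stops the device, then fires NETDEV_DOWN.
 */
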
/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
	    dev->ethtool_ops->set_flags) {
		u32 flags = dev->ethtool_ops->get_flags(dev);
		if (flags & ETH_FLAG_LRO) {
			flags &= ~ETH_FLAG_LRO;
			dev->ethtool_ops->set_flags(dev, flags);
		}
	}
	WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);

static int dev_boot_phase = 1;

/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

1290/**
1291 * register_netdevice_notifier - register a network notifier block
1292 * @nb: notifier
1293 *
1294 * Register a notifier to be called when network device events occur.
1295 * The notifier passed is linked into the kernel structures and must
1296 * not be reused until it has been unregistered. A negative errno code
1297 * is returned on a failure.
1298 *
1299 * When registered all registration and up events are replayed
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001300 * to the new notifier to allow device to have a race free
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 * view of the network device list.
1302 */
1303
1304int register_netdevice_notifier(struct notifier_block *nb)
1305{
1306 struct net_device *dev;
Herbert Xufcc5a032007-07-30 17:03:38 -07001307 struct net_device *last;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001308 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 int err;
1310
1311 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001312 err = raw_notifier_chain_register(&netdev_chain, nb);
Herbert Xufcc5a032007-07-30 17:03:38 -07001313 if (err)
1314 goto unlock;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001315 if (dev_boot_phase)
1316 goto unlock;
1317 for_each_net(net) {
1318 for_each_netdev(net, dev) {
1319 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1320 err = notifier_to_errno(err);
1321 if (err)
1322 goto rollback;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001323
Eric W. Biederman881d9662007-09-17 11:56:21 -07001324 if (!(dev->flags & IFF_UP))
1325 continue;
Herbert Xufcc5a032007-07-30 17:03:38 -07001326
Eric W. Biederman881d9662007-09-17 11:56:21 -07001327 nb->notifier_call(nb, NETDEV_UP, dev);
1328 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329 }
Herbert Xufcc5a032007-07-30 17:03:38 -07001330
1331unlock:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 rtnl_unlock();
1333 return err;
Herbert Xufcc5a032007-07-30 17:03:38 -07001334
1335rollback:
1336 last = dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001337 for_each_net(net) {
1338 for_each_netdev(net, dev) {
1339 if (dev == last)
1340 break;
Herbert Xufcc5a032007-07-30 17:03:38 -07001341
Eric W. Biederman881d9662007-09-17 11:56:21 -07001342 if (dev->flags & IFF_UP) {
1343 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1344 nb->notifier_call(nb, NETDEV_DOWN, dev);
1345 }
1346 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07001347 }
Herbert Xufcc5a032007-07-30 17:03:38 -07001348 }
Pavel Emelyanovc67625a2007-11-14 15:53:16 -08001349
1350 raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xufcc5a032007-07-30 17:03:38 -07001351 goto unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001353EXPORT_SYMBOL(register_netdevice_notifier);
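/*
 * Editorial sketch (hypothetical module code; the "example_" names do
 * not exist in the tree): a module tracking interface state would
 * register a notifier like this. Because registration replays
 * NETDEV_REGISTER and NETDEV_UP for existing devices, the callback
 * must be ready to run as soon as register_netdevice_notifier()
 * is called.
 *
 *	static int example_event(struct notifier_block *nb,
 *				 unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_nb = {
 *		.notifier_call = example_event,
 *	};
 *
 *	err = register_netdevice_notifier(&example_nb);
 */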
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354
1355/**
1356 * unregister_netdevice_notifier - unregister a network notifier block
1357 * @nb: notifier
1358 *
1359 * Unregister a notifier previously registered by
1360 * register_netdevice_notifier(). The notifier is unlinked from the
1361 * kernel structures and may then be reused. A negative errno code
1362 * is returned on a failure.
1363 */
1364
1365int unregister_netdevice_notifier(struct notifier_block *nb)
1366{
Herbert Xu9f514952006-03-25 01:24:25 -08001367 int err;
1368
1369 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001370 err = raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xu9f514952006-03-25 01:24:25 -08001371 rtnl_unlock();
1372 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001374EXPORT_SYMBOL(unregister_netdevice_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375
1376/**
1377 * call_netdevice_notifiers - call all network notifier blocks
1378 * @val: value passed unmodified to notifier function
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07001379 * @dev: net_device pointer passed unmodified to notifier function
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380 *
1381 * Call all network notifier blocks. Parameters and return value
Alan Sternf07d5b92006-05-09 15:23:03 -07001382 * are as for raw_notifier_call_chain().
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383 */
1384
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001385int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386{
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001387 return raw_notifier_call_chain(&netdev_chain, val, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388}
1389
1390/* When > 0 there are consumers of rx skb time stamps */
1391static atomic_t netstamp_needed = ATOMIC_INIT(0);
1392
1393void net_enable_timestamp(void)
1394{
1395 atomic_inc(&netstamp_needed);
1396}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001397EXPORT_SYMBOL(net_enable_timestamp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398
1399void net_disable_timestamp(void)
1400{
1401 atomic_dec(&netstamp_needed);
1402}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001403EXPORT_SYMBOL(net_disable_timestamp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001405static inline void net_timestamp(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406{
1407 if (atomic_read(&netstamp_needed))
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001408 __net_timestamp(skb);
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001409 else
1410 skb->tstamp.tv64 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411}
1412
1413/*
1414 * Support routine. Sends outgoing frames to any network
1415 * taps currently in use.
1416 */
1417
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001418static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001419{
1420 struct packet_type *ptype;
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001421
Jarek Poplawski8caf1532009-04-17 10:08:49 +00001422#ifdef CONFIG_NET_CLS_ACT
1423 if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
1424 net_timestamp(skb);
1425#else
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001426 net_timestamp(skb);
Jarek Poplawski8caf1532009-04-17 10:08:49 +00001427#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428
1429 rcu_read_lock();
1430 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1431 /* Never send packets back to the socket
1432 * they originated from - MvS (miquels@drinkel.ow.org)
1433 */
1434 if ((ptype->dev == dev || !ptype->dev) &&
1435 (ptype->af_packet_priv == NULL ||
1436 (struct sock *)ptype->af_packet_priv != skb->sk)) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001437 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 if (!skb2)
1439 break;
1440
1441 /* skb->nh should be correctly
1442 set by the sender, so the second statement is
1443 just protection against buggy protocols.
1444 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001445 skb_reset_mac_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001447 if (skb_network_header(skb2) < skb2->data ||
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001448 skb2->network_header > skb2->tail) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 if (net_ratelimit())
1450 printk(KERN_CRIT "protocol %04x is "
1451 "buggy, dev %s\n",
1452 skb2->protocol, dev->name);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07001453 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454 }
1455
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001456 skb2->transport_header = skb2->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457 skb2->pkt_type = PACKET_OUTGOING;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001458 ptype->func(skb2, skb->dev, ptype, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459 }
1460 }
1461 rcu_read_unlock();
1462}
1463
Denis Vlasenko56079432006-03-29 15:57:29 -08001464
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001465static inline void __netif_reschedule(struct Qdisc *q)
1466{
1467 struct softnet_data *sd;
1468 unsigned long flags;
1469
1470 local_irq_save(flags);
1471 sd = &__get_cpu_var(softnet_data);
1472 q->next_sched = sd->output_queue;
1473 sd->output_queue = q;
1474 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1475 local_irq_restore(flags);
1476}
1477
David S. Miller37437bb2008-07-16 02:15:04 -07001478void __netif_schedule(struct Qdisc *q)
Denis Vlasenko56079432006-03-29 15:57:29 -08001479{
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001480 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1481 __netif_reschedule(q);
Denis Vlasenko56079432006-03-29 15:57:29 -08001482}
1483EXPORT_SYMBOL(__netif_schedule);
1484
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001485void dev_kfree_skb_irq(struct sk_buff *skb)
Denis Vlasenko56079432006-03-29 15:57:29 -08001486{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001487 if (atomic_dec_and_test(&skb->users)) {
1488 struct softnet_data *sd;
1489 unsigned long flags;
Denis Vlasenko56079432006-03-29 15:57:29 -08001490
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001491 local_irq_save(flags);
1492 sd = &__get_cpu_var(softnet_data);
1493 skb->next = sd->completion_queue;
1494 sd->completion_queue = skb;
1495 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1496 local_irq_restore(flags);
1497 }
Denis Vlasenko56079432006-03-29 15:57:29 -08001498}
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001499EXPORT_SYMBOL(dev_kfree_skb_irq);
Denis Vlasenko56079432006-03-29 15:57:29 -08001500
1501void dev_kfree_skb_any(struct sk_buff *skb)
1502{
1503 if (in_irq() || irqs_disabled())
1504 dev_kfree_skb_irq(skb);
1505 else
1506 dev_kfree_skb(skb);
1507}
1508EXPORT_SYMBOL(dev_kfree_skb_any);
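/*
 * Editorial sketch: a TX-completion or timeout path that may run in
 * either hard-IRQ or process context frees buffers with the _any
 * variant instead of choosing between the two by hand:
 *
 *	dev_kfree_skb_any(skb);
 */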
1509
1510
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001511/**
1512 * netif_device_detach - mark device as removed
1513 * @dev: network device
1514 *
1515 * Mark the device as removed from the system and therefore no longer available.
1516 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001517void netif_device_detach(struct net_device *dev)
1518{
1519 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1520 netif_running(dev)) {
Alexander Duyckd5431032009-04-08 13:15:22 +00001521 netif_tx_stop_all_queues(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001522 }
1523}
1524EXPORT_SYMBOL(netif_device_detach);
1525
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001526/**
1527 * netif_device_attach - mark device as attached
1528 * @dev: network device
1529 *
1530 * Mark the device as attached to the system and restart it if needed.
1531 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001532void netif_device_attach(struct net_device *dev)
1533{
1534 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1535 netif_running(dev)) {
Alexander Duyckd5431032009-04-08 13:15:22 +00001536 netif_tx_wake_all_queues(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001537 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001538 }
1539}
1540EXPORT_SYMBOL(netif_device_attach);
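/*
 * Editorial sketch (hypothetical driver code): the usual pairing is
 * in a driver's power-management hooks, e.g. for a PCI device:
 *
 *	static int example_suspend(struct pci_dev *pdev, pm_message_t state)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_detach(dev);
 *		return 0;
 *	}
 *
 *	static int example_resume(struct pci_dev *pdev)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_attach(dev);
 *		return 0;
 *	}
 */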
1541
Ben Hutchings6de329e2008-06-16 17:02:28 -07001542static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1543{
1544 return ((features & NETIF_F_GEN_CSUM) ||
1545 ((features & NETIF_F_IP_CSUM) &&
1546 protocol == htons(ETH_P_IP)) ||
1547 ((features & NETIF_F_IPV6_CSUM) &&
Yi Zou1c8dbcf2009-02-27 14:06:54 -08001548 protocol == htons(ETH_P_IPV6)) ||
1549 ((features & NETIF_F_FCOE_CRC) &&
1550 protocol == htons(ETH_P_FCOE)));
Ben Hutchings6de329e2008-06-16 17:02:28 -07001551}
1552
1553static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1554{
1555 if (can_checksum_protocol(dev->features, skb->protocol))
1556 return true;
1557
1558 if (skb->protocol == htons(ETH_P_8021Q)) {
1559 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1560 if (can_checksum_protocol(dev->features & dev->vlan_features,
1561 veh->h_vlan_encapsulated_proto))
1562 return true;
1563 }
1564
1565 return false;
1566}
Denis Vlasenko56079432006-03-29 15:57:29 -08001567
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568/*
1569 * Invalidate hardware checksum when packet is to be mangled, and
1570 * complete checksum manually on outgoing path.
1571 */
Patrick McHardy84fa7932006-08-29 16:44:56 -07001572int skb_checksum_help(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573{
Al Virod3bc23e2006-11-14 21:24:49 -08001574 __wsum csum;
Herbert Xu663ead32007-04-09 11:59:07 -07001575 int ret = 0, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576
Patrick McHardy84fa7932006-08-29 16:44:56 -07001577 if (skb->ip_summed == CHECKSUM_COMPLETE)
Herbert Xua430a432006-07-08 13:34:56 -07001578 goto out_set_summed;
1579
1580 if (unlikely(skb_shinfo(skb)->gso_size)) {
Herbert Xua430a432006-07-08 13:34:56 -07001581 /* Let GSO fix up the checksum. */
1582 goto out_set_summed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583 }
1584
Herbert Xua0308472007-10-15 01:47:15 -07001585 offset = skb->csum_start - skb_headroom(skb);
1586 BUG_ON(offset >= skb_headlen(skb));
1587 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1588
1589 offset += skb->csum_offset;
1590 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1591
1592 if (skb_cloned(skb) &&
1593 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1595 if (ret)
1596 goto out;
1597 }
1598
Herbert Xua0308472007-10-15 01:47:15 -07001599 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
Herbert Xua430a432006-07-08 13:34:56 -07001600out_set_summed:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601 skb->ip_summed = CHECKSUM_NONE;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001602out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 return ret;
1604}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001605EXPORT_SYMBOL(skb_checksum_help);
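/*
 * Editorial sketch (hypothetical driver code): hardware that cannot
 * checksum a particular frame falls back to software like this before
 * posting the buffer for DMA:
 *
 *	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *	    skb_checksum_help(skb)) {
 *		dev_kfree_skb_any(skb);
 *		return NETDEV_TX_OK;
 *	}
 */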
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001607/**
1608 * skb_gso_segment - Perform segmentation on skb.
1609 * @skb: buffer to segment
Herbert Xu576a30e2006-06-27 13:22:38 -07001610 * @features: features for the output path (see dev->features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001611 *
1612 * This function segments the given skb and returns a list of segments.
Herbert Xu576a30e2006-06-27 13:22:38 -07001613 *
1614 * It may return NULL if the skb requires no segmentation. This is
1615 * only possible when GSO is used for verifying header integrity.
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001616 */
Herbert Xu576a30e2006-06-27 13:22:38 -07001617struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001618{
1619 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1620 struct packet_type *ptype;
Al Viro252e33462006-11-14 20:48:11 -08001621 __be16 type = skb->protocol;
Herbert Xua430a432006-07-08 13:34:56 -07001622 int err;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001623
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001624 skb_reset_mac_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001625 skb->mac_len = skb->network_header - skb->mac_header;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001626 __skb_pull(skb, skb->mac_len);
1627
Herbert Xu67fd1a72009-01-19 16:26:44 -08001628 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1629 struct net_device *dev = skb->dev;
1630 struct ethtool_drvinfo info = {};
1631
1632 if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
1633 dev->ethtool_ops->get_drvinfo(dev, &info);
1634
1635 WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
1636 "ip_summed=%d",
1637 info.driver, dev ? dev->features : 0L,
1638 skb->sk ? skb->sk->sk_route_caps : 0L,
1639 skb->len, skb->data_len, skb->ip_summed);
1640
Herbert Xua430a432006-07-08 13:34:56 -07001641 if (skb_header_cloned(skb) &&
1642 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1643 return ERR_PTR(err);
1644 }
1645
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001646 rcu_read_lock();
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08001647 list_for_each_entry_rcu(ptype,
1648 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001649 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
Patrick McHardy84fa7932006-08-29 16:44:56 -07001650 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001651 err = ptype->gso_send_check(skb);
1652 segs = ERR_PTR(err);
1653 if (err || skb_gso_ok(skb, features))
1654 break;
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001655 __skb_push(skb, (skb->data -
1656 skb_network_header(skb)));
Herbert Xua430a432006-07-08 13:34:56 -07001657 }
Herbert Xu576a30e2006-06-27 13:22:38 -07001658 segs = ptype->gso_segment(skb, features);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001659 break;
1660 }
1661 }
1662 rcu_read_unlock();
1663
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001664 __skb_push(skb, skb->data - skb_mac_header(skb));
Herbert Xu576a30e2006-06-27 13:22:38 -07001665
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001666 return segs;
1667}
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001668EXPORT_SYMBOL(skb_gso_segment);
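/*
 * Editorial sketch (example_xmit_one() is hypothetical): callers check
 * for an error pointer and then walk the returned list via skb->next,
 * detaching each segment before handing it on; dev_gso_segment()
 * below is the in-tree example of this pattern.
 *
 *	segs = skb_gso_segment(skb, dev->features);
 *	if (IS_ERR(segs))
 *		goto drop;
 *	while (segs) {
 *		struct sk_buff *nskb = segs;
 *
 *		segs = segs->next;
 *		nskb->next = NULL;
 *		example_xmit_one(nskb);
 *	}
 */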
1669
Herbert Xufb286bb2005-11-10 13:01:24 -08001670/* Take action when hardware reception checksum errors are detected. */
1671#ifdef CONFIG_BUG
1672void netdev_rx_csum_fault(struct net_device *dev)
1673{
1674 if (net_ratelimit()) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001675 printk(KERN_ERR "%s: hw csum failure.\n",
Stephen Hemminger246a4212005-12-08 15:21:39 -08001676 dev ? dev->name : "<unknown>");
Herbert Xufb286bb2005-11-10 13:01:24 -08001677 dump_stack();
1678 }
1679}
1680EXPORT_SYMBOL(netdev_rx_csum_fault);
1681#endif
1682
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683/* Actually, we should eliminate this check as soon as we know that:
1684 * 1. An IOMMU is present and can map all the memory.
1685 * 2. No high memory really exists on this machine.
1686 */
1687
1688static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1689{
Herbert Xu3d3a8532006-06-27 13:33:10 -07001690#ifdef CONFIG_HIGHMEM
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 int i;
1692
1693 if (dev->features & NETIF_F_HIGHDMA)
1694 return 0;
1695
1696 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1697 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1698 return 1;
1699
Herbert Xu3d3a8532006-06-27 13:33:10 -07001700#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701 return 0;
1702}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001704struct dev_gso_cb {
1705 void (*destructor)(struct sk_buff *skb);
1706};
1707
1708#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1709
1710static void dev_gso_skb_destructor(struct sk_buff *skb)
1711{
1712 struct dev_gso_cb *cb;
1713
1714 do {
1715 struct sk_buff *nskb = skb->next;
1716
1717 skb->next = nskb->next;
1718 nskb->next = NULL;
1719 kfree_skb(nskb);
1720 } while (skb->next);
1721
1722 cb = DEV_GSO_CB(skb);
1723 if (cb->destructor)
1724 cb->destructor(skb);
1725}
1726
1727/**
1728 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1729 * @skb: buffer to segment
1730 *
1731 * This function segments the given skb and stores the list of segments
1732 * in skb->next.
1733 */
1734static int dev_gso_segment(struct sk_buff *skb)
1735{
1736 struct net_device *dev = skb->dev;
1737 struct sk_buff *segs;
Herbert Xu576a30e2006-06-27 13:22:38 -07001738 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1739 NETIF_F_SG : 0);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001740
Herbert Xu576a30e2006-06-27 13:22:38 -07001741 segs = skb_gso_segment(skb, features);
1742
1743 /* Verifying header integrity only. */
1744 if (!segs)
1745 return 0;
1746
Hirofumi Nakagawa801678c2008-04-29 01:03:09 -07001747 if (IS_ERR(segs))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001748 return PTR_ERR(segs);
1749
1750 skb->next = segs;
1751 DEV_GSO_CB(skb)->destructor = skb->destructor;
1752 skb->destructor = dev_gso_skb_destructor;
1753
1754 return 0;
1755}
1756
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001757int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1758 struct netdev_queue *txq)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001759{
Stephen Hemminger00829822008-11-20 20:14:53 -08001760 const struct net_device_ops *ops = dev->netdev_ops;
Patrick McHardy572a9d72009-11-10 06:14:14 +00001761 int rc = NETDEV_TX_OK;
Stephen Hemminger00829822008-11-20 20:14:53 -08001762
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001763 if (likely(!skb->next)) {
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -07001764 if (!list_empty(&ptype_all))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001765 dev_queue_xmit_nit(skb, dev);
1766
Herbert Xu576a30e2006-06-27 13:22:38 -07001767 if (netif_needs_gso(dev, skb)) {
1768 if (unlikely(dev_gso_segment(skb)))
1769 goto out_kfree_skb;
1770 if (skb->next)
1771 goto gso;
1772 }
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001773
Eric Dumazet93f154b2009-05-18 22:19:19 -07001774 /*
1775 * If the device doesn't need skb->dst, release it right now while
1776 * it's hot in this CPU's cache
1777 */
Eric Dumazetadf30902009-06-02 05:19:30 +00001778 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1779 skb_dst_drop(skb);
1780
Patrick Ohlyac45f602009-02-12 05:03:37 +00001781 rc = ops->ndo_start_xmit(skb, dev);
Patrick McHardyec634fe2009-07-05 19:23:38 -07001782 if (rc == NETDEV_TX_OK)
Eric Dumazet08baf562009-05-25 22:58:01 -07001783 txq_trans_update(txq);
Patrick Ohlyac45f602009-02-12 05:03:37 +00001784 /*
1785 * TODO: if skb_orphan() was called by
1786 * dev->hard_start_xmit() (for example, the unmodified
1787 * igb driver does that; bnx2 doesn't), then
1788 * skb_tx_software_timestamp() will be unable to send
1789 * back the time stamp.
1790 *
1791 * How can this be prevented? Always create another
1792 * reference to the socket before calling
1793 * dev->hard_start_xmit()? Prevent that skb_orphan()
1794 * does anything in dev->hard_start_xmit() by clearing
1795 * the skb destructor before the call and restoring it
1796 * afterwards, then doing the skb_orphan() ourselves?
1797 */
Patrick Ohlyac45f602009-02-12 05:03:37 +00001798 return rc;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001799 }
1800
Herbert Xu576a30e2006-06-27 13:22:38 -07001801gso:
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001802 do {
1803 struct sk_buff *nskb = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001804
1805 skb->next = nskb->next;
1806 nskb->next = NULL;
Stephen Hemminger00829822008-11-20 20:14:53 -08001807 rc = ops->ndo_start_xmit(nskb, dev);
Patrick McHardyec634fe2009-07-05 19:23:38 -07001808 if (unlikely(rc != NETDEV_TX_OK)) {
Patrick McHardy572a9d72009-11-10 06:14:14 +00001809 if (rc & ~NETDEV_TX_MASK)
1810 goto out_kfree_gso_skb;
Michael Chanf54d9e82006-06-25 23:57:04 -07001811 nskb->next = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001812 skb->next = nskb;
1813 return rc;
1814 }
Eric Dumazet08baf562009-05-25 22:58:01 -07001815 txq_trans_update(txq);
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001816 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
Michael Chanf54d9e82006-06-25 23:57:04 -07001817 return NETDEV_TX_BUSY;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001818 } while (skb->next);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001819
Patrick McHardy572a9d72009-11-10 06:14:14 +00001820out_kfree_gso_skb:
1821 if (likely(skb->next == NULL))
1822 skb->destructor = DEV_GSO_CB(skb)->destructor;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001823out_kfree_skb:
1824 kfree_skb(skb);
Patrick McHardy572a9d72009-11-10 06:14:14 +00001825 return rc;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001826}
1827
David S. Miller70192982009-01-27 16:34:47 -08001828static u32 skb_tx_hashrnd;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001829
Stephen Hemminger92477442009-03-21 13:39:26 -07001830u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
David S. Miller8f0f2222008-07-15 03:47:03 -07001831{
David S. Miller70192982009-01-27 16:34:47 -08001832 u32 hash;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001833
David S. Miller513de112009-05-03 14:43:10 -07001834 if (skb_rx_queue_recorded(skb)) {
1835 hash = skb_get_rx_queue(skb);
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001836 while (unlikely(hash >= dev->real_num_tx_queues))
David S. Miller513de112009-05-03 14:43:10 -07001837 hash -= dev->real_num_tx_queues;
1838 return hash;
1839 }
Eric Dumazetec581f62009-05-01 09:05:06 -07001840
1841 if (skb->sk && skb->sk->sk_hash)
David S. Miller70192982009-01-27 16:34:47 -08001842 hash = skb->sk->sk_hash;
Eric Dumazetec581f62009-05-01 09:05:06 -07001843 else
David S. Miller70192982009-01-27 16:34:47 -08001844 hash = skb->protocol;
David S. Millerd5a9e242009-01-27 16:22:11 -08001845
David S. Miller70192982009-01-27 16:34:47 -08001846 hash = jhash_1word(hash, skb_tx_hashrnd);
David S. Millerd5a9e242009-01-27 16:22:11 -08001847
David S. Millerb6b2fed2008-07-21 09:48:06 -07001848 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
David S. Miller8f0f2222008-07-15 03:47:03 -07001849}
Stephen Hemminger92477442009-03-21 13:39:26 -07001850EXPORT_SYMBOL(skb_tx_hash);
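/*
 * Editorial note: the multiply-and-shift above maps a 32-bit hash
 * uniformly onto [0, real_num_tx_queues) without a division. For
 * example, with 4 real queues and hash == 0x80000000:
 *
 *	((u64)0x80000000 * 4) >> 32 == 2
 */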
David S. Miller8f0f2222008-07-15 03:47:03 -07001851
Eric Dumazeted046422009-11-13 21:54:04 +00001852static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
1853{
1854 if (unlikely(queue_index >= dev->real_num_tx_queues)) {
1855 if (net_ratelimit()) {
1856 WARN(1, "%s selects TX queue %d, but "
1857 "real number of TX queues is %d\n",
1858 dev->name, queue_index,
1859 dev->real_num_tx_queues);
1860 }
1861 return 0;
1862 }
1863 return queue_index;
1864}
1865
David S. Millere8a04642008-07-17 00:34:19 -07001866static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1867 struct sk_buff *skb)
1868{
Krishna Kumara4ee3ce2009-10-19 23:50:07 +00001869 u16 queue_index;
1870 struct sock *sk = skb->sk;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001871
Krishna Kumara4ee3ce2009-10-19 23:50:07 +00001872 if (sk_tx_queue_recorded(sk)) {
1873 queue_index = sk_tx_queue_get(sk);
1874 } else {
1875 const struct net_device_ops *ops = dev->netdev_ops;
1876
1877 if (ops->ndo_select_queue) {
1878 queue_index = ops->ndo_select_queue(dev, skb);
Eric Dumazeted046422009-11-13 21:54:04 +00001879 queue_index = dev_cap_txqueue(dev, queue_index);
Krishna Kumara4ee3ce2009-10-19 23:50:07 +00001880 } else {
1881 queue_index = 0;
1882 if (dev->real_num_tx_queues > 1)
1883 queue_index = skb_tx_hash(dev, skb);
1884
1885 if (sk && sk->sk_dst_cache)
1886 sk_tx_queue_set(sk, queue_index);
1887 }
1888 }
David S. Millereae792b2008-07-15 03:03:33 -07001889
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001890 skb_set_queue_mapping(skb, queue_index);
1891 return netdev_get_tx_queue(dev, queue_index);
David S. Millere8a04642008-07-17 00:34:19 -07001892}
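/*
 * Editorial sketch (hypothetical driver code): a driver with its own
 * steering policy supplies ndo_select_queue; whatever it returns is
 * clamped by dev_cap_txqueue() above before use, so even a bogus
 * index cannot overrun real_num_tx_queues:
 *
 *	static u16 example_select_queue(struct net_device *dev,
 *					struct sk_buff *skb)
 *	{
 *		return skb->priority ? 1 : 0;
 *	}
 */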
1893
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00001894static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
1895 struct net_device *dev,
1896 struct netdev_queue *txq)
1897{
1898 spinlock_t *root_lock = qdisc_lock(q);
1899 int rc;
1900
1901 spin_lock(root_lock);
1902 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
1903 kfree_skb(skb);
1904 rc = NET_XMIT_DROP;
1905 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
1906 !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
1907 /*
1908 * This is a work-conserving queue; there are no old skbs
1909 * waiting to be sent out; and the qdisc is not running -
1910 * xmit the skb directly.
1911 */
1912 __qdisc_update_bstats(q, skb->len);
1913 if (sch_direct_xmit(skb, q, dev, txq, root_lock))
1914 __qdisc_run(q);
1915 else
1916 clear_bit(__QDISC_STATE_RUNNING, &q->state);
1917
1918 rc = NET_XMIT_SUCCESS;
1919 } else {
1920 rc = qdisc_enqueue_root(skb, q);
1921 qdisc_run(q);
1922 }
1923 spin_unlock(root_lock);
1924
1925 return rc;
1926}
1927
Dave Jonesd29f7492008-07-22 14:09:06 -07001928/**
1929 * dev_queue_xmit - transmit a buffer
1930 * @skb: buffer to transmit
1931 *
1932 * Queue a buffer for transmission to a network device. The caller must
1933 * have set the device and priority and built the buffer before calling
1934 * this function. The function can be called from an interrupt.
1935 *
1936 * A negative errno code is returned on a failure. A success does not
1937 * guarantee the frame will be transmitted as it may be dropped due
1938 * to congestion or traffic shaping.
1939 *
1940 * -----------------------------------------------------------------------------------
1941 * I notice this method can also return errors from the queue disciplines,
1942 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1943 * be positive.
1944 *
1945 * Regardless of the return value, the skb is consumed, so it is currently
1946 * difficult to retry a send to this method. (You can bump the ref count
1947 * before sending to hold a reference for retry if you are careful.)
1948 *
1949 * When calling this method, interrupts MUST be enabled. This is because
1950 * the BH enable code must have IRQs enabled so that it will not deadlock.
1951 * --BLG
1952 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953int dev_queue_xmit(struct sk_buff *skb)
1954{
1955 struct net_device *dev = skb->dev;
David S. Millerdc2b4842008-07-08 17:18:23 -07001956 struct netdev_queue *txq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957 struct Qdisc *q;
1958 int rc = -ENOMEM;
1959
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001960 /* GSO will handle the following emulations directly. */
1961 if (netif_needs_gso(dev, skb))
1962 goto gso;
1963
David S. Miller4cf704f2009-06-09 00:18:51 -07001964 if (skb_has_frags(skb) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965 !(dev->features & NETIF_F_FRAGLIST) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001966 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967 goto out_kfree_skb;
1968
1969 /* Fragmented skb is linearized if device does not support SG,
1970 * or if at least one of the fragments is in highmem and the device
1971 * does not support DMA from it.
1972 */
1973 if (skb_shinfo(skb)->nr_frags &&
1974 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001975 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001976 goto out_kfree_skb;
1977
1978 /* If packet is not checksummed and device does not support
1979 * checksumming for this protocol, complete checksumming here.
1980 */
Herbert Xu663ead32007-04-09 11:59:07 -07001981 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1982 skb_set_transport_header(skb, skb->csum_start -
1983 skb_headroom(skb));
Ben Hutchings6de329e2008-06-16 17:02:28 -07001984 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1985 goto out_kfree_skb;
Herbert Xu663ead32007-04-09 11:59:07 -07001986 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001988gso:
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001989 /* Disable soft irqs for various locks below. Also
1990 * stops preemption for RCU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001992 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993
David S. Millereae792b2008-07-15 03:03:33 -07001994 txq = dev_pick_tx(dev, skb);
David S. Millerb0e1e642008-07-08 17:42:10 -07001995 q = rcu_dereference(txq->qdisc);
David S. Miller37437bb2008-07-16 02:15:04 -07001996
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997#ifdef CONFIG_NET_CLS_ACT
Eric Dumazetd1b19df2009-09-03 01:29:39 -07001998 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999#endif
2000 if (q->enqueue) {
Krishna Kumarbbd8a0d2009-08-06 01:44:21 +00002001 rc = __dev_xmit_skb(skb, q, dev, txq);
David S. Miller37437bb2008-07-16 02:15:04 -07002002 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003 }
2004
2005 /* The device has no queue. Common case for software devices:
2006 loopback, all sorts of tunnels...
2007
Herbert Xu932ff272006-06-09 12:20:56 -07002008 Really, it is unlikely that netif_tx_lock protection is necessary
2009 here. (e.g. loopback and IP tunnels are clean, ignoring statistics
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010 counters.)
2011 However, it is possible that they rely on the protection
2012 we provide here.
2013
2014 Check this and take the lock. It is not prone to deadlocks.
2015 The noqueue qdisc case is even simpler 8)
2016 */
2017 if (dev->flags & IFF_UP) {
2018 int cpu = smp_processor_id(); /* ok because BHs are off */
2019
David S. Millerc773e842008-07-08 23:13:53 -07002020 if (txq->xmit_lock_owner != cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002021
David S. Millerc773e842008-07-08 23:13:53 -07002022 HARD_TX_LOCK(dev, txq, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023
David S. Millerfd2ea0a2008-07-17 01:56:23 -07002024 if (!netif_tx_queue_stopped(txq)) {
Patrick McHardy572a9d72009-11-10 06:14:14 +00002025 rc = dev_hard_start_xmit(skb, dev, txq);
2026 if (dev_xmit_complete(rc)) {
David S. Millerc773e842008-07-08 23:13:53 -07002027 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002028 goto out;
2029 }
2030 }
David S. Millerc773e842008-07-08 23:13:53 -07002031 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002032 if (net_ratelimit())
2033 printk(KERN_CRIT "Virtual device %s asks to "
2034 "queue packet!\n", dev->name);
2035 } else {
2036 /* Recursion is detected! It is possible,
2037 * unfortunately */
2038 if (net_ratelimit())
2039 printk(KERN_CRIT "Dead loop on virtual device "
2040 "%s, fix it urgently!\n", dev->name);
2041 }
2042 }
2043
2044 rc = -ENETDOWN;
Herbert Xud4828d82006-06-22 02:28:18 -07002045 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046
2047out_kfree_skb:
2048 kfree_skb(skb);
2049 return rc;
2050out:
Herbert Xud4828d82006-06-22 02:28:18 -07002051 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052 return rc;
2053}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002054EXPORT_SYMBOL(dev_queue_xmit);
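/*
 * Editorial sketch: a caller that has set the device and built the
 * headers hands the buffer off like this and must not touch the skb
 * afterwards, whatever the return value ("sk" stands in for the
 * sending socket):
 *
 *	skb->dev = dev;
 *	skb->priority = sk->sk_priority;
 *	rc = dev_queue_xmit(skb);
 */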
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055
2056
2057/*=======================================================================
2058 Receiver routines
2059 =======================================================================*/
2060
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07002061int netdev_max_backlog __read_mostly = 1000;
2062int netdev_budget __read_mostly = 300;
2063int weight_p __read_mostly = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064
2065DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
2066
2067
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068/**
2069 * netif_rx - post buffer to the network code
2070 * @skb: buffer to post
2071 *
2072 * This function receives a packet from a device driver and queues it for
2073 * the upper (protocol) levels to process. It always succeeds. The buffer
2074 * may be dropped during processing for congestion control or by the
2075 * protocol layers.
2076 *
2077 * return values:
2078 * NET_RX_SUCCESS (no congestion)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079 * NET_RX_DROP (packet was dropped)
2080 *
2081 */
2082
2083int netif_rx(struct sk_buff *skb)
2084{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085 struct softnet_data *queue;
2086 unsigned long flags;
2087
2088 /* if netpoll wants it, pretend we never saw it */
2089 if (netpoll_rx(skb))
2090 return NET_RX_DROP;
2091
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07002092 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07002093 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094
2095 /*
2096 * The code is rearranged so that the path is shortest
2097 * when the CPU is congested but still operating.
2098 */
2099 local_irq_save(flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100 queue = &__get_cpu_var(softnet_data);
2101
2102 __get_cpu_var(netdev_rx_stat).total++;
2103 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2104 if (queue->input_pkt_queue.qlen) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002105enqueue:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106 __skb_queue_tail(&queue->input_pkt_queue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107 local_irq_restore(flags);
Stephen Hemminger34008d82005-06-23 20:10:00 -07002108 return NET_RX_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002109 }
2110
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002111 napi_schedule(&queue->backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002112 goto enqueue;
2113 }
2114
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115 __get_cpu_var(netdev_rx_stat).dropped++;
2116 local_irq_restore(flags);
2117
2118 kfree_skb(skb);
2119 return NET_RX_DROP;
2120}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002121EXPORT_SYMBOL(netif_rx);
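/*
 * Editorial sketch (hypothetical driver code): a non-NAPI driver's
 * receive interrupt typically posts each buffer like this:
 *
 *	skb_put(skb, len);
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);
 */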
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122
2123int netif_rx_ni(struct sk_buff *skb)
2124{
2125 int err;
2126
2127 preempt_disable();
2128 err = netif_rx(skb);
2129 if (local_softirq_pending())
2130 do_softirq();
2131 preempt_enable();
2132
2133 return err;
2134}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135EXPORT_SYMBOL(netif_rx_ni);
2136
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137static void net_tx_action(struct softirq_action *h)
2138{
2139 struct softnet_data *sd = &__get_cpu_var(softnet_data);
2140
2141 if (sd->completion_queue) {
2142 struct sk_buff *clist;
2143
2144 local_irq_disable();
2145 clist = sd->completion_queue;
2146 sd->completion_queue = NULL;
2147 local_irq_enable();
2148
2149 while (clist) {
2150 struct sk_buff *skb = clist;
2151 clist = clist->next;
2152
Ilpo Järvinen547b7922008-07-25 21:43:18 -07002153 WARN_ON(atomic_read(&skb->users));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 __kfree_skb(skb);
2155 }
2156 }
2157
2158 if (sd->output_queue) {
David S. Miller37437bb2008-07-16 02:15:04 -07002159 struct Qdisc *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002160
2161 local_irq_disable();
2162 head = sd->output_queue;
2163 sd->output_queue = NULL;
2164 local_irq_enable();
2165
2166 while (head) {
David S. Miller37437bb2008-07-16 02:15:04 -07002167 struct Qdisc *q = head;
2168 spinlock_t *root_lock;
2169
Linus Torvalds1da177e2005-04-16 15:20:36 -07002170 head = head->next_sched;
2171
David S. Miller5fb66222008-08-02 20:02:43 -07002172 root_lock = qdisc_lock(q);
David S. Miller37437bb2008-07-16 02:15:04 -07002173 if (spin_trylock(root_lock)) {
Jarek Poplawskidef82a12008-08-17 21:54:43 -07002174 smp_mb__before_clear_bit();
2175 clear_bit(__QDISC_STATE_SCHED,
2176 &q->state);
David S. Miller37437bb2008-07-16 02:15:04 -07002177 qdisc_run(q);
2178 spin_unlock(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179 } else {
David S. Miller195648b2008-08-19 04:00:36 -07002180 if (!test_bit(__QDISC_STATE_DEACTIVATED,
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002181 &q->state)) {
David S. Miller195648b2008-08-19 04:00:36 -07002182 __netif_reschedule(q);
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002183 } else {
2184 smp_mb__before_clear_bit();
2185 clear_bit(__QDISC_STATE_SCHED,
2186 &q->state);
2187 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188 }
2189 }
2190 }
2191}
2192
Stephen Hemminger6f05f622007-03-08 20:46:03 -08002193static inline int deliver_skb(struct sk_buff *skb,
2194 struct packet_type *pt_prev,
2195 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196{
2197 atomic_inc(&skb->users);
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002198 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199}
2200
2201#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
Michał Mirosławda678292009-06-05 05:35:28 +00002202
2203#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
2204/* This hook is defined here for ATM LANE */
2205int (*br_fdb_test_addr_hook)(struct net_device *dev,
2206 unsigned char *addr) __read_mostly;
Stephen Hemminger4fb019a2009-09-11 11:50:08 -07002207EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
Michał Mirosławda678292009-06-05 05:35:28 +00002208#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209
Stephen Hemminger6229e362007-03-21 13:38:47 -07002210/*
2211 * If bridge module is loaded call bridging hook.
2212 * Returns NULL if the packet was consumed.
2213 */
2214struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2215 struct sk_buff *skb) __read_mostly;
Stephen Hemminger4fb019a2009-09-11 11:50:08 -07002216EXPORT_SYMBOL_GPL(br_handle_frame_hook);
Michał Mirosławda678292009-06-05 05:35:28 +00002217
Stephen Hemminger6229e362007-03-21 13:38:47 -07002218static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2219 struct packet_type **pt_prev, int *ret,
2220 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221{
2222 struct net_bridge_port *port;
2223
Stephen Hemminger6229e362007-03-21 13:38:47 -07002224 if (skb->pkt_type == PACKET_LOOPBACK ||
2225 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2226 return skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227
2228 if (*pt_prev) {
Stephen Hemminger6229e362007-03-21 13:38:47 -07002229 *ret = deliver_skb(skb, *pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 *pt_prev = NULL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002231 }
2232
Stephen Hemminger6229e362007-03-21 13:38:47 -07002233 return br_handle_frame_hook(port, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234}
2235#else
Stephen Hemminger6229e362007-03-21 13:38:47 -07002236#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237#endif
2238
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002239#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2240struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2241EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2242
2243static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2244 struct packet_type **pt_prev,
2245 int *ret,
2246 struct net_device *orig_dev)
2247{
2248 if (skb->dev->macvlan_port == NULL)
2249 return skb;
2250
2251 if (*pt_prev) {
2252 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2253 *pt_prev = NULL;
2254 }
2255 return macvlan_handle_frame_hook(skb);
2256}
2257#else
2258#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2259#endif
2260
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261#ifdef CONFIG_NET_CLS_ACT
2262/* TODO: Maybe we should just force sch_ingress to be compiled in
2263 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
2264 * instructions (a compare and 2 extra stores) when we don't have
2265 * it on but do have CONFIG_NET_CLS_ACT.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002266 * NOTE: This doesn't stop any functionality; if you don't have
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267 * the ingress scheduler, you just can't add policies on ingress.
2268 *
2269 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002270static int ing_filter(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272 struct net_device *dev = skb->dev;
Herbert Xuf697c3e2007-10-14 00:38:47 -07002273 u32 ttl = G_TC_RTTL(skb->tc_verd);
David S. Miller555353c2008-07-08 17:33:13 -07002274 struct netdev_queue *rxq;
2275 int result = TC_ACT_OK;
2276 struct Qdisc *q;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002277
Herbert Xuf697c3e2007-10-14 00:38:47 -07002278 if (MAX_RED_LOOP < ttl++) {
2279 printk(KERN_WARNING
2280 "Redir loop detected Dropping packet (%d->%d)\n",
2281 skb->iif, dev->ifindex);
2282 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283 }
2284
Herbert Xuf697c3e2007-10-14 00:38:47 -07002285 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2286 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2287
David S. Miller555353c2008-07-08 17:33:13 -07002288 rxq = &dev->rx_queue;
2289
David S. Miller838740002008-07-17 00:53:03 -07002290 q = rxq->qdisc;
David S. Miller8d50b532008-07-30 02:37:46 -07002291 if (q != &noop_qdisc) {
David S. Miller838740002008-07-17 00:53:03 -07002292 spin_lock(qdisc_lock(q));
David S. Millera9312ae2008-08-17 21:51:03 -07002293 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2294 result = qdisc_enqueue_root(skb, q);
David S. Miller838740002008-07-17 00:53:03 -07002295 spin_unlock(qdisc_lock(q));
2296 }
Herbert Xuf697c3e2007-10-14 00:38:47 -07002297
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298 return result;
2299}
Herbert Xuf697c3e2007-10-14 00:38:47 -07002300
2301static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2302 struct packet_type **pt_prev,
2303 int *ret, struct net_device *orig_dev)
2304{
David S. Miller8d50b532008-07-30 02:37:46 -07002305 if (skb->dev->rx_queue.qdisc == &noop_qdisc)
Herbert Xuf697c3e2007-10-14 00:38:47 -07002306 goto out;
2307
2308 if (*pt_prev) {
2309 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2310 *pt_prev = NULL;
2311 } else {
2312 /* Huh? Why does turning on AF_PACKET affect this? */
2313 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2314 }
2315
2316 switch (ing_filter(skb)) {
2317 case TC_ACT_SHOT:
2318 case TC_ACT_STOLEN:
2319 kfree_skb(skb);
2320 return NULL;
2321 }
2322
2323out:
2324 skb->tc_verd = 0;
2325 return skb;
2326}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002327#endif
2328
Patrick McHardybc1d0412008-07-14 22:49:30 -07002329/*
2330 * netif_nit_deliver - deliver received packets to network taps
2331 * @skb: buffer
2332 *
2333 * This function is used to deliver incoming packets to network
2334 * taps. It should be used when the normal netif_receive_skb path
2335 * is bypassed, for example because of VLAN acceleration.
2336 */
2337void netif_nit_deliver(struct sk_buff *skb)
2338{
2339 struct packet_type *ptype;
2340
2341 if (list_empty(&ptype_all))
2342 return;
2343
2344 skb_reset_network_header(skb);
2345 skb_reset_transport_header(skb);
2346 skb->mac_len = skb->network_header - skb->mac_header;
2347
2348 rcu_read_lock();
2349 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2350 if (!ptype->dev || ptype->dev == skb->dev)
2351 deliver_skb(skb, ptype, skb->dev);
2352 }
2353 rcu_read_unlock();
2354}
2355
Stephen Hemminger3b582cc2007-11-01 02:21:47 -07002356/**
2357 * netif_receive_skb - process receive buffer from network
2358 * @skb: buffer to process
2359 *
2360 * netif_receive_skb() is the main receive data processing function.
2361 * It always succeeds. The buffer may be dropped during processing
2362 * for congestion control or by the protocol layers.
2363 *
2364 * This function may only be called from softirq context and interrupts
2365 * should be enabled.
2366 *
2367 * Return values (usually ignored):
2368 * NET_RX_SUCCESS: no congestion
2369 * NET_RX_DROP: packet was dropped
2370 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371int netif_receive_skb(struct sk_buff *skb)
2372{
2373 struct packet_type *ptype, *pt_prev;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002374 struct net_device *orig_dev;
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002375 struct net_device *null_or_orig;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376 int ret = NET_RX_DROP;
Al Viro252e33462006-11-14 20:48:11 -08002377 __be16 type;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378
Eric Dumazet81bbb3d2009-09-30 16:42:42 -07002379 if (!skb->tstamp.tv64)
2380 net_timestamp(skb);
2381
Eric Dumazet05423b22009-10-26 18:40:35 -07002382 if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
Patrick McHardy9b22ea52008-11-04 14:49:57 -08002383 return NET_RX_SUCCESS;
2384
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385 /* if we've gotten here through NAPI, check netpoll */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002386 if (netpoll_receive_skb(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387 return NET_RX_DROP;
2388
Patrick McHardyc01003c2007-03-29 11:46:52 -07002389 if (!skb->iif)
2390 skb->iif = skb->dev->ifindex;
David S. Miller86e65da2005-08-09 19:36:29 -07002391
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002392 null_or_orig = NULL;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002393 orig_dev = skb->dev;
2394 if (orig_dev->master) {
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002395 if (skb_bond_should_drop(skb))
2396 null_or_orig = orig_dev; /* deliver only exact match */
2397 else
2398 skb->dev = orig_dev->master;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002399 }
Jay Vosburgh8f903c72006-02-21 16:36:44 -08002400
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401 __get_cpu_var(netdev_rx_stat).total++;
2402
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002403 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03002404 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07002405 skb->mac_len = skb->network_header - skb->mac_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406
2407 pt_prev = NULL;
2408
2409 rcu_read_lock();
2410
2411#ifdef CONFIG_NET_CLS_ACT
2412 if (skb->tc_verd & TC_NCLS) {
2413 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2414 goto ncls;
2415 }
2416#endif
2417
2418 list_for_each_entry_rcu(ptype, &ptype_all, list) {
Joe Eykholtf9823072008-07-02 18:22:02 -07002419 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2420 ptype->dev == orig_dev) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002421 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002422 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423 pt_prev = ptype;
2424 }
2425 }
2426
2427#ifdef CONFIG_NET_CLS_ACT
Herbert Xuf697c3e2007-10-14 00:38:47 -07002428 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2429 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002430 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002431ncls:
2432#endif
2433
Stephen Hemminger6229e362007-03-21 13:38:47 -07002434 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2435 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436 goto out;
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002437 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2438 if (!skb)
2439 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440
2441 type = skb->protocol;
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002442 list_for_each_entry_rcu(ptype,
2443 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444 if (ptype->type == type &&
Joe Eykholtf9823072008-07-02 18:22:02 -07002445 (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2446 ptype->dev == orig_dev)) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002447 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002448 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449 pt_prev = ptype;
2450 }
2451 }
2452
2453 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002454 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455 } else {
2456 kfree_skb(skb);
2457 /* Jamal, now you will not be able to escape explaining
2458 * to me how you were going to use this. :-)
2459 */
2460 ret = NET_RX_DROP;
2461 }
2462
2463out:
2464 rcu_read_unlock();
2465 return ret;
2466}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002467EXPORT_SYMBOL(netif_receive_skb);
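/*
 * Editorial sketch (the example_rx_* helpers are hypothetical): a
 * NAPI driver calls netif_receive_skb() from its poll routine, which
 * runs in softirq context with interrupts enabled as required:
 *
 *	static int example_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = 0;
 *
 *		while (work < budget && example_rx_pending(napi)) {
 *			struct sk_buff *skb = example_rx_skb(napi);
 *
 *			skb->protocol = eth_type_trans(skb, napi->dev);
 *			netif_receive_skb(skb);
 *			work++;
 *		}
 *		if (work < budget)
 *			napi_complete(napi);
 *		return work;
 *	}
 */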
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07002469/* Network device is going away, flush any packets still pending */
2470static void flush_backlog(void *arg)
2471{
2472 struct net_device *dev = arg;
2473 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2474 struct sk_buff *skb, *tmp;
2475
2476 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2477 if (skb->dev == dev) {
2478 __skb_unlink(skb, &queue->input_pkt_queue);
2479 kfree_skb(skb);
2480 }
2481}
2482
Herbert Xud565b0a2008-12-15 23:38:52 -08002483static int napi_gro_complete(struct sk_buff *skb)
2484{
2485 struct packet_type *ptype;
2486 __be16 type = skb->protocol;
2487 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2488 int err = -ENOENT;
2489
Herbert Xufc59f9a2009-04-14 15:11:06 -07002490 if (NAPI_GRO_CB(skb)->count == 1) {
2491 skb_shinfo(skb)->gso_size = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002492 goto out;
Herbert Xufc59f9a2009-04-14 15:11:06 -07002493 }
Herbert Xud565b0a2008-12-15 23:38:52 -08002494
2495 rcu_read_lock();
2496 list_for_each_entry_rcu(ptype, head, list) {
2497 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
2498 continue;
2499
2500 err = ptype->gro_complete(skb);
2501 break;
2502 }
2503 rcu_read_unlock();
2504
2505 if (err) {
2506 WARN_ON(&ptype->list == head);
2507 kfree_skb(skb);
2508 return NET_RX_SUCCESS;
2509 }
2510
2511out:
Herbert Xud565b0a2008-12-15 23:38:52 -08002512 return netif_receive_skb(skb);
2513}
2514
2515void napi_gro_flush(struct napi_struct *napi)
2516{
2517 struct sk_buff *skb, *next;
2518
2519 for (skb = napi->gro_list; skb; skb = next) {
2520 next = skb->next;
2521 skb->next = NULL;
2522 napi_gro_complete(skb);
2523 }
2524
Herbert Xu4ae55442009-02-08 18:00:36 +00002525 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002526 napi->gro_list = NULL;
2527}
2528EXPORT_SYMBOL(napi_gro_flush);
2529
Ben Hutchings5b252f02009-10-29 07:17:09 +00002530enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xud565b0a2008-12-15 23:38:52 -08002531{
2532 struct sk_buff **pp = NULL;
2533 struct packet_type *ptype;
2534 __be16 type = skb->protocol;
2535 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
Herbert Xu0da2afd52008-12-26 14:57:42 -08002536 int same_flow;
Herbert Xud565b0a2008-12-15 23:38:52 -08002537 int mac_len;
Ben Hutchings5b252f02009-10-29 07:17:09 +00002538 enum gro_result ret;
Herbert Xud565b0a2008-12-15 23:38:52 -08002539
2540 if (!(skb->dev->features & NETIF_F_GRO))
2541 goto normal;
2542
David S. Miller4cf704f2009-06-09 00:18:51 -07002543 if (skb_is_gso(skb) || skb_has_frags(skb))
Herbert Xuf17f5c92009-01-14 14:36:12 -08002544 goto normal;
2545
Herbert Xud565b0a2008-12-15 23:38:52 -08002546 rcu_read_lock();
2547 list_for_each_entry_rcu(ptype, head, list) {
Herbert Xud565b0a2008-12-15 23:38:52 -08002548 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2549 continue;
2550
Herbert Xu86911732009-01-29 14:19:50 +00002551 skb_set_network_header(skb, skb_gro_offset(skb));
Herbert Xud565b0a2008-12-15 23:38:52 -08002552 mac_len = skb->network_header - skb->mac_header;
2553 skb->mac_len = mac_len;
2554 NAPI_GRO_CB(skb)->same_flow = 0;
2555 NAPI_GRO_CB(skb)->flush = 0;
Herbert Xu5d38a072009-01-04 16:13:40 -08002556 NAPI_GRO_CB(skb)->free = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002557
Herbert Xud565b0a2008-12-15 23:38:52 -08002558 pp = ptype->gro_receive(&napi->gro_list, skb);
2559 break;
2560 }
2561 rcu_read_unlock();
2562
2563 if (&ptype->list == head)
2564 goto normal;
2565
Herbert Xu0da2afd52008-12-26 14:57:42 -08002566 same_flow = NAPI_GRO_CB(skb)->same_flow;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002567 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
Herbert Xu0da2afd52008-12-26 14:57:42 -08002568
Herbert Xud565b0a2008-12-15 23:38:52 -08002569 if (pp) {
2570 struct sk_buff *nskb = *pp;
2571
2572 *pp = nskb->next;
2573 nskb->next = NULL;
2574 napi_gro_complete(nskb);
Herbert Xu4ae55442009-02-08 18:00:36 +00002575 napi->gro_count--;
Herbert Xud565b0a2008-12-15 23:38:52 -08002576 }
2577
Herbert Xu0da2afd52008-12-26 14:57:42 -08002578 if (same_flow)
Herbert Xud565b0a2008-12-15 23:38:52 -08002579 goto ok;
2580
Herbert Xu4ae55442009-02-08 18:00:36 +00002581 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
Herbert Xud565b0a2008-12-15 23:38:52 -08002582 goto normal;
Herbert Xud565b0a2008-12-15 23:38:52 -08002583
Herbert Xu4ae55442009-02-08 18:00:36 +00002584 napi->gro_count++;
Herbert Xud565b0a2008-12-15 23:38:52 -08002585 NAPI_GRO_CB(skb)->count = 1;
Herbert Xu86911732009-01-29 14:19:50 +00002586 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08002587 skb->next = napi->gro_list;
2588 napi->gro_list = skb;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002589 ret = GRO_HELD;
Herbert Xud565b0a2008-12-15 23:38:52 -08002590
Herbert Xuad0f99042009-02-01 01:24:55 -08002591pull:
Herbert Xucb189782009-05-26 18:50:31 +00002592 if (skb_headlen(skb) < skb_gro_offset(skb)) {
2593 int grow = skb_gro_offset(skb) - skb_headlen(skb);
2594
2595 BUG_ON(skb->end - skb->tail < grow);
2596
2597 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
2598
2599 skb->tail += grow;
2600 skb->data_len -= grow;
2601
2602 skb_shinfo(skb)->frags[0].page_offset += grow;
2603 skb_shinfo(skb)->frags[0].size -= grow;
2604
2605 if (unlikely(!skb_shinfo(skb)->frags[0].size)) {
2606 put_page(skb_shinfo(skb)->frags[0].page);
2607 memmove(skb_shinfo(skb)->frags,
2608 skb_shinfo(skb)->frags + 1,
2609 --skb_shinfo(skb)->nr_frags);
2610 }
Herbert Xuad0f99042009-02-01 01:24:55 -08002611 }
2612
Herbert Xud565b0a2008-12-15 23:38:52 -08002613ok:
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002614 return ret;
Herbert Xud565b0a2008-12-15 23:38:52 -08002615
2616normal:
Herbert Xuad0f99042009-02-01 01:24:55 -08002617 ret = GRO_NORMAL;
2618 goto pull;
Herbert Xu5d38a072009-01-04 16:13:40 -08002619}
Herbert Xu96e93ea2009-01-06 10:49:34 -08002620EXPORT_SYMBOL(dev_gro_receive);
2621
Ben Hutchings5b252f02009-10-29 07:17:09 +00002622static gro_result_t
2623__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xu96e93ea2009-01-06 10:49:34 -08002624{
2625 struct sk_buff *p;
2626
Herbert Xud1c76af2009-03-16 10:50:02 -07002627 if (netpoll_rx_on(skb))
2628 return GRO_NORMAL;
2629
Herbert Xu96e93ea2009-01-06 10:49:34 -08002630 for (p = napi->gro_list; p; p = p->next) {
Stephen Hemmingerf2bde732009-04-01 11:20:20 +00002631 NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
2632 && !compare_ether_header(skb_mac_header(p),
2633 skb_gro_mac_header(skb));
Herbert Xu96e93ea2009-01-06 10:49:34 -08002634 NAPI_GRO_CB(p)->flush = 0;
2635 }
2636
2637 return dev_gro_receive(napi, skb);
2638}
Herbert Xu5d38a072009-01-04 16:13:40 -08002639
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002640gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
Herbert Xu5d38a072009-01-04 16:13:40 -08002641{
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002642 switch (ret) {
2643 case GRO_NORMAL:
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002644 if (netif_receive_skb(skb))
2645 ret = GRO_DROP;
2646 break;
Herbert Xu5d38a072009-01-04 16:13:40 -08002647
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002648 case GRO_DROP:
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002649 case GRO_MERGED_FREE:
Herbert Xu5d38a072009-01-04 16:13:40 -08002650 kfree_skb(skb);
2651 break;
Ben Hutchings5b252f02009-10-29 07:17:09 +00002652
2653 case GRO_HELD:
2654 case GRO_MERGED:
2655 break;
Herbert Xu5d38a072009-01-04 16:13:40 -08002656 }
2657
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002658 return ret;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002659}
2660EXPORT_SYMBOL(napi_skb_finish);
2661
Herbert Xu78a478d2009-05-26 18:50:21 +00002662void skb_gro_reset_offset(struct sk_buff *skb)
2663{
2664 NAPI_GRO_CB(skb)->data_offset = 0;
2665 NAPI_GRO_CB(skb)->frag0 = NULL;
Herbert Xu74895942009-05-26 18:50:27 +00002666 NAPI_GRO_CB(skb)->frag0_len = 0;
Herbert Xu78a478d2009-05-26 18:50:21 +00002667
Herbert Xu78d3fd02009-05-26 18:50:23 +00002668 if (skb->mac_header == skb->tail &&
Herbert Xu74895942009-05-26 18:50:27 +00002669 !PageHighMem(skb_shinfo(skb)->frags[0].page)) {
Herbert Xu78a478d2009-05-26 18:50:21 +00002670 NAPI_GRO_CB(skb)->frag0 =
2671 page_address(skb_shinfo(skb)->frags[0].page) +
2672 skb_shinfo(skb)->frags[0].page_offset;
Herbert Xu74895942009-05-26 18:50:27 +00002673 NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size;
2674 }
Herbert Xu78a478d2009-05-26 18:50:21 +00002675}
2676EXPORT_SYMBOL(skb_gro_reset_offset);
2677
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002678gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002679{
Herbert Xu86911732009-01-29 14:19:50 +00002680 skb_gro_reset_offset(skb);
2681
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002682 return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
Herbert Xud565b0a2008-12-15 23:38:52 -08002683}
2684EXPORT_SYMBOL(napi_gro_receive);
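
/*
 * Illustrative sketch, not part of the original file: a driver's poll
 * handler hands each completed receive buffer to napi_gro_receive()
 * instead of netif_receive_skb() to get aggregation.  "adapter",
 * "netdev" and "pkt_len" are hypothetical driver-private names.
 */
#if 0	/* example only */
	skb_put(skb, pkt_len);
	skb->protocol = eth_type_trans(skb, netdev);
	napi_gro_receive(&adapter->napi, skb);
#endif	/* example only */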
2685
Herbert Xu96e93ea2009-01-06 10:49:34 -08002686void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
2687{
Herbert Xu96e93ea2009-01-06 10:49:34 -08002688 __skb_pull(skb, skb_headlen(skb));
2689 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
2690
2691 napi->skb = skb;
2692}
2693EXPORT_SYMBOL(napi_reuse_skb);
2694
Herbert Xu76620aa2009-04-16 02:02:07 -07002695struct sk_buff *napi_get_frags(struct napi_struct *napi)
Herbert Xu5d38a072009-01-04 16:13:40 -08002696{
Herbert Xu5d38a072009-01-04 16:13:40 -08002697 struct sk_buff *skb = napi->skb;
Herbert Xu5d38a072009-01-04 16:13:40 -08002698
2699 if (!skb) {
Eric Dumazet89d71a62009-10-13 05:34:20 +00002700 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
2701 if (skb)
2702 napi->skb = skb;
Herbert Xu5d38a072009-01-04 16:13:40 -08002703 }
Herbert Xu96e93ea2009-01-06 10:49:34 -08002704 return skb;
2705}
Herbert Xu76620aa2009-04-16 02:02:07 -07002706EXPORT_SYMBOL(napi_get_frags);
Herbert Xu96e93ea2009-01-06 10:49:34 -08002707
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002708gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
2709 gro_result_t ret)
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002710{
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002711 switch (ret) {
2712 case GRO_NORMAL:
Herbert Xu86911732009-01-29 14:19:50 +00002713 case GRO_HELD:
Herbert Xu86911732009-01-29 14:19:50 +00002714 skb->protocol = eth_type_trans(skb, napi->dev);
2715
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002716 if (ret == GRO_HELD)
2717 skb_gro_pull(skb, -ETH_HLEN);
2718 else if (netif_receive_skb(skb))
2719 ret = GRO_DROP;
Herbert Xu86911732009-01-29 14:19:50 +00002720 break;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002721
2722 case GRO_DROP:
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002723 case GRO_MERGED_FREE:
2724 napi_reuse_skb(napi, skb);
2725 break;
Ben Hutchings5b252f02009-10-29 07:17:09 +00002726
2727 case GRO_MERGED:
2728 break;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002729 }
2730
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002731 return ret;
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002732}
2733EXPORT_SYMBOL(napi_frags_finish);
2734
Herbert Xu76620aa2009-04-16 02:02:07 -07002735struct sk_buff *napi_frags_skb(struct napi_struct *napi)
Herbert Xu96e93ea2009-01-06 10:49:34 -08002736{
Herbert Xu76620aa2009-04-16 02:02:07 -07002737 struct sk_buff *skb = napi->skb;
2738 struct ethhdr *eth;
Herbert Xua5b1cf22009-05-26 18:50:28 +00002739 unsigned int hlen;
2740 unsigned int off;
Herbert Xu76620aa2009-04-16 02:02:07 -07002741
2742 napi->skb = NULL;
2743
2744 skb_reset_mac_header(skb);
2745 skb_gro_reset_offset(skb);
2746
Herbert Xua5b1cf22009-05-26 18:50:28 +00002747 off = skb_gro_offset(skb);
2748 hlen = off + sizeof(*eth);
2749 eth = skb_gro_header_fast(skb, off);
2750 if (skb_gro_header_hard(skb, hlen)) {
2751 eth = skb_gro_header_slow(skb, hlen, off);
2752 if (unlikely(!eth)) {
2753 napi_reuse_skb(napi, skb);
2754 skb = NULL;
2755 goto out;
2756 }
Herbert Xu76620aa2009-04-16 02:02:07 -07002757 }
2758
2759 skb_gro_pull(skb, sizeof(*eth));
2760
2761 /*
2762 * This works because the only protocols we care about don't require
2763 * special handling. We'll fix it up properly in napi_frags_finish().
2764 */
2765 skb->protocol = eth->h_proto;
2766
2767out:
2768 return skb;
2769}
2770EXPORT_SYMBOL(napi_frags_skb);
2771
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002772gro_result_t napi_gro_frags(struct napi_struct *napi)
Herbert Xu76620aa2009-04-16 02:02:07 -07002773{
2774 struct sk_buff *skb = napi_frags_skb(napi);
Herbert Xu96e93ea2009-01-06 10:49:34 -08002775
2776 if (!skb)
Ben Hutchingsc7c4b3b2009-10-29 21:36:53 -07002777 return GRO_DROP;
Herbert Xu96e93ea2009-01-06 10:49:34 -08002778
Herbert Xu5d0d9be2009-01-29 14:19:48 +00002779 return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
Herbert Xu5d38a072009-01-04 16:13:40 -08002780}
2781EXPORT_SYMBOL(napi_gro_frags);
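
/*
 * Illustrative sketch, not part of the original file: a page-based
 * receive path can avoid copying headers by attaching its page to the
 * skb returned by napi_get_frags() and then calling napi_gro_frags(),
 * which pulls the Ethernet header out of frag0 itself.  "adapter",
 * "page", "offset" and "len" are hypothetical.
 */
#if 0	/* example only */
	struct sk_buff *skb = napi_get_frags(&adapter->napi);

	if (unlikely(!skb))
		return;		/* out of memory: drop this buffer */

	skb_fill_page_desc(skb, 0, page, offset, len);
	skb->len += len;
	skb->data_len += len;
	skb->truesize += len;

	napi_gro_frags(&adapter->napi);
#endif	/* example only */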
2782
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002783static int process_backlog(struct napi_struct *napi, int quota)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002784{
2785 int work = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002786 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2787 unsigned long start_time = jiffies;
2788
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002789 napi->weight = weight_p;
2790 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002791 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002792
2793 local_irq_disable();
2794 skb = __skb_dequeue(&queue->input_pkt_queue);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002795 if (!skb) {
Herbert Xu8f1ead22009-03-26 00:59:10 -07002796 __napi_complete(napi);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002797 local_irq_enable();
Herbert Xu8f1ead22009-03-26 00:59:10 -07002798 break;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002799 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002800 local_irq_enable();
2801
Herbert Xu8f1ead22009-03-26 00:59:10 -07002802 netif_receive_skb(skb);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002803 } while (++work < quota && jiffies == start_time);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002804
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002805 return work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002806}
2807
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002808/**
2809 * __napi_schedule - schedule for receive
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07002810 * @n: entry to schedule
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002811 *
2812 * The entry's receive function will be scheduled to run
2813 */
Harvey Harrisonb5606c22008-02-13 15:03:16 -08002814void __napi_schedule(struct napi_struct *n)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002815{
2816 unsigned long flags;
2817
2818 local_irq_save(flags);
2819 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2820 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2821 local_irq_restore(flags);
2822}
2823EXPORT_SYMBOL(__napi_schedule);
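
/*
 * Illustrative sketch, not part of the original file: an interrupt
 * handler typically masks its rx interrupt and schedules NAPI.  The
 * napi_schedule_prep()/__napi_schedule() pair is the open-coded form
 * of napi_schedule().  "my_adapter" and my_disable_rx_irq() are
 * hypothetical.
 */
#if 0	/* example only */
static irqreturn_t my_intr(int irq, void *dev_id)
{
	struct my_adapter *adapter = dev_id;

	if (napi_schedule_prep(&adapter->napi)) {
		my_disable_rx_irq(adapter);
		__napi_schedule(&adapter->napi);
	}
	return IRQ_HANDLED;
}
#endif	/* example only */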
2824
Herbert Xud565b0a2008-12-15 23:38:52 -08002825void __napi_complete(struct napi_struct *n)
2826{
2827 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
2828 BUG_ON(n->gro_list);
2829
2830 list_del(&n->poll_list);
2831 smp_mb__before_clear_bit();
2832 clear_bit(NAPI_STATE_SCHED, &n->state);
2833}
2834EXPORT_SYMBOL(__napi_complete);
2835
2836void napi_complete(struct napi_struct *n)
2837{
2838 unsigned long flags;
2839
2840 /*
2841 * don't let napi dequeue from the cpu poll list
2842 * just in case it's running on a different cpu
2843 */
2844 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
2845 return;
2846
2847 napi_gro_flush(n);
2848 local_irq_save(flags);
2849 __napi_complete(n);
2850 local_irq_restore(flags);
2851}
2852EXPORT_SYMBOL(napi_complete);
2853
2854void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2855 int (*poll)(struct napi_struct *, int), int weight)
2856{
2857 INIT_LIST_HEAD(&napi->poll_list);
Herbert Xu4ae55442009-02-08 18:00:36 +00002858 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002859 napi->gro_list = NULL;
Herbert Xu5d38a072009-01-04 16:13:40 -08002860 napi->skb = NULL;
Herbert Xud565b0a2008-12-15 23:38:52 -08002861 napi->poll = poll;
2862 napi->weight = weight;
2863 list_add(&napi->dev_list, &dev->napi_list);
Herbert Xud565b0a2008-12-15 23:38:52 -08002864 napi->dev = dev;
Herbert Xu5d38a072009-01-04 16:13:40 -08002865#ifdef CONFIG_NETPOLL
Herbert Xud565b0a2008-12-15 23:38:52 -08002866 spin_lock_init(&napi->poll_lock);
2867 napi->poll_owner = -1;
2868#endif
2869 set_bit(NAPI_STATE_SCHED, &napi->state);
2870}
2871EXPORT_SYMBOL(netif_napi_add);
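
/*
 * Illustrative sketch, not part of the original file: a driver registers
 * its poll handler once at probe time and calls napi_complete() only
 * when it used less than the full budget.  "my_adapter", my_rx_clean(),
 * my_enable_rx_irq() and MY_NAPI_WEIGHT are hypothetical.
 */
#if 0	/* example only */
static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_adapter *adapter =
		container_of(napi, struct my_adapter, napi);
	int work_done = my_rx_clean(adapter, budget);

	if (work_done < budget) {
		napi_complete(napi);
		my_enable_rx_irq(adapter);
	}
	return work_done;
}

static void my_probe_setup_napi(struct my_adapter *adapter)
{
	netif_napi_add(adapter->netdev, &adapter->napi,
		       my_poll, MY_NAPI_WEIGHT);
}
#endif	/* example only */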
2872
2873void netif_napi_del(struct napi_struct *napi)
2874{
2875 struct sk_buff *skb, *next;
2876
Peter P Waskiewicz Jrd7b06632008-12-26 01:35:35 -08002877 list_del_init(&napi->dev_list);
Herbert Xu76620aa2009-04-16 02:02:07 -07002878 napi_free_frags(napi);
Herbert Xud565b0a2008-12-15 23:38:52 -08002879
2880 for (skb = napi->gro_list; skb; skb = next) {
2881 next = skb->next;
2882 skb->next = NULL;
2883 kfree_skb(skb);
2884 }
2885
2886 napi->gro_list = NULL;
Herbert Xu4ae55442009-02-08 18:00:36 +00002887 napi->gro_count = 0;
Herbert Xud565b0a2008-12-15 23:38:52 -08002888}
2889EXPORT_SYMBOL(netif_napi_del);
2890
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002891
Linus Torvalds1da177e2005-04-16 15:20:36 -07002892static void net_rx_action(struct softirq_action *h)
2893{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002894 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002895 unsigned long time_limit = jiffies + 2;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07002896 int budget = netdev_budget;
Matt Mackall53fb95d2005-08-11 19:27:43 -07002897 void *have;
2898
Linus Torvalds1da177e2005-04-16 15:20:36 -07002899 local_irq_disable();
2900
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002901 while (!list_empty(list)) {
2902 struct napi_struct *n;
2903 int work, weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002904
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002905 /* If the softirq window is exhausted then punt.
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002906 * Allow this to run for 2 jiffies, which allows
2907 * an average latency of 1.5/HZ.
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002908 */
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002909 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002910 goto softnet_break;
2911
2912 local_irq_enable();
2913
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002914 /* Even though interrupts have been re-enabled, this
2915 * access is safe because interrupts can only add new
2916 * entries to the tail of this list, and only ->poll()
2917 * calls can remove this head entry from the list.
2918 */
2919 n = list_entry(list->next, struct napi_struct, poll_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002920
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002921 have = netpoll_poll_lock(n);
2922
2923 weight = n->weight;
2924
David S. Miller0a7606c2007-10-29 21:28:47 -07002925 /* This NAPI_STATE_SCHED test is for avoiding a race
2926 * with netpoll's poll_napi(). Only the entity which
2927 * obtains the lock and sees NAPI_STATE_SCHED set will
2928 * actually make the ->poll() call. Therefore we avoid
2929 * accidentally calling ->poll() when NAPI is not scheduled.
2930 */
2931 work = 0;
Neil Horman4ea7e382009-05-21 07:36:08 +00002932 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
David S. Miller0a7606c2007-10-29 21:28:47 -07002933 work = n->poll(n, weight);
Neil Horman4ea7e382009-05-21 07:36:08 +00002934 trace_napi_poll(n);
2935 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002936
2937 WARN_ON_ONCE(work > weight);
2938
2939 budget -= work;
2940
2941 local_irq_disable();
2942
2943 /* Drivers must not modify the NAPI state if they
2944 * consume the entire weight. In such cases this code
2945 * still "owns" the NAPI instance and therefore can
2946 * move the instance around on the list at-will.
2947 */
David S. Millerfed17f32008-01-07 21:00:40 -08002948 if (unlikely(work == weight)) {
Herbert Xuff780cd2009-06-26 19:27:04 -07002949 if (unlikely(napi_disable_pending(n))) {
2950 local_irq_enable();
2951 napi_complete(n);
2952 local_irq_disable();
2953 } else
David S. Millerfed17f32008-01-07 21:00:40 -08002954 list_move_tail(&n->poll_list, list);
2955 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002956
2957 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002958 }
2959out:
Shannon Nelson515e06c2007-06-23 23:09:23 -07002960 local_irq_enable();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002961
Chris Leechdb217332006-06-17 21:24:58 -07002962#ifdef CONFIG_NET_DMA
2963 /*
2964 * There may not be any more sk_buffs coming right now, so push
2965 * any pending DMA copies to hardware
2966 */
Dan Williams2ba05622009-01-06 11:38:14 -07002967 dma_issue_pending_all();
Chris Leechdb217332006-06-17 21:24:58 -07002968#endif
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002969
Linus Torvalds1da177e2005-04-16 15:20:36 -07002970 return;
2971
2972softnet_break:
2973 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2974 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2975 goto out;
2976}
2977
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002978static gifconf_func_t *gifconf_list[NPROTO];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002979
2980/**
2981 * register_gifconf - register a SIOCGIF handler
2982 * @family: Address family
2983 * @gifconf: Function handler
2984 *
2985 * Register protocol dependent address dumping routines. The handler
2986 * that is passed must not be freed or reused until it has been replaced
2987 * by another handler.
2988 */
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002989int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002990{
2991 if (family >= NPROTO)
2992 return -EINVAL;
2993 gifconf_list[family] = gifconf;
2994 return 0;
2995}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07002996EXPORT_SYMBOL(register_gifconf);
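
/*
 * Illustrative sketch, not part of the original file: a protocol module
 * registers one handler per address family.  PF_MYPROTO and the handler
 * body are hypothetical; a handler writes ifreq records for @dev into
 * the buffer and returns the bytes written, or the space it would need
 * when the buffer pointer is NULL.
 */
#if 0	/* example only */
static int my_gifconf(struct net_device *dev, char __user *bufptr, int len)
{
	/* fill bufptr with up to len bytes of ifreq records for dev */
	return 0;
}

static int __init my_proto_init(void)
{
	return register_gifconf(PF_MYPROTO, my_gifconf);
}
#endif	/* example only */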
Linus Torvalds1da177e2005-04-16 15:20:36 -07002997
2998
2999/*
3000 * Map an interface index to its name (SIOCGIFNAME)
3001 */
3002
3003/*
3004 * We need this ioctl for efficient implementation of the
3005 * if_indextoname() function required by the IPv6 API. Without
3006 * it, we would have to search all the interfaces to find a
3007 * match. --pb
3008 */
3009
Eric W. Biederman881d9662007-09-17 11:56:21 -07003010static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003011{
3012 struct net_device *dev;
3013 struct ifreq ifr;
3014
3015 /*
3016 * Fetch the caller's info block.
3017 */
3018
3019 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3020 return -EFAULT;
3021
Eric Dumazetfb699dfd2009-10-19 19:18:49 +00003022 rcu_read_lock();
3023 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003024 if (!dev) {
Eric Dumazetfb699dfd2009-10-19 19:18:49 +00003025 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003026 return -ENODEV;
3027 }
3028
3029 strcpy(ifr.ifr_name, dev->name);
Eric Dumazetfb699dfd2009-10-19 19:18:49 +00003030 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003031
3032 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
3033 return -EFAULT;
3034 return 0;
3035}
3036
3037/*
3038 * Perform a SIOCGIFCONF call. This structure will change
3039 * size eventually, and there is nothing I can do about it.
3040 * Thus we will need a 'compatibility mode'.
3041 */
3042
Eric W. Biederman881d9662007-09-17 11:56:21 -07003043static int dev_ifconf(struct net *net, char __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003044{
3045 struct ifconf ifc;
3046 struct net_device *dev;
3047 char __user *pos;
3048 int len;
3049 int total;
3050 int i;
3051
3052 /*
3053 * Fetch the caller's info block.
3054 */
3055
3056 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
3057 return -EFAULT;
3058
3059 pos = ifc.ifc_buf;
3060 len = ifc.ifc_len;
3061
3062 /*
3063 * Loop over the interfaces, and write an info block for each.
3064 */
3065
3066 total = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003067 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003068 for (i = 0; i < NPROTO; i++) {
3069 if (gifconf_list[i]) {
3070 int done;
3071 if (!pos)
3072 done = gifconf_list[i](dev, NULL, 0);
3073 else
3074 done = gifconf_list[i](dev, pos + total,
3075 len - total);
3076 if (done < 0)
3077 return -EFAULT;
3078 total += done;
3079 }
3080 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003081 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003082
3083 /*
3084 * All done. Write the updated control block back to the caller.
3085 */
3086 ifc.ifc_len = total;
3087
3088 /*
3089 * Both BSD and Solaris return 0 here, so we do too.
3090 */
3091 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
3092}
3093
3094#ifdef CONFIG_PROC_FS
3095/*
3096 * This is invoked by the /proc filesystem handler to display a device
3097 * in detail.
3098 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003099void *dev_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003100 __acquires(RCU)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003101{
Denis V. Luneve372c412007-11-19 22:31:54 -08003102 struct net *net = seq_file_net(seq);
Pavel Emelianov7562f872007-05-03 15:13:45 -07003103 loff_t off;
3104 struct net_device *dev;
3105
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003106 rcu_read_lock();
Pavel Emelianov7562f872007-05-03 15:13:45 -07003107 if (!*pos)
3108 return SEQ_START_TOKEN;
3109
3110 off = 1;
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003111 for_each_netdev_rcu(net, dev)
Pavel Emelianov7562f872007-05-03 15:13:45 -07003112 if (off++ == *pos)
3113 return dev;
3114
3115 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003116}
3117
3118void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3119{
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003120 struct net_device *dev = (v == SEQ_START_TOKEN) ?
3121 first_net_device(seq_file_net(seq)) :
3122 next_net_device((struct net_device *)v);
3123
Linus Torvalds1da177e2005-04-16 15:20:36 -07003124 ++*pos;
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003125 return rcu_dereference(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003126}
3127
3128void dev_seq_stop(struct seq_file *seq, void *v)
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003129 __releases(RCU)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003130{
Eric Dumazetc6d14c82009-11-04 05:43:23 -08003131 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003132}
3133
3134static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3135{
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08003136 const struct net_device_stats *stats = dev_get_stats(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003137
Rusty Russell5a1b5892007-04-28 21:04:03 -07003138 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
3139 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
3140 dev->name, stats->rx_bytes, stats->rx_packets,
3141 stats->rx_errors,
3142 stats->rx_dropped + stats->rx_missed_errors,
3143 stats->rx_fifo_errors,
3144 stats->rx_length_errors + stats->rx_over_errors +
3145 stats->rx_crc_errors + stats->rx_frame_errors,
3146 stats->rx_compressed, stats->multicast,
3147 stats->tx_bytes, stats->tx_packets,
3148 stats->tx_errors, stats->tx_dropped,
3149 stats->tx_fifo_errors, stats->collisions,
3150 stats->tx_carrier_errors +
3151 stats->tx_aborted_errors +
3152 stats->tx_window_errors +
3153 stats->tx_heartbeat_errors,
3154 stats->tx_compressed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003155}
3156
3157/*
3158 * Called from the PROCfs module. This now uses the new arbitrary sized
3159 * /proc/net interface to create /proc/net/dev
3160 */
3161static int dev_seq_show(struct seq_file *seq, void *v)
3162{
3163 if (v == SEQ_START_TOKEN)
3164 seq_puts(seq, "Inter-| Receive "
3165 " | Transmit\n"
3166 " face |bytes packets errs drop fifo frame "
3167 "compressed multicast|bytes packets errs "
3168 "drop fifo colls carrier compressed\n");
3169 else
3170 dev_seq_printf_stats(seq, v);
3171 return 0;
3172}
3173
3174static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3175{
3176 struct netif_rx_stats *rc = NULL;
3177
Mike Travis0c0b0ac2008-05-02 16:43:08 -07003178 while (*pos < nr_cpu_ids)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003179 if (cpu_online(*pos)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003180 rc = &per_cpu(netdev_rx_stat, *pos);
3181 break;
3182 } else
3183 ++*pos;
3184 return rc;
3185}
3186
3187static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3188{
3189 return softnet_get_online(pos);
3190}
3191
3192static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3193{
3194 ++*pos;
3195 return softnet_get_online(pos);
3196}
3197
3198static void softnet_seq_stop(struct seq_file *seq, void *v)
3199{
3200}
3201
3202static int softnet_seq_show(struct seq_file *seq, void *v)
3203{
3204 struct netif_rx_stats *s = v;
3205
3206 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
Stephen Hemminger31aa02c2005-06-23 20:12:48 -07003207 s->total, s->dropped, s->time_squeeze, 0,
Stephen Hemmingerc1ebcdb2005-06-23 20:08:59 -07003208 0, 0, 0, 0, /* was fastroute */
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003209 s->cpu_collision);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003210 return 0;
3211}
3212
Stephen Hemmingerf6908082007-03-12 14:34:29 -07003213static const struct seq_operations dev_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003214 .start = dev_seq_start,
3215 .next = dev_seq_next,
3216 .stop = dev_seq_stop,
3217 .show = dev_seq_show,
3218};
3219
3220static int dev_seq_open(struct inode *inode, struct file *file)
3221{
Denis V. Luneve372c412007-11-19 22:31:54 -08003222 return seq_open_net(inode, file, &dev_seq_ops,
3223 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003224}
3225
Arjan van de Ven9a321442007-02-12 00:55:35 -08003226static const struct file_operations dev_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003227 .owner = THIS_MODULE,
3228 .open = dev_seq_open,
3229 .read = seq_read,
3230 .llseek = seq_lseek,
Denis V. Luneve372c412007-11-19 22:31:54 -08003231 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003232};
3233
Stephen Hemmingerf6908082007-03-12 14:34:29 -07003234static const struct seq_operations softnet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003235 .start = softnet_seq_start,
3236 .next = softnet_seq_next,
3237 .stop = softnet_seq_stop,
3238 .show = softnet_seq_show,
3239};
3240
3241static int softnet_seq_open(struct inode *inode, struct file *file)
3242{
3243 return seq_open(file, &softnet_seq_ops);
3244}
3245
Arjan van de Ven9a321442007-02-12 00:55:35 -08003246static const struct file_operations softnet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003247 .owner = THIS_MODULE,
3248 .open = softnet_seq_open,
3249 .read = seq_read,
3250 .llseek = seq_lseek,
3251 .release = seq_release,
3252};
3253
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003254static void *ptype_get_idx(loff_t pos)
3255{
3256 struct packet_type *pt = NULL;
3257 loff_t i = 0;
3258 int t;
3259
3260 list_for_each_entry_rcu(pt, &ptype_all, list) {
3261 if (i == pos)
3262 return pt;
3263 ++i;
3264 }
3265
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003266 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003267 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3268 if (i == pos)
3269 return pt;
3270 ++i;
3271 }
3272 }
3273 return NULL;
3274}
3275
3276static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemminger72348a422008-01-21 02:27:29 -08003277 __acquires(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003278{
3279 rcu_read_lock();
3280 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3281}
3282
3283static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3284{
3285 struct packet_type *pt;
3286 struct list_head *nxt;
3287 int hash;
3288
3289 ++*pos;
3290 if (v == SEQ_START_TOKEN)
3291 return ptype_get_idx(0);
3292
3293 pt = v;
3294 nxt = pt->list.next;
3295 if (pt->type == htons(ETH_P_ALL)) {
3296 if (nxt != &ptype_all)
3297 goto found;
3298 hash = 0;
3299 nxt = ptype_base[0].next;
3300 } else
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003301 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003302
3303 while (nxt == &ptype_base[hash]) {
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08003304 if (++hash >= PTYPE_HASH_SIZE)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003305 return NULL;
3306 nxt = ptype_base[hash].next;
3307 }
3308found:
3309 return list_entry(nxt, struct packet_type, list);
3310}
3311
3312static void ptype_seq_stop(struct seq_file *seq, void *v)
Stephen Hemminger72348a422008-01-21 02:27:29 -08003313 __releases(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003314{
3315 rcu_read_unlock();
3316}
3317
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003318static int ptype_seq_show(struct seq_file *seq, void *v)
3319{
3320 struct packet_type *pt = v;
3321
3322 if (v == SEQ_START_TOKEN)
3323 seq_puts(seq, "Type Device Function\n");
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09003324 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003325 if (pt->type == htons(ETH_P_ALL))
3326 seq_puts(seq, "ALL ");
3327 else
3328 seq_printf(seq, "%04x", ntohs(pt->type));
3329
Alexey Dobriyan908cd2d2008-11-16 19:50:35 -08003330 seq_printf(seq, " %-8s %pF\n",
3331 pt->dev ? pt->dev->name : "", pt->func);
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003332 }
3333
3334 return 0;
3335}
3336
3337static const struct seq_operations ptype_seq_ops = {
3338 .start = ptype_seq_start,
3339 .next = ptype_seq_next,
3340 .stop = ptype_seq_stop,
3341 .show = ptype_seq_show,
3342};
3343
3344static int ptype_seq_open(struct inode *inode, struct file *file)
3345{
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07003346 return seq_open_net(inode, file, &ptype_seq_ops,
3347 sizeof(struct seq_net_private));
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003348}
3349
3350static const struct file_operations ptype_seq_fops = {
3351 .owner = THIS_MODULE,
3352 .open = ptype_seq_open,
3353 .read = seq_read,
3354 .llseek = seq_lseek,
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07003355 .release = seq_release_net,
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003356};
3357
3358
Pavel Emelyanov46650792007-10-08 20:38:39 -07003359static int __net_init dev_proc_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003360{
3361 int rc = -ENOMEM;
3362
Eric W. Biederman881d9662007-09-17 11:56:21 -07003363 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003364 goto out;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003365 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003366 goto out_dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003367 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003368 goto out_softnet;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07003369
Eric W. Biederman881d9662007-09-17 11:56:21 -07003370 if (wext_proc_init(net))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003371 goto out_ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003372 rc = 0;
3373out:
3374 return rc;
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02003375out_ptype:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003376 proc_net_remove(net, "ptype");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003377out_softnet:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003378 proc_net_remove(net, "softnet_stat");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003379out_dev:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003380 proc_net_remove(net, "dev");
Linus Torvalds1da177e2005-04-16 15:20:36 -07003381 goto out;
3382}
Eric W. Biederman881d9662007-09-17 11:56:21 -07003383
Pavel Emelyanov46650792007-10-08 20:38:39 -07003384static void __net_exit dev_proc_net_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07003385{
3386 wext_proc_exit(net);
3387
3388 proc_net_remove(net, "ptype");
3389 proc_net_remove(net, "softnet_stat");
3390 proc_net_remove(net, "dev");
3391}
3392
Denis V. Lunev022cbae2007-11-13 03:23:50 -08003393static struct pernet_operations __net_initdata dev_proc_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07003394 .init = dev_proc_net_init,
3395 .exit = dev_proc_net_exit,
3396};
3397
3398static int __init dev_proc_init(void)
3399{
3400 return register_pernet_subsys(&dev_proc_ops);
3401}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003402#else
3403#define dev_proc_init() 0
3404#endif /* CONFIG_PROC_FS */
3405
3406
3407/**
3408 * netdev_set_master - set up master/slave pair
3409 * @slave: slave device
3410 * @master: new master device
3411 *
3412 * Changes the master device of the slave. Pass %NULL to break the
3413 * bonding. The caller must hold the RTNL semaphore. On a failure
3414 * a negative errno code is returned. On success the reference counts
3415 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
3416 * function returns zero.
3417 */
3418int netdev_set_master(struct net_device *slave, struct net_device *master)
3419{
3420 struct net_device *old = slave->master;
3421
3422 ASSERT_RTNL();
3423
3424 if (master) {
3425 if (old)
3426 return -EBUSY;
3427 dev_hold(master);
3428 }
3429
3430 slave->master = master;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09003431
Linus Torvalds1da177e2005-04-16 15:20:36 -07003432 synchronize_net();
3433
3434 if (old)
3435 dev_put(old);
3436
3437 if (master)
3438 slave->flags |= IFF_SLAVE;
3439 else
3440 slave->flags &= ~IFF_SLAVE;
3441
3442 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
3443 return 0;
3444}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003445EXPORT_SYMBOL(netdev_set_master);
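
/*
 * Illustrative sketch, not part of the original file: a bonding-style
 * driver enslaves and later releases a device, holding the RTNL
 * semaphore as required.  "bond_dev" and "slave_dev" are hypothetical.
 */
#if 0	/* example only */
	ASSERT_RTNL();

	err = netdev_set_master(slave_dev, bond_dev);
	if (err)
		return err;
	/* ... later, to break the bond: */
	netdev_set_master(slave_dev, NULL);
#endif	/* example only */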
Linus Torvalds1da177e2005-04-16 15:20:36 -07003446
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003447static void dev_change_rx_flags(struct net_device *dev, int flags)
3448{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003449 const struct net_device_ops *ops = dev->netdev_ops;
3450
3451 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
3452 ops->ndo_change_rx_flags(dev, flags);
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003453}
3454
Wang Chendad9b332008-06-18 01:48:28 -07003455static int __dev_set_promiscuity(struct net_device *dev, int inc)
Patrick McHardy4417da62007-06-27 01:28:10 -07003456{
3457 unsigned short old_flags = dev->flags;
David Howells8192b0c2008-11-14 10:39:10 +11003458 uid_t uid;
3459 gid_t gid;
Patrick McHardy4417da62007-06-27 01:28:10 -07003460
Patrick McHardy24023452007-07-14 18:51:31 -07003461 ASSERT_RTNL();
3462
Wang Chendad9b332008-06-18 01:48:28 -07003463 dev->flags |= IFF_PROMISC;
3464 dev->promiscuity += inc;
3465 if (dev->promiscuity == 0) {
3466 /*
3467 * Avoid overflow.
3468 * If inc causes overflow, untouch promisc and return error.
3469 */
3470 if (inc < 0)
3471 dev->flags &= ~IFF_PROMISC;
3472 else {
3473 dev->promiscuity -= inc;
3474 printk(KERN_WARNING "%s: promiscuity counter overflow, "
3475 "set promiscuity failed; promiscuity feature "
3476 "of device might be broken.\n", dev->name);
3477 return -EOVERFLOW;
3478 }
3479 }
Patrick McHardy4417da62007-06-27 01:28:10 -07003480 if (dev->flags != old_flags) {
3481 printk(KERN_INFO "device %s %s promiscuous mode\n",
3482 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
3483 "left");
David Howells8192b0c2008-11-14 10:39:10 +11003484 if (audit_enabled) {
3485 current_uid_gid(&uid, &gid);
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05003486 audit_log(current->audit_context, GFP_ATOMIC,
3487 AUDIT_ANOM_PROMISCUOUS,
3488 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
3489 dev->name, (dev->flags & IFF_PROMISC),
3490 (old_flags & IFF_PROMISC),
3491 audit_get_loginuid(current),
David Howells8192b0c2008-11-14 10:39:10 +11003492 uid, gid,
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05003493 audit_get_sessionid(current));
David Howells8192b0c2008-11-14 10:39:10 +11003494 }
Patrick McHardy24023452007-07-14 18:51:31 -07003495
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003496 dev_change_rx_flags(dev, IFF_PROMISC);
Patrick McHardy4417da62007-06-27 01:28:10 -07003497 }
Wang Chendad9b332008-06-18 01:48:28 -07003498 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003499}
3500
Linus Torvalds1da177e2005-04-16 15:20:36 -07003501/**
3502 * dev_set_promiscuity - update promiscuity count on a device
3503 * @dev: device
3504 * @inc: modifier
3505 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07003506 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07003507 * remains above zero the interface remains promiscuous. Once it hits zero
3508 * the device reverts to normal filtering operation. A negative inc
3509 * value is used to drop promiscuity on the device.
Wang Chendad9b332008-06-18 01:48:28 -07003510 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003511 */
Wang Chendad9b332008-06-18 01:48:28 -07003512int dev_set_promiscuity(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003513{
3514 unsigned short old_flags = dev->flags;
Wang Chendad9b332008-06-18 01:48:28 -07003515 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003516
Wang Chendad9b332008-06-18 01:48:28 -07003517 err = __dev_set_promiscuity(dev, inc);
Patrick McHardy4b5a6982008-07-06 15:49:08 -07003518 if (err < 0)
Wang Chendad9b332008-06-18 01:48:28 -07003519 return err;
Patrick McHardy4417da62007-06-27 01:28:10 -07003520 if (dev->flags != old_flags)
3521 dev_set_rx_mode(dev);
Wang Chendad9b332008-06-18 01:48:28 -07003522 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003523}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003524EXPORT_SYMBOL(dev_set_promiscuity);
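
/*
 * Illustrative sketch, not part of the original file: a packet-capture
 * style user takes one promiscuity reference for the duration of a
 * session and drops it afterwards, under the RTNL semaphore.
 */
#if 0	/* example only */
	rtnl_lock();
	err = dev_set_promiscuity(dev, 1);
	rtnl_unlock();
	if (err)
		return err;
	/* ... capture traffic ... */
	rtnl_lock();
	dev_set_promiscuity(dev, -1);
	rtnl_unlock();
#endif	/* example only */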
Linus Torvalds1da177e2005-04-16 15:20:36 -07003525
3526/**
3527 * dev_set_allmulti - update allmulti count on a device
3528 * @dev: device
3529 * @inc: modifier
3530 *
3531 * Add or remove reception of all multicast frames to a device. While the
3532 * count in the device remains above zero the interface keeps receiving
3533 * all multicast frames. Once it hits zero the device reverts to normal
3534 * filtering operation. A negative @inc value is used to drop the counter
3535 * when releasing a resource needing all multicasts.
Wang Chendad9b332008-06-18 01:48:28 -07003536 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003537 */
3538
Wang Chendad9b332008-06-18 01:48:28 -07003539int dev_set_allmulti(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003540{
3541 unsigned short old_flags = dev->flags;
3542
Patrick McHardy24023452007-07-14 18:51:31 -07003543 ASSERT_RTNL();
3544
Linus Torvalds1da177e2005-04-16 15:20:36 -07003545 dev->flags |= IFF_ALLMULTI;
Wang Chendad9b332008-06-18 01:48:28 -07003546 dev->allmulti += inc;
3547 if (dev->allmulti == 0) {
3548 /*
3549 * Avoid overflow.
3550 * If inc causes overflow, untouch allmulti and return error.
3551 */
3552 if (inc < 0)
3553 dev->flags &= ~IFF_ALLMULTI;
3554 else {
3555 dev->allmulti -= inc;
3556 printk(KERN_WARNING "%s: allmulti counter overflow, "
3557 "set allmulti failed; allmulti feature of "
3558 "device might be broken.\n", dev->name);
3559 return -EOVERFLOW;
3560 }
3561 }
Patrick McHardy24023452007-07-14 18:51:31 -07003562 if (dev->flags ^ old_flags) {
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003563 dev_change_rx_flags(dev, IFF_ALLMULTI);
Patrick McHardy4417da62007-06-27 01:28:10 -07003564 dev_set_rx_mode(dev);
Patrick McHardy24023452007-07-14 18:51:31 -07003565 }
Wang Chendad9b332008-06-18 01:48:28 -07003566 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003567}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07003568EXPORT_SYMBOL(dev_set_allmulti);
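
/*
 * Illustrative sketch, not part of the original file: a user that needs
 * every multicast frame (e.g. a routing daemon helper) takes an allmulti
 * reference while it runs, mirroring the promiscuity example above.
 */
#if 0	/* example only */
	rtnl_lock();
	err = dev_set_allmulti(dev, 1);
	rtnl_unlock();
	/* ... and dev_set_allmulti(dev, -1) when done ... */
#endif	/* example only */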
Patrick McHardy4417da62007-06-27 01:28:10 -07003569
3570/*
3571 * Upload unicast and multicast address lists to device and
3572 * configure RX filtering. When the device doesn't support unicast
Joe Perches53ccaae2007-12-20 14:02:06 -08003573 * filtering it is put in promiscuous mode while unicast addresses
Patrick McHardy4417da62007-06-27 01:28:10 -07003574 * are present.
3575 */
3576void __dev_set_rx_mode(struct net_device *dev)
3577{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003578 const struct net_device_ops *ops = dev->netdev_ops;
3579
Patrick McHardy4417da62007-06-27 01:28:10 -07003580 /* dev_open will call this function so the list will stay sane. */
3581 if (!(dev->flags&IFF_UP))
3582 return;
3583
3584 if (!netif_device_present(dev))
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +09003585 return;
Patrick McHardy4417da62007-06-27 01:28:10 -07003586
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003587 if (ops->ndo_set_rx_mode)
3588 ops->ndo_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003589 else {
3590 /* Unicast address changes may only happen under the rtnl,
3591 * therefore calling __dev_set_promiscuity here is safe.
3592 */
Jiri Pirko31278e72009-06-17 01:12:19 +00003593 if (dev->uc.count > 0 && !dev->uc_promisc) {
Patrick McHardy4417da62007-06-27 01:28:10 -07003594 __dev_set_promiscuity(dev, 1);
3595 dev->uc_promisc = 1;
Jiri Pirko31278e72009-06-17 01:12:19 +00003596 } else if (dev->uc.count == 0 && dev->uc_promisc) {
Patrick McHardy4417da62007-06-27 01:28:10 -07003597 __dev_set_promiscuity(dev, -1);
3598 dev->uc_promisc = 0;
3599 }
3600
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003601 if (ops->ndo_set_multicast_list)
3602 ops->ndo_set_multicast_list(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003603 }
3604}
3605
3606void dev_set_rx_mode(struct net_device *dev)
3607{
David S. Millerb9e40852008-07-15 00:15:08 -07003608 netif_addr_lock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003609 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003610 netif_addr_unlock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003611}
3612
Jiri Pirkof001fde2009-05-05 02:48:28 +00003613/* hw addresses list handling functions */
3614
Jiri Pirko31278e72009-06-17 01:12:19 +00003615static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
3616 int addr_len, unsigned char addr_type)
Jiri Pirkof001fde2009-05-05 02:48:28 +00003617{
3618 struct netdev_hw_addr *ha;
3619 int alloc_size;
3620
3621 if (addr_len > MAX_ADDR_LEN)
3622 return -EINVAL;
3623
Jiri Pirko31278e72009-06-17 01:12:19 +00003624 list_for_each_entry(ha, &list->list, list) {
Jiri Pirkoccffad252009-05-22 23:22:17 +00003625 if (!memcmp(ha->addr, addr, addr_len) &&
3626 ha->type == addr_type) {
3627 ha->refcount++;
3628 return 0;
3629 }
3630 }
3631
3632
Jiri Pirkof001fde2009-05-05 02:48:28 +00003633 alloc_size = sizeof(*ha);
3634 if (alloc_size < L1_CACHE_BYTES)
3635 alloc_size = L1_CACHE_BYTES;
3636 ha = kmalloc(alloc_size, GFP_ATOMIC);
3637 if (!ha)
3638 return -ENOMEM;
3639 memcpy(ha->addr, addr, addr_len);
3640 ha->type = addr_type;
Jiri Pirkoccffad252009-05-22 23:22:17 +00003641 ha->refcount = 1;
3642 ha->synced = false;
Jiri Pirko31278e72009-06-17 01:12:19 +00003643 list_add_tail_rcu(&ha->list, &list->list);
3644 list->count++;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003645 return 0;
3646}
3647
3648static void ha_rcu_free(struct rcu_head *head)
3649{
3650 struct netdev_hw_addr *ha;
3651
3652 ha = container_of(head, struct netdev_hw_addr, rcu_head);
3653 kfree(ha);
3654}
3655
Jiri Pirko31278e72009-06-17 01:12:19 +00003656static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
3657 int addr_len, unsigned char addr_type)
Jiri Pirkof001fde2009-05-05 02:48:28 +00003658{
3659 struct netdev_hw_addr *ha;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003660
Jiri Pirko31278e72009-06-17 01:12:19 +00003661 list_for_each_entry(ha, &list->list, list) {
Jiri Pirkoccffad252009-05-22 23:22:17 +00003662 if (!memcmp(ha->addr, addr, addr_len) &&
Jiri Pirkof001fde2009-05-05 02:48:28 +00003663 (ha->type == addr_type || !addr_type)) {
Jiri Pirkoccffad252009-05-22 23:22:17 +00003664 if (--ha->refcount)
3665 return 0;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003666 list_del_rcu(&ha->list);
3667 call_rcu(&ha->rcu_head, ha_rcu_free);
Jiri Pirko31278e72009-06-17 01:12:19 +00003668 list->count--;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003669 return 0;
3670 }
3671 }
3672 return -ENOENT;
3673}
3674
Jiri Pirko31278e72009-06-17 01:12:19 +00003675static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
3676 struct netdev_hw_addr_list *from_list,
3677 int addr_len,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003678 unsigned char addr_type)
Jiri Pirkof001fde2009-05-05 02:48:28 +00003679{
3680 int err;
3681 struct netdev_hw_addr *ha, *ha2;
3682 unsigned char type;
3683
Jiri Pirko31278e72009-06-17 01:12:19 +00003684 list_for_each_entry(ha, &from_list->list, list) {
Jiri Pirkof001fde2009-05-05 02:48:28 +00003685 type = addr_type ? addr_type : ha->type;
Jiri Pirko31278e72009-06-17 01:12:19 +00003686 err = __hw_addr_add(to_list, ha->addr, addr_len, type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003687 if (err)
3688 goto unroll;
3689 }
3690 return 0;
3691
3692unroll:
Jiri Pirko31278e72009-06-17 01:12:19 +00003693 list_for_each_entry(ha2, &from_list->list, list) {
Jiri Pirkof001fde2009-05-05 02:48:28 +00003694 if (ha2 == ha)
3695 break;
3696 type = addr_type ? addr_type : ha2->type;
Jiri Pirko31278e72009-06-17 01:12:19 +00003697 __hw_addr_del(to_list, ha2->addr, addr_len, type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003698 }
3699 return err;
3700}
3701
Jiri Pirko31278e72009-06-17 01:12:19 +00003702static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
3703 struct netdev_hw_addr_list *from_list,
3704 int addr_len,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003705 unsigned char addr_type)
Jiri Pirkof001fde2009-05-05 02:48:28 +00003706{
3707 struct netdev_hw_addr *ha;
3708 unsigned char type;
3709
Jiri Pirko31278e72009-06-17 01:12:19 +00003710 list_for_each_entry(ha, &from_list->list, list) {
Jiri Pirkof001fde2009-05-05 02:48:28 +00003711 type = addr_type ? addr_type : ha->type;
Jiri Pirko31278e72009-06-17 01:12:19 +00003712 __hw_addr_del(to_list, ha->addr, addr_len, type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003713 }
3714}
3715
Jiri Pirko31278e72009-06-17 01:12:19 +00003716static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
3717 struct netdev_hw_addr_list *from_list,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003718 int addr_len)
3719{
3720 int err = 0;
3721 struct netdev_hw_addr *ha, *tmp;
3722
Jiri Pirko31278e72009-06-17 01:12:19 +00003723 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
Jiri Pirkoccffad252009-05-22 23:22:17 +00003724 if (!ha->synced) {
Jiri Pirko31278e72009-06-17 01:12:19 +00003725 err = __hw_addr_add(to_list, ha->addr,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003726 addr_len, ha->type);
3727 if (err)
3728 break;
3729 ha->synced = true;
3730 ha->refcount++;
3731 } else if (ha->refcount == 1) {
Jiri Pirko31278e72009-06-17 01:12:19 +00003732 __hw_addr_del(to_list, ha->addr, addr_len, ha->type);
3733 __hw_addr_del(from_list, ha->addr, addr_len, ha->type);
Jiri Pirkoccffad252009-05-22 23:22:17 +00003734 }
3735 }
3736 return err;
3737}
3738
Jiri Pirko31278e72009-06-17 01:12:19 +00003739static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
3740 struct netdev_hw_addr_list *from_list,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003741 int addr_len)
3742{
3743 struct netdev_hw_addr *ha, *tmp;
3744
Jiri Pirko31278e72009-06-17 01:12:19 +00003745 list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
Jiri Pirkoccffad252009-05-22 23:22:17 +00003746 if (ha->synced) {
Jiri Pirko31278e72009-06-17 01:12:19 +00003747 __hw_addr_del(to_list, ha->addr,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003748 addr_len, ha->type);
3749 ha->synced = false;
Jiri Pirko31278e72009-06-17 01:12:19 +00003750 __hw_addr_del(from_list, ha->addr,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003751 addr_len, ha->type);
3752 }
3753 }
3754}
3755
Jiri Pirko31278e72009-06-17 01:12:19 +00003756static void __hw_addr_flush(struct netdev_hw_addr_list *list)
Jiri Pirkof001fde2009-05-05 02:48:28 +00003757{
3758 struct netdev_hw_addr *ha, *tmp;
3759
Jiri Pirko31278e72009-06-17 01:12:19 +00003760 list_for_each_entry_safe(ha, tmp, &list->list, list) {
Jiri Pirkof001fde2009-05-05 02:48:28 +00003761 list_del_rcu(&ha->list);
3762 call_rcu(&ha->rcu_head, ha_rcu_free);
3763 }
Jiri Pirko31278e72009-06-17 01:12:19 +00003764 list->count = 0;
3765}
3766
3767static void __hw_addr_init(struct netdev_hw_addr_list *list)
3768{
3769 INIT_LIST_HEAD(&list->list);
3770 list->count = 0;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003771}
3772
3773/* Device addresses handling functions */
3774
3775static void dev_addr_flush(struct net_device *dev)
3776{
3777 /* rtnl_mutex must be held here */
3778
Jiri Pirko31278e72009-06-17 01:12:19 +00003779 __hw_addr_flush(&dev->dev_addrs);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003780 dev->dev_addr = NULL;
3781}
3782
3783static int dev_addr_init(struct net_device *dev)
3784{
3785 unsigned char addr[MAX_ADDR_LEN];
3786 struct netdev_hw_addr *ha;
3787 int err;
3788
3789 /* rtnl_mutex must be held here */
3790
Jiri Pirko31278e72009-06-17 01:12:19 +00003791 __hw_addr_init(&dev->dev_addrs);
Eric Dumazet0c279222009-06-08 03:49:24 +00003792 memset(addr, 0, sizeof(addr));
Jiri Pirko31278e72009-06-17 01:12:19 +00003793 err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
Jiri Pirkof001fde2009-05-05 02:48:28 +00003794 NETDEV_HW_ADDR_T_LAN);
3795 if (!err) {
3796 /*
3797 * Get the first (previously created) address from the list
3798 * and set dev_addr pointer to this location.
3799 */
Jiri Pirko31278e72009-06-17 01:12:19 +00003800 ha = list_first_entry(&dev->dev_addrs.list,
Jiri Pirkof001fde2009-05-05 02:48:28 +00003801 struct netdev_hw_addr, list);
3802 dev->dev_addr = ha->addr;
3803 }
3804 return err;
3805}
3806
3807/**
3808 * dev_addr_add - Add a device address
3809 * @dev: device
3810 * @addr: address to add
3811 * @addr_type: address type
3812 *
3813 * Add a device address to the device or increase the reference count if
3814 * it already exists.
3815 *
3816 * The caller must hold the rtnl_mutex.
3817 */
3818int dev_addr_add(struct net_device *dev, unsigned char *addr,
3819 unsigned char addr_type)
3820{
3821 int err;
3822
3823 ASSERT_RTNL();
3824
Jiri Pirko31278e72009-06-17 01:12:19 +00003825 err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003826 if (!err)
3827 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3828 return err;
3829}
3830EXPORT_SYMBOL(dev_addr_add);
3831
3832/**
3833 * dev_addr_del - Release a device address.
3834 * @dev: device
3835 * @addr: address to delete
3836 * @addr_type: address type
3837 *
3838 * Release reference to a device address and remove it from the device
3839 * if the reference count drops to zero.
3840 *
3841 * The caller must hold the rtnl_mutex.
3842 */
3843int dev_addr_del(struct net_device *dev, unsigned char *addr,
3844 unsigned char addr_type)
3845{
3846 int err;
Jiri Pirkoccffad252009-05-22 23:22:17 +00003847 struct netdev_hw_addr *ha;
Jiri Pirkof001fde2009-05-05 02:48:28 +00003848
3849 ASSERT_RTNL();
3850
Jiri Pirkoccffad252009-05-22 23:22:17 +00003851 /*
3852 * We cannot remove the first address from the list because
3853 * dev->dev_addr points to that.
3854 */
Jiri Pirko31278e72009-06-17 01:12:19 +00003855 ha = list_first_entry(&dev->dev_addrs.list,
3856 struct netdev_hw_addr, list);
Jiri Pirkoccffad252009-05-22 23:22:17 +00003857 if (ha->addr == dev->dev_addr && ha->refcount == 1)
3858 return -ENOENT;
3859
Jiri Pirko31278e72009-06-17 01:12:19 +00003860 err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003861 addr_type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003862 if (!err)
3863 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3864 return err;
3865}
3866EXPORT_SYMBOL(dev_addr_del);
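
/*
 * Illustrative sketch, not part of the original file: a storage-class
 * driver (FCoE-like) attaches a secondary SAN MAC address and drops it
 * again, holding the rtnl_mutex as required.  "san_mac" is hypothetical.
 */
#if 0	/* example only */
	rtnl_lock();
	err = dev_addr_add(dev, san_mac, NETDEV_HW_ADDR_T_SAN);
	rtnl_unlock();
	/* ... */
	rtnl_lock();
	dev_addr_del(dev, san_mac, NETDEV_HW_ADDR_T_SAN);
	rtnl_unlock();
#endif	/* example only */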
3867
3868/**
3869 * dev_addr_add_multiple - Add device addresses from another device
3870 * @to_dev: device to which addresses will be added
3871 * @from_dev: device from which addresses will be added
3872 * @addr_type: address type - 0 means the type will be taken from from_dev
3873 *
3874 * Add the device addresses of one device to another.
3875 *
3876 * The caller must hold the rtnl_mutex.
3877 */
3878int dev_addr_add_multiple(struct net_device *to_dev,
3879 struct net_device *from_dev,
3880 unsigned char addr_type)
3881{
3882 int err;
3883
3884 ASSERT_RTNL();
3885
3886 if (from_dev->addr_len != to_dev->addr_len)
3887 return -EINVAL;
Jiri Pirko31278e72009-06-17 01:12:19 +00003888 err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003889 to_dev->addr_len, addr_type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003890 if (!err)
3891 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3892 return err;
3893}
3894EXPORT_SYMBOL(dev_addr_add_multiple);
3895
3896/**
3897 * dev_addr_del_multiple - Delete device addresses by another device
3898 * @to_dev: device where the addresses will be deleted
3899 * @from_dev: device supplying the list of addresses to delete
3900 * @addr_type: address type - 0 means the type will be taken from from_dev
3901 *
3902 * Deletes from @to_dev the addresses that are listed in @from_dev.
3903 *
3904 * The caller must hold the rtnl_mutex.
3905 */
3906int dev_addr_del_multiple(struct net_device *to_dev,
3907 struct net_device *from_dev,
3908 unsigned char addr_type)
3909{
3910 ASSERT_RTNL();
3911
3912 if (from_dev->addr_len != to_dev->addr_len)
3913 return -EINVAL;
Jiri Pirko31278e72009-06-17 01:12:19 +00003914 __hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
Jiri Pirkoccffad252009-05-22 23:22:17 +00003915 to_dev->addr_len, addr_type);
Jiri Pirkof001fde2009-05-05 02:48:28 +00003916 call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
3917 return 0;
3918}
3919EXPORT_SYMBOL(dev_addr_del_multiple);
3920
Jiri Pirko31278e72009-06-17 01:12:19 +00003921/* multicast addresses handling functions */
Jiri Pirkof001fde2009-05-05 02:48:28 +00003922
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003923int __dev_addr_delete(struct dev_addr_list **list, int *count,
3924 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003925{
3926 struct dev_addr_list *da;
3927
3928 for (; (da = *list) != NULL; list = &da->next) {
3929 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3930 alen == da->da_addrlen) {
3931 if (glbl) {
3932 int old_glbl = da->da_gusers;
3933 da->da_gusers = 0;
3934 if (old_glbl == 0)
3935 break;
3936 }
3937 if (--da->da_users)
3938 return 0;
3939
3940 *list = da->next;
3941 kfree(da);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003942 (*count)--;
Patrick McHardybf742482007-06-27 01:26:19 -07003943 return 0;
3944 }
3945 }
3946 return -ENOENT;
3947}
3948
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003949int __dev_addr_add(struct dev_addr_list **list, int *count,
3950 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003951{
3952 struct dev_addr_list *da;
3953
3954 for (da = *list; da != NULL; da = da->next) {
3955 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3956 da->da_addrlen == alen) {
3957 if (glbl) {
3958 int old_glbl = da->da_gusers;
3959 da->da_gusers = 1;
3960 if (old_glbl)
3961 return 0;
3962 }
3963 da->da_users++;
3964 return 0;
3965 }
3966 }
3967
Jorge Boncompte [DTI2]12aa3432008-02-19 14:17:04 -08003968 da = kzalloc(sizeof(*da), GFP_ATOMIC);
Patrick McHardybf742482007-06-27 01:26:19 -07003969 if (da == NULL)
3970 return -ENOMEM;
3971 memcpy(da->da_addr, addr, alen);
3972 da->da_addrlen = alen;
3973 da->da_users = 1;
3974 da->da_gusers = glbl ? 1 : 0;
3975 da->next = *list;
3976 *list = da;
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003977 (*count)++;
Patrick McHardybf742482007-06-27 01:26:19 -07003978 return 0;
3979}
3980
Patrick McHardy4417da62007-06-27 01:28:10 -07003981/**
3982 * dev_unicast_delete - Release secondary unicast address.
3983 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003984 * @addr: address to delete
Patrick McHardy4417da62007-06-27 01:28:10 -07003985 *
3986 * Release reference to a secondary unicast address and remove it
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003987 * from the device if the reference count drops to zero.
Patrick McHardy4417da62007-06-27 01:28:10 -07003988 *
3989 * The caller must hold the rtnl_mutex.
3990 */
Jiri Pirkoccffad252009-05-22 23:22:17 +00003991int dev_unicast_delete(struct net_device *dev, void *addr)
Patrick McHardy4417da62007-06-27 01:28:10 -07003992{
3993 int err;
3994
3995 ASSERT_RTNL();
3996
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00003997 netif_addr_lock_bh(dev);
Jiri Pirko31278e72009-06-17 01:12:19 +00003998 err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
3999 NETDEV_HW_ADDR_T_UNICAST);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004000 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07004001 __dev_set_rx_mode(dev);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004002 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07004003 return err;
4004}
4005EXPORT_SYMBOL(dev_unicast_delete);
4006
4007/**
4008 * dev_unicast_add - add a secondary unicast address
4009 * @dev: device
Wang Chen5dbaec52008-06-27 19:35:16 -07004010 * @addr: address to add
Patrick McHardy4417da62007-06-27 01:28:10 -07004011 *
4012 * Add a secondary unicast address to the device or increase
4013 * the reference count if it already exists.
4014 *
4015 * The caller must hold the rtnl_mutex.
4016 */
Jiri Pirkoccffad252009-05-22 23:22:17 +00004017int dev_unicast_add(struct net_device *dev, void *addr)
Patrick McHardy4417da62007-06-27 01:28:10 -07004018{
4019 int err;
4020
4021 ASSERT_RTNL();
4022
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004023 netif_addr_lock_bh(dev);
Jiri Pirko31278e72009-06-17 01:12:19 +00004024 err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
4025 NETDEV_HW_ADDR_T_UNICAST);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07004026 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07004027 __dev_set_rx_mode(dev);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004028 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07004029 return err;
4030}
4031EXPORT_SYMBOL(dev_unicast_add);
4032
Chris Leeche83a2ea2008-01-31 16:53:23 -08004033int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
4034 struct dev_addr_list **from, int *from_count)
4035{
4036 struct dev_addr_list *da, *next;
4037 int err = 0;
4038
4039 da = *from;
4040 while (da != NULL) {
4041 next = da->next;
4042 if (!da->da_synced) {
4043 err = __dev_addr_add(to, to_count,
4044 da->da_addr, da->da_addrlen, 0);
4045 if (err < 0)
4046 break;
4047 da->da_synced = 1;
4048 da->da_users++;
4049 } else if (da->da_users == 1) {
4050 __dev_addr_delete(to, to_count,
4051 da->da_addr, da->da_addrlen, 0);
4052 __dev_addr_delete(from, from_count,
4053 da->da_addr, da->da_addrlen, 0);
4054 }
4055 da = next;
4056 }
4057 return err;
4058}
Johannes Bergc4029082009-06-17 17:43:30 +02004059EXPORT_SYMBOL_GPL(__dev_addr_sync);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004060
4061void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
4062 struct dev_addr_list **from, int *from_count)
4063{
4064 struct dev_addr_list *da, *next;
4065
4066 da = *from;
4067 while (da != NULL) {
4068 next = da->next;
4069 if (da->da_synced) {
4070 __dev_addr_delete(to, to_count,
4071 da->da_addr, da->da_addrlen, 0);
4072 da->da_synced = 0;
4073 __dev_addr_delete(from, from_count,
4074 da->da_addr, da->da_addrlen, 0);
4075 }
4076 da = next;
4077 }
4078}
Johannes Bergc4029082009-06-17 17:43:30 +02004079EXPORT_SYMBOL_GPL(__dev_addr_unsync);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004080
4081/**
4082 * dev_unicast_sync - Synchronize device's unicast list to another device
4083 * @to: destination device
4084 * @from: source device
4085 *
4086 * Add newly added addresses to the destination device and release
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004087 * addresses that have no users left. The source device must be
 4088 * locked by netif_addr_lock_bh.
Chris Leeche83a2ea2008-01-31 16:53:23 -08004089 *
4090 * This function is intended to be called from the dev->set_rx_mode
4091 * function of layered software devices.
4092 */
4093int dev_unicast_sync(struct net_device *to, struct net_device *from)
4094{
4095 int err = 0;
4096
Jiri Pirkoccffad252009-05-22 23:22:17 +00004097 if (to->addr_len != from->addr_len)
4098 return -EINVAL;
4099
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004100 netif_addr_lock_bh(to);
Jiri Pirko31278e72009-06-17 01:12:19 +00004101 err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004102 if (!err)
4103 __dev_set_rx_mode(to);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004104 netif_addr_unlock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004105 return err;
4106}
4107EXPORT_SYMBOL(dev_unicast_sync);
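/*
 * Illustrative sketch: the classic caller of dev_unicast_sync() is a
 * layered driver (e.g. the 802.1q VLAN code) mirroring its unicast
 * list down to the real device from its rx-mode handler. The foo_*
 * names below are placeholders, not real kernel symbols:
 *
 *	static void foo_set_rx_mode(struct net_device *dev)
 *	{
 *		struct foo_priv *priv = netdev_priv(dev);
 *
 *		dev_unicast_sync(priv->lowerdev, dev);
 *	}
 */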
4108
4109/**
Randy Dunlapbc2cda12008-02-13 15:03:25 -08004110 * dev_unicast_unsync - Remove synchronized addresses from the destination device
Chris Leeche83a2ea2008-01-31 16:53:23 -08004111 * @to: destination device
4112 * @from: source device
4113 *
4114 * Remove all addresses that were added to the destination device by
4115 * dev_unicast_sync(). This function is intended to be called from the
4116 * dev->stop function of layered software devices.
4117 */
4118void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4119{
Jiri Pirkoccffad252009-05-22 23:22:17 +00004120 if (to->addr_len != from->addr_len)
4121 return;
4122
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004123 netif_addr_lock_bh(from);
4124 netif_addr_lock(to);
Jiri Pirko31278e72009-06-17 01:12:19 +00004125 __hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004126 __dev_set_rx_mode(to);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004127 netif_addr_unlock(to);
4128 netif_addr_unlock_bh(from);
Chris Leeche83a2ea2008-01-31 16:53:23 -08004129}
4130EXPORT_SYMBOL(dev_unicast_unsync);
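/*
 * Illustrative counterpart to the sketch above: the same hypothetical
 * layered driver undoes the mirroring when its device is stopped:
 *
 *	static int foo_stop(struct net_device *dev)
 *	{
 *		struct foo_priv *priv = netdev_priv(dev);
 *
 *		dev_unicast_unsync(priv->lowerdev, dev);
 *		return 0;
 *	}
 */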
4131
Jiri Pirkoccffad252009-05-22 23:22:17 +00004132static void dev_unicast_flush(struct net_device *dev)
4133{
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004134 netif_addr_lock_bh(dev);
Jiri Pirko31278e72009-06-17 01:12:19 +00004135 __hw_addr_flush(&dev->uc);
Jiri Pirkoa6ac65d2009-07-30 01:06:12 +00004136 netif_addr_unlock_bh(dev);
Jiri Pirkoccffad252009-05-22 23:22:17 +00004137}
4138
4139static void dev_unicast_init(struct net_device *dev)
4140{
Jiri Pirko31278e72009-06-17 01:12:19 +00004141 __hw_addr_init(&dev->uc);
Jiri Pirkoccffad252009-05-22 23:22:17 +00004142}
4143
4144
Denis Cheng12972622007-07-18 02:12:56 -07004145static void __dev_addr_discard(struct dev_addr_list **list)
4146{
4147 struct dev_addr_list *tmp;
4148
4149 while (*list != NULL) {
4150 tmp = *list;
4151 *list = tmp->next;
4152 if (tmp->da_users > tmp->da_gusers)
4153 printk("__dev_addr_discard: address leakage! "
4154 "da_users=%d\n", tmp->da_users);
4155 kfree(tmp);
4156 }
4157}
4158
Denis Cheng26cc2522007-07-18 02:12:03 -07004159static void dev_addr_discard(struct net_device *dev)
Patrick McHardy4417da62007-06-27 01:28:10 -07004160{
David S. Millerb9e40852008-07-15 00:15:08 -07004161 netif_addr_lock_bh(dev);
Denis Cheng26cc2522007-07-18 02:12:03 -07004162
Denis Cheng456ad752007-07-18 02:10:54 -07004163 __dev_addr_discard(&dev->mc_list);
4164 dev->mc_count = 0;
Denis Cheng26cc2522007-07-18 02:12:03 -07004165
David S. Millerb9e40852008-07-15 00:15:08 -07004166 netif_addr_unlock_bh(dev);
Denis Cheng456ad752007-07-18 02:10:54 -07004167}
4168
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004169/**
4170 * dev_get_flags - get flags reported to userspace
4171 * @dev: device
4172 *
4173 * Get the combination of flag bits exported through APIs to userspace.
4174 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004175unsigned dev_get_flags(const struct net_device *dev)
4176{
4177 unsigned flags;
4178
4179 flags = (dev->flags & ~(IFF_PROMISC |
4180 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08004181 IFF_RUNNING |
4182 IFF_LOWER_UP |
4183 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07004184 (dev->gflags & (IFF_PROMISC |
4185 IFF_ALLMULTI));
4186
Stefan Rompfb00055a2006-03-20 17:09:11 -08004187 if (netif_running(dev)) {
4188 if (netif_oper_up(dev))
4189 flags |= IFF_RUNNING;
4190 if (netif_carrier_ok(dev))
4191 flags |= IFF_LOWER_UP;
4192 if (netif_dormant(dev))
4193 flags |= IFF_DORMANT;
4194 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004195
4196 return flags;
4197}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004198EXPORT_SYMBOL(dev_get_flags);
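/*
 * Illustrative sketch: since dev_get_flags() folds the operational
 * state into the legacy IFF_* bits, a caller can test "administratively
 * up and really running" in one step; handle_link_up() stands in for
 * the caller's own code:
 *
 *	unsigned flags = dev_get_flags(dev);
 *
 *	if ((flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))
 *		handle_link_up(dev);
 */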
Linus Torvalds1da177e2005-04-16 15:20:36 -07004199
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004200/**
4201 * dev_change_flags - change device settings
4202 * @dev: device
4203 * @flags: device state flags
4204 *
4205 * Change settings on device based state flags. The flags are
4206 * in the userspace exported format.
4207 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004208int dev_change_flags(struct net_device *dev, unsigned flags)
4209{
Thomas Graf7c355f52007-06-05 16:03:03 -07004210 int ret, changes;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004211 int old_flags = dev->flags;
4212
Patrick McHardy24023452007-07-14 18:51:31 -07004213 ASSERT_RTNL();
4214
Linus Torvalds1da177e2005-04-16 15:20:36 -07004215 /*
4216 * Set the flags on our device.
4217 */
4218
4219 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4220 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4221 IFF_AUTOMEDIA)) |
4222 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4223 IFF_ALLMULTI));
4224
4225 /*
4226 * Load in the correct multicast list now the flags have changed.
4227 */
4228
Patrick McHardyb6c40d62008-10-07 15:26:48 -07004229 if ((old_flags ^ flags) & IFF_MULTICAST)
4230 dev_change_rx_flags(dev, IFF_MULTICAST);
Patrick McHardy24023452007-07-14 18:51:31 -07004231
Patrick McHardy4417da62007-06-27 01:28:10 -07004232 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004233
4234 /*
 4235 * Have we downed the interface? We handle IFF_UP ourselves
4236 * according to user attempts to set it, rather than blindly
4237 * setting it.
4238 */
4239
4240 ret = 0;
4241 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
4242 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
4243
4244 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07004245 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004246 }
4247
4248 if (dev->flags & IFF_UP &&
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004249 ((old_flags ^ dev->flags) & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
Linus Torvalds1da177e2005-04-16 15:20:36 -07004250 IFF_VOLATILE)))
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004251 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004252
4253 if ((flags ^ dev->gflags) & IFF_PROMISC) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004254 int inc = (flags & IFF_PROMISC) ? 1 : -1;
4255
Linus Torvalds1da177e2005-04-16 15:20:36 -07004256 dev->gflags ^= IFF_PROMISC;
4257 dev_set_promiscuity(dev, inc);
4258 }
4259
4260 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
 4261 is important. Some (broken) drivers set IFF_PROMISC when
 4262 IFF_ALLMULTI is requested, without asking us and without reporting it.
4263 */
4264 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004265 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4266
Linus Torvalds1da177e2005-04-16 15:20:36 -07004267 dev->gflags ^= IFF_ALLMULTI;
4268 dev_set_allmulti(dev, inc);
4269 }
4270
Thomas Graf7c355f52007-06-05 16:03:03 -07004271 /* Exclude state transition flags, already notified */
4272 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
4273 if (changes)
4274 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004275
4276 return ret;
4277}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004278EXPORT_SYMBOL(dev_change_flags);
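/*
 * Illustrative sketch: an in-kernel caller flips a flag the same way
 * SIOCSIFFLAGS does, with a read-modify-write of the userspace-visible
 * word under the rtnl semaphore, e.g. to enable promiscuous mode:
 *
 *	int err;
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev_get_flags(dev) | IFF_PROMISC);
 *	rtnl_unlock();
 *
 * Callers that only need the refcounted behaviour should prefer
 * dev_set_promiscuity() instead.
 */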
Linus Torvalds1da177e2005-04-16 15:20:36 -07004279
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004280/**
4281 * dev_set_mtu - Change maximum transfer unit
4282 * @dev: device
4283 * @new_mtu: new transfer unit
4284 *
4285 * Change the maximum transfer size of the network device.
4286 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004287int dev_set_mtu(struct net_device *dev, int new_mtu)
4288{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004289 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004290 int err;
4291
4292 if (new_mtu == dev->mtu)
4293 return 0;
4294
4295 /* MTU must be positive. */
4296 if (new_mtu < 0)
4297 return -EINVAL;
4298
4299 if (!netif_device_present(dev))
4300 return -ENODEV;
4301
4302 err = 0;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004303 if (ops->ndo_change_mtu)
4304 err = ops->ndo_change_mtu(dev, new_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004305 else
4306 dev->mtu = new_mtu;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004307
Linus Torvalds1da177e2005-04-16 15:20:36 -07004308 if (!err && dev->flags & IFF_UP)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004309 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004310 return err;
4311}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004312EXPORT_SYMBOL(dev_set_mtu);
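/*
 * Illustrative sketch: in-kernel users change the MTU exactly as the
 * SIOCSIFMTU path below does, holding the rtnl semaphore so that the
 * NETDEV_CHANGEMTU notifiers run safely. The value 9000 is just an
 * example jumbo MTU:
 *
 *	int err;
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, 9000);
 *	rtnl_unlock();
 */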
Linus Torvalds1da177e2005-04-16 15:20:36 -07004313
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07004314/**
4315 * dev_set_mac_address - Change Media Access Control Address
4316 * @dev: device
4317 * @sa: new address
4318 *
4319 * Change the hardware (MAC) address of the device
4320 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004321int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4322{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004323 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004324 int err;
4325
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004326 if (!ops->ndo_set_mac_address)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004327 return -EOPNOTSUPP;
4328 if (sa->sa_family != dev->type)
4329 return -EINVAL;
4330 if (!netif_device_present(dev))
4331 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004332 err = ops->ndo_set_mac_address(dev, sa);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004333 if (!err)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004334 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004335 return err;
4336}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004337EXPORT_SYMBOL(dev_set_mac_address);
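/*
 * Illustrative sketch: the new address travels in a struct sockaddr
 * whose sa_family must match dev->type (ARPHRD_ETHER for Ethernet).
 * The address bytes are invented for the example:
 *
 *	static const unsigned char mac[ETH_ALEN] = {
 *		0x02, 0x12, 0x34, 0x56, 0x78, 0x9a
 *	};
 *	struct sockaddr sa;
 *	int err;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, mac, ETH_ALEN);
 *	rtnl_lock();
 *	err = dev_set_mac_address(dev, &sa);
 *	rtnl_unlock();
 */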
Linus Torvalds1da177e2005-04-16 15:20:36 -07004338
4339/*
Eric Dumazet3710bec2009-11-01 19:42:09 +00004340 * Perform the SIOCxIFxxx calls, inside rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07004341 */
Jeff Garzik14e3e072007-10-08 00:06:32 -07004342static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004343{
4344 int err;
Eric Dumazet3710bec2009-11-01 19:42:09 +00004345 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004346
4347 if (!dev)
4348 return -ENODEV;
4349
4350 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004351 case SIOCGIFFLAGS: /* Get interface flags */
4352 ifr->ifr_flags = (short) dev_get_flags(dev);
4353 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004354
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004355 case SIOCGIFMETRIC: /* Get the metric on the interface
4356 (currently unused) */
4357 ifr->ifr_metric = 0;
4358 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004359
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004360 case SIOCGIFMTU: /* Get the MTU of a device */
4361 ifr->ifr_mtu = dev->mtu;
4362 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004363
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004364 case SIOCGIFHWADDR:
4365 if (!dev->addr_len)
4366 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4367 else
4368 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4369 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4370 ifr->ifr_hwaddr.sa_family = dev->type;
4371 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004372
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004373 case SIOCGIFSLAVE:
4374 err = -EINVAL;
4375 break;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004376
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004377 case SIOCGIFMAP:
4378 ifr->ifr_map.mem_start = dev->mem_start;
4379 ifr->ifr_map.mem_end = dev->mem_end;
4380 ifr->ifr_map.base_addr = dev->base_addr;
4381 ifr->ifr_map.irq = dev->irq;
4382 ifr->ifr_map.dma = dev->dma;
4383 ifr->ifr_map.port = dev->if_port;
4384 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004385
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004386 case SIOCGIFINDEX:
4387 ifr->ifr_ifindex = dev->ifindex;
4388 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004389
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004390 case SIOCGIFTXQLEN:
4391 ifr->ifr_qlen = dev->tx_queue_len;
4392 return 0;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004393
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004394 default:
4395 /* dev_ioctl() should ensure this case
4396 * is never reached
4397 */
4398 WARN_ON(1);
4399 err = -EINVAL;
4400 break;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004401
4402 }
4403 return err;
4404}
4405
4406/*
4407 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
4408 */
4409static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4410{
4411 int err;
4412 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08004413 const struct net_device_ops *ops;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004414
4415 if (!dev)
4416 return -ENODEV;
4417
Jarek Poplawski5f2f6da2008-12-22 19:35:28 -08004418 ops = dev->netdev_ops;
4419
Jeff Garzik14e3e072007-10-08 00:06:32 -07004420 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004421 case SIOCSIFFLAGS: /* Set interface flags */
4422 return dev_change_flags(dev, ifr->ifr_flags);
Jeff Garzik14e3e072007-10-08 00:06:32 -07004423
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004424 case SIOCSIFMETRIC: /* Set the metric on the interface
4425 (currently unused) */
4426 return -EOPNOTSUPP;
Jeff Garzik14e3e072007-10-08 00:06:32 -07004427
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004428 case SIOCSIFMTU: /* Set the MTU of a device */
4429 return dev_set_mtu(dev, ifr->ifr_mtu);
Jeff Garzik14e3e072007-10-08 00:06:32 -07004430
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004431 case SIOCSIFHWADDR:
4432 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004433
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004434 case SIOCSIFHWBROADCAST:
4435 if (ifr->ifr_hwaddr.sa_family != dev->type)
4436 return -EINVAL;
4437 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4438 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4439 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4440 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004441
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004442 case SIOCSIFMAP:
4443 if (ops->ndo_set_config) {
4444 if (!netif_device_present(dev))
4445 return -ENODEV;
4446 return ops->ndo_set_config(dev, &ifr->ifr_map);
4447 }
4448 return -EOPNOTSUPP;
4449
4450 case SIOCADDMULTI:
4451 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4452 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4453 return -EINVAL;
4454 if (!netif_device_present(dev))
4455 return -ENODEV;
4456 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4457 dev->addr_len, 1);
4458
4459 case SIOCDELMULTI:
4460 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4461 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4462 return -EINVAL;
4463 if (!netif_device_present(dev))
4464 return -ENODEV;
4465 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4466 dev->addr_len, 1);
4467
4468 case SIOCSIFTXQLEN:
4469 if (ifr->ifr_qlen < 0)
4470 return -EINVAL;
4471 dev->tx_queue_len = ifr->ifr_qlen;
4472 return 0;
4473
4474 case SIOCSIFNAME:
4475 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4476 return dev_change_name(dev, ifr->ifr_newname);
4477
4478 /*
4479 * Unknown or private ioctl
4480 */
4481 default:
4482 if ((cmd >= SIOCDEVPRIVATE &&
4483 cmd <= SIOCDEVPRIVATE + 15) ||
4484 cmd == SIOCBONDENSLAVE ||
4485 cmd == SIOCBONDRELEASE ||
4486 cmd == SIOCBONDSETHWADDR ||
4487 cmd == SIOCBONDSLAVEINFOQUERY ||
4488 cmd == SIOCBONDINFOQUERY ||
4489 cmd == SIOCBONDCHANGEACTIVE ||
4490 cmd == SIOCGMIIPHY ||
4491 cmd == SIOCGMIIREG ||
4492 cmd == SIOCSMIIREG ||
4493 cmd == SIOCBRADDIF ||
4494 cmd == SIOCBRDELIF ||
4495 cmd == SIOCSHWTSTAMP ||
4496 cmd == SIOCWANDEV) {
4497 err = -EOPNOTSUPP;
4498 if (ops->ndo_do_ioctl) {
4499 if (netif_device_present(dev))
4500 err = ops->ndo_do_ioctl(dev, ifr, cmd);
4501 else
4502 err = -ENODEV;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004503 }
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004504 } else
4505 err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004506
4507 }
4508 return err;
4509}
4510
4511/*
4512 * This function handles all "interface"-type I/O control requests. The actual
4513 * 'doing' part of this is dev_ifsioc above.
4514 */
4515
4516/**
4517 * dev_ioctl - network device ioctl
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004518 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07004519 * @cmd: command to issue
4520 * @arg: pointer to a struct ifreq in user space
4521 *
4522 * Issue ioctl functions to devices. This is normally called by the
4523 * user space syscall interfaces but can sometimes be useful for
4524 * other purposes. The return value is the return from the syscall if
4525 * positive or a negative errno code on error.
4526 */
4527
Eric W. Biederman881d9662007-09-17 11:56:21 -07004528int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004529{
4530 struct ifreq ifr;
4531 int ret;
4532 char *colon;
4533
 4534 /* One special case: SIOCGIFCONF takes an ifconf argument
 4535 and requires a shared lock, because it sleeps writing
4536 to user space.
4537 */
4538
4539 if (cmd == SIOCGIFCONF) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004540 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004541 ret = dev_ifconf(net, (char __user *) arg);
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004542 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004543 return ret;
4544 }
4545 if (cmd == SIOCGIFNAME)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004546 return dev_ifname(net, (struct ifreq __user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004547
4548 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4549 return -EFAULT;
4550
4551 ifr.ifr_name[IFNAMSIZ-1] = 0;
4552
4553 colon = strchr(ifr.ifr_name, ':');
4554 if (colon)
4555 *colon = 0;
4556
4557 /*
4558 * See which interface the caller is talking about.
4559 */
4560
4561 switch (cmd) {
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004562 /*
4563 * These ioctl calls:
4564 * - can be done by all.
4565 * - atomic and do not require locking.
4566 * - return a value
4567 */
4568 case SIOCGIFFLAGS:
4569 case SIOCGIFMETRIC:
4570 case SIOCGIFMTU:
4571 case SIOCGIFHWADDR:
4572 case SIOCGIFSLAVE:
4573 case SIOCGIFMAP:
4574 case SIOCGIFINDEX:
4575 case SIOCGIFTXQLEN:
4576 dev_load(net, ifr.ifr_name);
Eric Dumazet3710bec2009-11-01 19:42:09 +00004577 rcu_read_lock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004578 ret = dev_ifsioc_locked(net, &ifr, cmd);
Eric Dumazet3710bec2009-11-01 19:42:09 +00004579 rcu_read_unlock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004580 if (!ret) {
4581 if (colon)
4582 *colon = ':';
4583 if (copy_to_user(arg, &ifr,
4584 sizeof(struct ifreq)))
4585 ret = -EFAULT;
4586 }
4587 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004588
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004589 case SIOCETHTOOL:
4590 dev_load(net, ifr.ifr_name);
4591 rtnl_lock();
4592 ret = dev_ethtool(net, &ifr);
4593 rtnl_unlock();
4594 if (!ret) {
4595 if (colon)
4596 *colon = ':';
4597 if (copy_to_user(arg, &ifr,
4598 sizeof(struct ifreq)))
4599 ret = -EFAULT;
4600 }
4601 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004602
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004603 /*
4604 * These ioctl calls:
4605 * - require superuser power.
4606 * - require strict serialization.
4607 * - return a value
4608 */
4609 case SIOCGMIIPHY:
4610 case SIOCGMIIREG:
4611 case SIOCSIFNAME:
4612 if (!capable(CAP_NET_ADMIN))
4613 return -EPERM;
4614 dev_load(net, ifr.ifr_name);
4615 rtnl_lock();
4616 ret = dev_ifsioc(net, &ifr, cmd);
4617 rtnl_unlock();
4618 if (!ret) {
4619 if (colon)
4620 *colon = ':';
4621 if (copy_to_user(arg, &ifr,
4622 sizeof(struct ifreq)))
4623 ret = -EFAULT;
4624 }
4625 return ret;
4626
4627 /*
4628 * These ioctl calls:
4629 * - require superuser power.
4630 * - require strict serialization.
4631 * - do not return a value
4632 */
4633 case SIOCSIFFLAGS:
4634 case SIOCSIFMETRIC:
4635 case SIOCSIFMTU:
4636 case SIOCSIFMAP:
4637 case SIOCSIFHWADDR:
4638 case SIOCSIFSLAVE:
4639 case SIOCADDMULTI:
4640 case SIOCDELMULTI:
4641 case SIOCSIFHWBROADCAST:
4642 case SIOCSIFTXQLEN:
4643 case SIOCSMIIREG:
4644 case SIOCBONDENSLAVE:
4645 case SIOCBONDRELEASE:
4646 case SIOCBONDSETHWADDR:
4647 case SIOCBONDCHANGEACTIVE:
4648 case SIOCBRADDIF:
4649 case SIOCBRDELIF:
4650 case SIOCSHWTSTAMP:
4651 if (!capable(CAP_NET_ADMIN))
4652 return -EPERM;
4653 /* fall through */
4654 case SIOCBONDSLAVEINFOQUERY:
4655 case SIOCBONDINFOQUERY:
4656 dev_load(net, ifr.ifr_name);
4657 rtnl_lock();
4658 ret = dev_ifsioc(net, &ifr, cmd);
4659 rtnl_unlock();
4660 return ret;
4661
4662 case SIOCGIFMEM:
4663 /* Get the per device memory space. We can add this but
4664 * currently do not support it */
4665 case SIOCSIFMEM:
4666 /* Set the per device memory buffer space.
4667 * Not applicable in our case */
4668 case SIOCSIFLINK:
4669 return -EINVAL;
4670
4671 /*
4672 * Unknown or private ioctl.
4673 */
4674 default:
4675 if (cmd == SIOCWANDEV ||
4676 (cmd >= SIOCDEVPRIVATE &&
4677 cmd <= SIOCDEVPRIVATE + 15)) {
Eric W. Biederman881d9662007-09-17 11:56:21 -07004678 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004679 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07004680 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004681 rtnl_unlock();
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004682 if (!ret && copy_to_user(arg, &ifr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004683 sizeof(struct ifreq)))
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004684 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004685 return ret;
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004686 }
4687 /* Take care of Wireless Extensions */
4688 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
4689 return wext_handle_ioctl(net, &ifr, cmd, arg);
4690 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004691 }
4692}
4693
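/*
 * Illustrative sketch of the other end of dev_ioctl(): userspace
 * reaches it through an ordinary socket. Reading an MTU, for example
 * ("eth0" is just a sample interface name):
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *		printf("mtu %d\n", ifr.ifr_mtu);
 *	close(fd);
 */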
4694
4695/**
4696 * dev_new_index - allocate an ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07004697 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07004698 *
4699 * Returns a suitable unique value for a new device interface
4700 * number. The caller must hold the rtnl semaphore or the
4701 * dev_base_lock to be sure it remains unique.
4702 */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004703static int dev_new_index(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004704{
4705 static int ifindex;
4706 for (;;) {
4707 if (++ifindex <= 0)
4708 ifindex = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004709 if (!__dev_get_by_index(net, ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004710 return ifindex;
4711 }
4712}
4713
Linus Torvalds1da177e2005-04-16 15:20:36 -07004714/* Delayed registration/unregistration */
Denis Cheng3b5b34f2007-12-07 00:49:17 -08004715static LIST_HEAD(net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004716
Stephen Hemminger6f05f622007-03-08 20:46:03 -08004717static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004718{
Linus Torvalds1da177e2005-04-16 15:20:36 -07004719 list_add_tail(&dev->todo_list, &net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004720}
4721
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004722static void rollback_registered_many(struct list_head *head)
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004723{
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004724 struct net_device *dev;
4725
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004726 BUG_ON(dev_boot_phase);
4727 ASSERT_RTNL();
4728
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004729 list_for_each_entry(dev, head, unreg_list) {
 4730 /* Some devices call this without registering,
4731 * for initialization unwind.
4732 */
4733 if (dev->reg_state == NETREG_UNINITIALIZED) {
4734 pr_debug("unregister_netdevice: device %s/%p never "
4735 "was registered\n", dev->name, dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004736
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004737 WARN_ON(1);
4738 return;
4739 }
4740
4741 BUG_ON(dev->reg_state != NETREG_REGISTERED);
4742
4743 /* If device is running, close it first. */
4744 dev_close(dev);
4745
4746 /* And unlink it from device chain. */
4747 unlist_netdevice(dev);
4748
4749 dev->reg_state = NETREG_UNREGISTERING;
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004750 }
4751
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004752 synchronize_net();
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004753
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004754 list_for_each_entry(dev, head, unreg_list) {
4755 /* Shutdown queueing discipline. */
4756 dev_shutdown(dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004757
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004758
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004759 /* Notify protocols that we are about to destroy
 4760 this device. They should clean up all their state.
4761 */
4762 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4763
4764 /*
4765 * Flush the unicast and multicast chains
4766 */
4767 dev_unicast_flush(dev);
4768 dev_addr_discard(dev);
4769
4770 if (dev->netdev_ops->ndo_uninit)
4771 dev->netdev_ops->ndo_uninit(dev);
4772
4773 /* Notifier chain MUST detach us from master device. */
4774 WARN_ON(dev->master);
4775
4776 /* Remove entries from kobject tree */
4777 netdev_unregister_kobject(dev);
4778 }
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004779
4780 synchronize_net();
4781
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004782 list_for_each_entry(dev, head, unreg_list)
4783 dev_put(dev);
4784}
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004785
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004786static void rollback_registered(struct net_device *dev)
4787{
4788 LIST_HEAD(single);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004789
Eric Dumazet9b5e3832009-10-27 07:04:19 +00004790 list_add(&dev->unreg_list, &single);
4791 rollback_registered_many(&single);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004792}
4793
David S. Millere8a04642008-07-17 00:34:19 -07004794static void __netdev_init_queue_locks_one(struct net_device *dev,
4795 struct netdev_queue *dev_queue,
4796 void *_unused)
David S. Millerc773e842008-07-08 23:13:53 -07004797{
4798 spin_lock_init(&dev_queue->_xmit_lock);
David S. Millercf508b12008-07-22 14:16:42 -07004799 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
David S. Millerc773e842008-07-08 23:13:53 -07004800 dev_queue->xmit_lock_owner = -1;
4801}
4802
4803static void netdev_init_queue_locks(struct net_device *dev)
4804{
David S. Millere8a04642008-07-17 00:34:19 -07004805 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4806 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
David S. Millerc773e842008-07-08 23:13:53 -07004807}
4808
Herbert Xub63365a2008-10-23 01:11:29 -07004809unsigned long netdev_fix_features(unsigned long features, const char *name)
4810{
4811 /* Fix illegal SG+CSUM combinations. */
4812 if ((features & NETIF_F_SG) &&
4813 !(features & NETIF_F_ALL_CSUM)) {
4814 if (name)
4815 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
4816 "checksum feature.\n", name);
4817 features &= ~NETIF_F_SG;
4818 }
4819
4820 /* TSO requires that SG is present as well. */
4821 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
4822 if (name)
4823 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
4824 "SG feature.\n", name);
4825 features &= ~NETIF_F_TSO;
4826 }
4827
4828 if (features & NETIF_F_UFO) {
4829 if (!(features & NETIF_F_GEN_CSUM)) {
4830 if (name)
4831 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4832 "since no NETIF_F_HW_CSUM feature.\n",
4833 name);
4834 features &= ~NETIF_F_UFO;
4835 }
4836
4837 if (!(features & NETIF_F_SG)) {
4838 if (name)
4839 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
4840 "since no NETIF_F_SG feature.\n", name);
4841 features &= ~NETIF_F_UFO;
4842 }
4843 }
4844
4845 return features;
4846}
4847EXPORT_SYMBOL(netdev_fix_features);
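/*
 * Illustrative sketch: a driver can run a tentative feature mask
 * through netdev_fix_features() before publishing it, so impossible
 * combinations (TSO without SG, SG without any checksum offload) are
 * masked off with a console notice:
 *
 *	dev->features |= NETIF_F_SG | NETIF_F_TSO;
 *	dev->features = netdev_fix_features(dev->features, dev->name);
 */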
4848
Linus Torvalds1da177e2005-04-16 15:20:36 -07004849/**
4850 * register_netdevice - register a network device
4851 * @dev: device to register
4852 *
4853 * Take a completed network device structure and add it to the kernel
4854 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4855 * chain. 0 is returned on success. A negative errno code is returned
4856 * on a failure to set up the device, or if the name is a duplicate.
4857 *
4858 * Callers must hold the rtnl semaphore. You may want
4859 * register_netdev() instead of this.
4860 *
4861 * BUGS:
4862 * The locking appears insufficient to guarantee two parallel registers
4863 * will not get the same name.
4864 */
4865
4866int register_netdevice(struct net_device *dev)
4867{
4868 struct hlist_head *head;
4869 struct hlist_node *p;
4870 int ret;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004871 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004872
4873 BUG_ON(dev_boot_phase);
4874 ASSERT_RTNL();
4875
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004876 might_sleep();
4877
Linus Torvalds1da177e2005-04-16 15:20:36 -07004878 /* When net_device's are persistent, this will be fatal. */
4879 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004880 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004881
David S. Millerf1f28aa2008-07-15 00:08:33 -07004882 spin_lock_init(&dev->addr_list_lock);
David S. Millercf508b12008-07-22 14:16:42 -07004883 netdev_set_addr_lockdep_class(dev);
David S. Millerc773e842008-07-08 23:13:53 -07004884 netdev_init_queue_locks(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004885
Linus Torvalds1da177e2005-04-16 15:20:36 -07004886 dev->iflink = -1;
4887
4888 /* Init, if this function is available */
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004889 if (dev->netdev_ops->ndo_init) {
4890 ret = dev->netdev_ops->ndo_init(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004891 if (ret) {
4892 if (ret > 0)
4893 ret = -EIO;
Adrian Bunk90833aa2006-11-13 16:02:22 -08004894 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004895 }
4896 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004897
Linus Torvalds1da177e2005-04-16 15:20:36 -07004898 if (!dev_valid_name(dev->name)) {
4899 ret = -EINVAL;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004900 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004901 }
4902
Eric W. Biederman881d9662007-09-17 11:56:21 -07004903 dev->ifindex = dev_new_index(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004904 if (dev->iflink == -1)
4905 dev->iflink = dev->ifindex;
4906
4907 /* Check for existence of name */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004908 head = dev_name_hash(net, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004909 hlist_for_each(p, head) {
4910 struct net_device *d
4911 = hlist_entry(p, struct net_device, name_hlist);
4912 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4913 ret = -EEXIST;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004914 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004915 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004916 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004917
Stephen Hemmingerd212f872007-06-27 00:47:37 -07004918 /* Fix illegal checksum combinations */
4919 if ((dev->features & NETIF_F_HW_CSUM) &&
4920 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4921 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4922 dev->name);
4923 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4924 }
4925
4926 if ((dev->features & NETIF_F_NO_CSUM) &&
4927 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4928 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4929 dev->name);
4930 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4931 }
4932
Herbert Xub63365a2008-10-23 01:11:29 -07004933 dev->features = netdev_fix_features(dev->features, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004934
Lennert Buytenheke5a4a722008-08-03 01:23:10 -07004935 /* Enable software GSO if SG is supported. */
4936 if (dev->features & NETIF_F_SG)
4937 dev->features |= NETIF_F_GSO;
4938
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07004939 netdev_initialize_kobject(dev);
Johannes Berg7ffbe3f2009-10-02 05:15:27 +00004940
4941 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
4942 ret = notifier_to_errno(ret);
4943 if (ret)
4944 goto err_uninit;
4945
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004946 ret = netdev_register_kobject(dev);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004947 if (ret)
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004948 goto err_uninit;
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004949 dev->reg_state = NETREG_REGISTERED;
4950
Linus Torvalds1da177e2005-04-16 15:20:36 -07004951 /*
4952 * Default initial state at registry is that the
4953 * device is present.
4954 */
4955
4956 set_bit(__LINK_STATE_PRESENT, &dev->state);
4957
Linus Torvalds1da177e2005-04-16 15:20:36 -07004958 dev_init_scheduler(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004959 dev_hold(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004960 list_netdevice(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004961
 4962 /* Notify protocols that a new device appeared. */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004963 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07004964 ret = notifier_to_errno(ret);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004965 if (ret) {
4966 rollback_registered(dev);
4967 dev->reg_state = NETREG_UNREGISTERED;
4968 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004969
4970out:
4971 return ret;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004972
4973err_uninit:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004974 if (dev->netdev_ops->ndo_uninit)
4975 dev->netdev_ops->ndo_uninit(dev);
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004976 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004977}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07004978EXPORT_SYMBOL(register_netdevice);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004979
4980/**
Benjamin Herrenschmidt937f1ba2009-01-14 21:05:05 -08004981 * init_dummy_netdev - init a dummy network device for NAPI
4982 * @dev: device to init
4983 *
 4984 * This takes a network device structure and initializes the minimum
 4985 * number of fields so it can be used to schedule NAPI polls without
 4986 * registering a full-blown interface. This is to be used by drivers
4987 * that need to tie several hardware interfaces to a single NAPI
4988 * poll scheduler due to HW limitations.
4989 */
4990int init_dummy_netdev(struct net_device *dev)
4991{
4992 /* Clear everything. Note we don't initialize spinlocks
 4993 * as they aren't supposed to be taken by any of the
4994 * NAPI code and this dummy netdev is supposed to be
4995 * only ever used for NAPI polls
4996 */
4997 memset(dev, 0, sizeof(struct net_device));
4998
4999 /* make sure we BUG if trying to hit standard
5000 * register/unregister code path
5001 */
5002 dev->reg_state = NETREG_DUMMY;
5003
5004 /* initialize the ref count */
5005 atomic_set(&dev->refcnt, 1);
5006
5007 /* NAPI wants this */
5008 INIT_LIST_HEAD(&dev->napi_list);
5009
5010 /* a dummy interface is started by default */
5011 set_bit(__LINK_STATE_PRESENT, &dev->state);
5012 set_bit(__LINK_STATE_START, &dev->state);
5013
5014 return 0;
5015}
5016EXPORT_SYMBOL_GPL(init_dummy_netdev);
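/*
 * Illustrative sketch: a driver with one piece of hardware but several
 * DMA channels can hang one NAPI context per channel off a dummy
 * netdev. Every foo_* name below is a placeholder:
 *
 *	static struct net_device foo_dummy_dev;
 *	static struct napi_struct foo_napi;
 *
 *	init_dummy_netdev(&foo_dummy_dev);
 *	netif_napi_add(&foo_dummy_dev, &foo_napi, foo_poll, 64);
 *	napi_enable(&foo_napi);
 *
 * where foo_poll() is the driver's usual NAPI poll callback.
 */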
5017
5018
5019/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07005020 * register_netdev - register a network device
5021 * @dev: device to register
5022 *
5023 * Take a completed network device structure and add it to the kernel
5024 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5025 * chain. 0 is returned on success. A negative errno code is returned
5026 * on a failure to set up the device, or if the name is a duplicate.
5027 *
Borislav Petkov38b4da382007-04-20 22:14:10 -07005028 * This is a wrapper around register_netdevice that takes the rtnl semaphore
Linus Torvalds1da177e2005-04-16 15:20:36 -07005029 * and expands the device name if you passed a format string to
5030 * alloc_netdev.
5031 */
5032int register_netdev(struct net_device *dev)
5033{
5034 int err;
5035
5036 rtnl_lock();
5037
5038 /*
5039 * If the name is a format string the caller wants us to do a
5040 * name allocation.
5041 */
5042 if (strchr(dev->name, '%')) {
5043 err = dev_alloc_name(dev, dev->name);
5044 if (err < 0)
5045 goto out;
5046 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005047
Linus Torvalds1da177e2005-04-16 15:20:36 -07005048 err = register_netdevice(dev);
5049out:
5050 rtnl_unlock();
5051 return err;
5052}
5053EXPORT_SYMBOL(register_netdev);
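/*
 * Illustrative sketch of the usual driver-side lifecycle around this
 * call; struct foo_priv and foo_setup() are placeholders:
 *
 *	struct net_device *dev;
 *	int err;
 *
 *	dev = alloc_netdev(sizeof(struct foo_priv), "foo%d", foo_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 *
 * register_netdev() expands "foo%d" to the first free foo<N> name; on
 * teardown the driver calls unregister_netdev() and then free_netdev().
 */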
5054
5055/*
5056 * netdev_wait_allrefs - wait until all references are gone.
5057 *
5058 * This is called when unregistering network devices.
5059 *
5060 * Any protocol or device that holds a reference should register
5061 * for netdevice notification, and cleanup and put back the
5062 * reference if they receive an UNREGISTER event.
5063 * We can get stuck here if buggy protocols don't correctly
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005064 * call dev_put.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005065 */
5066static void netdev_wait_allrefs(struct net_device *dev)
5067{
5068 unsigned long rebroadcast_time, warning_time;
5069
5070 rebroadcast_time = warning_time = jiffies;
5071 while (atomic_read(&dev->refcnt) != 0) {
5072 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08005073 rtnl_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005074
5075 /* Rebroadcast unregister notification */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07005076 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005077
5078 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5079 &dev->state)) {
5080 /* We must not have linkwatch events
5081 * pending on unregister. If this
5082 * happens, we simply run the queue
5083 * unscheduled, resulting in a noop
5084 * for this device.
5085 */
5086 linkwatch_run_queue();
5087 }
5088
Stephen Hemminger6756ae42006-03-20 22:23:58 -08005089 __rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005090
5091 rebroadcast_time = jiffies;
5092 }
5093
5094 msleep(250);
5095
5096 if (time_after(jiffies, warning_time + 10 * HZ)) {
5097 printk(KERN_EMERG "unregister_netdevice: "
5098 "waiting for %s to become free. Usage "
5099 "count = %d\n",
5100 dev->name, atomic_read(&dev->refcnt));
5101 warning_time = jiffies;
5102 }
5103 }
5104}
5105
5106/* The sequence is:
5107 *
5108 * rtnl_lock();
5109 * ...
5110 * register_netdevice(x1);
5111 * register_netdevice(x2);
5112 * ...
5113 * unregister_netdevice(y1);
5114 * unregister_netdevice(y2);
5115 * ...
5116 * rtnl_unlock();
5117 * free_netdev(y1);
5118 * free_netdev(y2);
5119 *
Herbert Xu58ec3b42008-10-07 15:50:03 -07005120 * We are invoked by rtnl_unlock().
Linus Torvalds1da177e2005-04-16 15:20:36 -07005121 * This allows us to deal with problems:
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005122 * 1) We can delete sysfs objects which invoke hotplug
Linus Torvalds1da177e2005-04-16 15:20:36 -07005123 * without deadlocking with linkwatch via keventd.
5124 * 2) Since we run with the RTNL semaphore not held, we can sleep
5125 * safely in order to wait for the netdev refcnt to drop to zero.
Herbert Xu58ec3b42008-10-07 15:50:03 -07005126 *
5127 * We must not return until all unregister events added during
5128 * the interval the lock was held have been completed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005129 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005130void netdev_run_todo(void)
5131{
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005132 struct list_head list;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005133
Linus Torvalds1da177e2005-04-16 15:20:36 -07005134 /* Snapshot list, allow later requests */
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005135 list_replace_init(&net_todo_list, &list);
Herbert Xu58ec3b42008-10-07 15:50:03 -07005136
5137 __rtnl_unlock();
Oleg Nesterov626ab0e2006-06-23 02:05:55 -07005138
Linus Torvalds1da177e2005-04-16 15:20:36 -07005139 while (!list_empty(&list)) {
5140 struct net_device *dev
5141 = list_entry(list.next, struct net_device, todo_list);
5142 list_del(&dev->todo_list);
5143
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005144 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005145 printk(KERN_ERR "network todo '%s' but state %d\n",
5146 dev->name, dev->reg_state);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005147 dump_stack();
5148 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005149 }
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005150
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005151 dev->reg_state = NETREG_UNREGISTERED;
5152
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07005153 on_each_cpu(flush_backlog, dev, 1);
5154
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005155 netdev_wait_allrefs(dev);
5156
5157 /* paranoia */
5158 BUG_ON(atomic_read(&dev->refcnt));
Ilpo Järvinen547b7922008-07-25 21:43:18 -07005159 WARN_ON(dev->ip_ptr);
5160 WARN_ON(dev->ip6_ptr);
5161 WARN_ON(dev->dn_ptr);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005162
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07005163 if (dev->destructor)
5164 dev->destructor(dev);
Stephen Hemminger9093bbb2007-05-19 15:39:25 -07005165
5166 /* Free network device */
5167 kobject_put(&dev->dev.kobj);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005168 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005169}
5170
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005171/**
Eric Dumazetd83345a2009-11-16 03:36:51 +00005172 * dev_txq_stats_fold - fold tx_queues stats
5173 * @dev: device to get statistics from
5174 * @stats: struct net_device_stats to hold results
5175 */
5176void dev_txq_stats_fold(const struct net_device *dev,
5177 struct net_device_stats *stats)
5178{
5179 unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
5180 unsigned int i;
5181 struct netdev_queue *txq;
5182
5183 for (i = 0; i < dev->num_tx_queues; i++) {
5184 txq = netdev_get_tx_queue(dev, i);
5185 tx_bytes += txq->tx_bytes;
5186 tx_packets += txq->tx_packets;
5187 tx_dropped += txq->tx_dropped;
5188 }
5189 if (tx_bytes || tx_packets || tx_dropped) {
5190 stats->tx_bytes = tx_bytes;
5191 stats->tx_packets = tx_packets;
5192 stats->tx_dropped = tx_dropped;
5193 }
5194}
5195EXPORT_SYMBOL(dev_txq_stats_fold);
5196
5197/**
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005198 * dev_get_stats - get network device statistics
5199 * @dev: device to get statistics from
5200 *
5201 * Get network statistics from device. The device driver may provide
 5202 * its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
5203 * the internal statistics structure is used.
5204 */
5205const struct net_device_stats *dev_get_stats(struct net_device *dev)
Eric Dumazet7004bf22009-05-18 00:34:33 +00005206{
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005207 const struct net_device_ops *ops = dev->netdev_ops;
5208
5209 if (ops->ndo_get_stats)
5210 return ops->ndo_get_stats(dev);
Eric Dumazet7004bf22009-05-18 00:34:33 +00005211
Eric Dumazetd83345a2009-11-16 03:36:51 +00005212 dev_txq_stats_fold(dev, &dev->stats);
5213 return &dev->stats;
Rusty Russellc45d2862007-03-28 14:29:08 -07005214}
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08005215EXPORT_SYMBOL(dev_get_stats);
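/*
 * Illustrative sketch: readers such as the /proc/net/dev code simply
 * do the following while they hold a reference on the device:
 *
 *	const struct net_device_stats *stats = dev_get_stats(dev);
 *
 *	printk(KERN_DEBUG "%s: %lu packets received\n",
 *	       dev->name, stats->rx_packets);
 */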
Rusty Russellc45d2862007-03-28 14:29:08 -07005216
David S. Millerdc2b4842008-07-08 17:18:23 -07005217static void netdev_init_one_queue(struct net_device *dev,
David S. Millere8a04642008-07-17 00:34:19 -07005218 struct netdev_queue *queue,
5219 void *_unused)
David S. Millerdc2b4842008-07-08 17:18:23 -07005220{
David S. Millerdc2b4842008-07-08 17:18:23 -07005221 queue->dev = dev;
5222}
5223
David S. Millerbb949fb2008-07-08 16:55:56 -07005224static void netdev_init_queues(struct net_device *dev)
5225{
David S. Millere8a04642008-07-17 00:34:19 -07005226 netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5227 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
David S. Millerc3f26a22008-07-31 16:58:50 -07005228 spin_lock_init(&dev->tx_global_lock);
David S. Millerbb949fb2008-07-08 16:55:56 -07005229}
5230
Linus Torvalds1da177e2005-04-16 15:20:36 -07005231/**
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005232 * alloc_netdev_mq - allocate network device
Linus Torvalds1da177e2005-04-16 15:20:36 -07005233 * @sizeof_priv: size of private data to allocate space for
5234 * @name: device name format string
5235 * @setup: callback to initialize device
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005236 * @queue_count: the number of subqueues to allocate
Linus Torvalds1da177e2005-04-16 15:20:36 -07005237 *
5238 * Allocates a struct net_device with private data area for driver use
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005239 * and performs basic initialization. Also allocates subqueue structs
5240 * for each queue on the device at the end of the netdevice.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005241 */
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005242struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5243 void (*setup)(struct net_device *), unsigned int queue_count)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005244{
David S. Millere8a04642008-07-17 00:34:19 -07005245 struct netdev_queue *tx;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005246 struct net_device *dev;
Stephen Hemminger79439862008-07-21 13:28:44 -07005247 size_t alloc_size;
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005248 struct net_device *p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005249
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07005250 BUG_ON(strlen(name) >= sizeof(dev->name));
5251
David S. Millerfd2ea0a2008-07-17 01:56:23 -07005252 alloc_size = sizeof(struct net_device);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07005253 if (sizeof_priv) {
5254 /* ensure 32-byte alignment of private area */
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005255 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
Alexey Dobriyand1643d22008-04-18 15:43:32 -07005256 alloc_size += sizeof_priv;
5257 }
5258 /* ensure 32-byte alignment of whole construct */
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005259 alloc_size += NETDEV_ALIGN - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005260
Paolo 'Blaisorblade' Giarrusso31380de2006-04-06 22:38:28 -07005261 p = kzalloc(alloc_size, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005262 if (!p) {
Stephen Hemmingerb6fe17d2006-08-29 17:06:13 -07005263 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07005264 return NULL;
5265 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005266
Stephen Hemminger79439862008-07-21 13:28:44 -07005267 tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
David S. Millere8a04642008-07-17 00:34:19 -07005268 if (!tx) {
5269 printk(KERN_ERR "alloc_netdev: Unable to allocate "
5270 "tx qdiscs.\n");
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00005271 goto free_p;
David S. Millere8a04642008-07-17 00:34:19 -07005272 }
5273
Eric Dumazet1ce8e7b2009-05-27 04:42:37 +00005274 dev = PTR_ALIGN(p, NETDEV_ALIGN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005275 dev->padded = (char *)dev - (char *)p;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00005276
5277 if (dev_addr_init(dev))
5278 goto free_tx;
5279
Jiri Pirkoccffad252009-05-22 23:22:17 +00005280 dev_unicast_init(dev);
5281
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09005282 dev_net_set(dev, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005283
David S. Millere8a04642008-07-17 00:34:19 -07005284 dev->_tx = tx;
5285 dev->num_tx_queues = queue_count;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07005286 dev->real_num_tx_queues = queue_count;
David S. Millere8a04642008-07-17 00:34:19 -07005287
Peter P Waskiewicz Jr82cc1a72008-03-21 03:43:19 -07005288 dev->gso_max_size = GSO_MAX_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005289
David S. Millerbb949fb2008-07-08 16:55:56 -07005290 netdev_init_queues(dev);
5291
Herbert Xud565b0a2008-12-15 23:38:52 -08005292 INIT_LIST_HEAD(&dev->napi_list);
Eric W. Biederman9fdce092009-10-30 14:51:13 +00005293 INIT_LIST_HEAD(&dev->unreg_list);
Eric Dumazet93f154b2009-05-18 22:19:19 -07005294 dev->priv_flags = IFF_XMIT_DST_RELEASE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005295 setup(dev);
5296 strcpy(dev->name, name);
5297 return dev;
Jiri Pirkoab9c73c2009-05-08 13:30:17 +00005298
5299free_tx:
5300 kfree(tx);
5301
5302free_p:
5303 kfree(p);
5304 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005305}
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07005306EXPORT_SYMBOL(alloc_netdev_mq);
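/*
 * Illustrative sketch: a driver with four hardware TX rings asks for
 * four subqueues up front; ether_setup() is the stock Ethernet setup
 * helper and struct foo_priv is a placeholder:
 *
 *	dev = alloc_netdev_mq(sizeof(struct foo_priv), "foo%d",
 *			      ether_setup, 4);
 *	if (!dev)
 *		return -ENOMEM;
 */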
Linus Torvalds1da177e2005-04-16 15:20:36 -07005307
5308/**
5309 * free_netdev - free network device
5310 * @dev: device
5311 *
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005312 * This function does the last stage of destroying an allocated device
5313 * interface. The reference to the device object is released.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005314 * If this is the last reference then it will be freed.
5315 */
5316void free_netdev(struct net_device *dev)
5317{
Herbert Xud565b0a2008-12-15 23:38:52 -08005318 struct napi_struct *p, *n;
5319
Denis V. Lunevf3005d72008-04-16 02:02:18 -07005320 release_net(dev_net(dev));
5321
David S. Millere8a04642008-07-17 00:34:19 -07005322 kfree(dev->_tx);
5323
Jiri Pirkof001fde2009-05-05 02:48:28 +00005324 /* Flush device addresses */
5325 dev_addr_flush(dev);
5326
Herbert Xud565b0a2008-12-15 23:38:52 -08005327 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5328 netif_napi_del(p);
5329
Stephen Hemminger3041a062006-05-26 13:25:24 -07005330 /* Compatibility with error handling in drivers */
Linus Torvalds1da177e2005-04-16 15:20:36 -07005331 if (dev->reg_state == NETREG_UNINITIALIZED) {
5332 kfree((char *)dev - dev->padded);
5333 return;
5334 }
5335
5336 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5337 dev->reg_state = NETREG_RELEASED;
5338
Greg Kroah-Hartman43cb76d2002-04-09 12:14:34 -07005339 /* will free via device release */
5340 put_device(&dev->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005341}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005342EXPORT_SYMBOL(free_netdev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005343
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005344/**
5345 * synchronize_net - Synchronize with packet receive processing
5346 *
5347 * Wait for packets currently being received to be done.
5348 * Does not block later packets from starting.
5349 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09005350void synchronize_net(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005351{
5352 might_sleep();
Paul E. McKenneyfbd568a3e2005-05-01 08:59:04 -07005353 synchronize_rcu();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005354}
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005355EXPORT_SYMBOL(synchronize_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005356
5357/**
Eric Dumazet44a08732009-10-27 07:03:04 +00005358 * unregister_netdevice_queue - remove device from the kernel
Linus Torvalds1da177e2005-04-16 15:20:36 -07005359 * @dev: device
Eric Dumazet44a08732009-10-27 07:03:04 +00005360 * @head: list
 5361 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07005362 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08005363 * from the kernel tables.
Eric Dumazet44a08732009-10-27 07:03:04 +00005364 * If head is not NULL, the device is queued to be unregistered later.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005365 *
5366 * Callers must hold the rtnl semaphore. You may want
5367 * unregister_netdev() instead of this.
5368 */
5369
Eric Dumazet44a08732009-10-27 07:03:04 +00005370void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005371{
Herbert Xua6620712007-12-12 19:21:56 -08005372 ASSERT_RTNL();
5373
Eric Dumazet44a08732009-10-27 07:03:04 +00005374 if (head) {
Eric W. Biederman9fdce092009-10-30 14:51:13 +00005375 list_move_tail(&dev->unreg_list, head);
Eric Dumazet44a08732009-10-27 07:03:04 +00005376 } else {
5377 rollback_registered(dev);
5378 /* Finish processing unregister after unlock */
5379 net_set_todo(dev);
5380 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005381}
Eric Dumazet44a08732009-10-27 07:03:04 +00005382EXPORT_SYMBOL(unregister_netdevice_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005383
5384/**
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005385 * unregister_netdevice_many - unregister many devices
5386 * @head: list of devices
 5387 * Unregister the devices on @head; final cleanup is deferred to netdev_run_todo().
5388 */
5389void unregister_netdevice_many(struct list_head *head)
5390{
5391 struct net_device *dev;
5392
5393 if (!list_empty(head)) {
5394 rollback_registered_many(head);
5395 list_for_each_entry(dev, head, unreg_list)
5396 net_set_todo(dev);
5397 }
5398}
Eric Dumazet63c80992009-10-27 07:06:49 +00005399EXPORT_SYMBOL(unregister_netdevice_many);
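/*
 * Illustrative sketch: batching unregisters through a list amortizes
 * the synchronize_net() calls in rollback_registered_many(). Under the
 * rtnl semaphore, with dev1/dev2 standing for the caller's devices:
 *
 *	LIST_HEAD(kill_list);
 *
 *	unregister_netdevice_queue(dev1, &kill_list);
 *	unregister_netdevice_queue(dev2, &kill_list);
 *	unregister_netdevice_many(&kill_list);
 */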
Eric Dumazet9b5e3832009-10-27 07:04:19 +00005400
5401/**
Linus Torvalds1da177e2005-04-16 15:20:36 -07005402 * unregister_netdev - remove device from the kernel
5403 * @dev: device
5404 *
5405 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08005406 * from the kernel tables.
Linus Torvalds1da177e2005-04-16 15:20:36 -07005407 *
5408 * This is just a wrapper for unregister_netdevice that takes
5409 * the rtnl semaphore. In general you want to use this and not
5410 * unregister_netdevice.
5411 */
5412void unregister_netdev(struct net_device *dev)
5413{
5414 rtnl_lock();
5415 unregister_netdevice(dev);
5416 rtnl_unlock();
5417}
Linus Torvalds1da177e2005-04-16 15:20:36 -07005418EXPORT_SYMBOL(unregister_netdev);
5419
Eric W. Biedermance286d32007-09-12 13:53:49 +02005420/**
 5421 * dev_change_net_namespace - move device to a different network namespace
5422 * @dev: device
5423 * @net: network namespace
5424 * @pat: If not NULL name pattern to try if the current device name
5425 * is already taken in the destination network namespace.
5426 *
5427 * This function shuts down a device interface and moves it
5428 * to a new network namespace. On success 0 is returned, on
 5429 * a failure a negative errno code is returned.
5430 *
5431 * Callers must hold the rtnl semaphore.
5432 */
5433
5434int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5435{
5436 char buf[IFNAMSIZ];
5437 const char *destname;
5438 int err;
5439
5440 ASSERT_RTNL();
5441
5442 /* Don't allow namespace local devices to be moved. */
5443 err = -EINVAL;
5444 if (dev->features & NETIF_F_NETNS_LOCAL)
5445 goto out;
5446
Eric W. Biederman38918452008-10-27 17:51:47 -07005447#ifdef CONFIG_SYSFS
5448 /* Don't allow real devices to be moved when sysfs
5449 * is enabled.
5450 */
5451 err = -EINVAL;
5452 if (dev->dev.parent)
5453 goto out;
5454#endif
5455
Eric W. Biedermance286d32007-09-12 13:53:49 +02005456 /* Ensure the device has been registered */
5457 err = -EINVAL;
5458 if (dev->reg_state != NETREG_REGISTERED)
5459 goto out;
5460
 5461 /* Get out if there is nothing to do */
5462 err = 0;
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09005463 if (net_eq(dev_net(dev), net))
Eric W. Biedermance286d32007-09-12 13:53:49 +02005464 goto out;
5465
5466 /* Pick the destination device name, and ensure
5467 * we can use it in the destination network namespace.
5468 */
5469 err = -EEXIST;
5470 destname = dev->name;
5471 if (__dev_get_by_name(net, destname)) {
5472 /* We get here if we can't use the current device name */
5473 if (!pat)
5474 goto out;
5475 if (!dev_valid_name(pat))
5476 goto out;
5477 if (strchr(pat, '%')) {
5478 if (__dev_alloc_name(net, pat, buf) < 0)
5479 goto out;
5480 destname = buf;
5481 } else
5482 destname = pat;
5483 if (__dev_get_by_name(net, destname))
5484 goto out;
5485 }
5486
5487 /*
 5488 * And now a mini version of register_netdevice and unregister_netdevice.
5489 */
5490
5491 /* If device is running close it first. */
Pavel Emelyanov9b772652007-10-10 02:49:09 -07005492 dev_close(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005493
5494 /* And unlink it from device chain */
5495 err = -ENODEV;
5496 unlist_netdevice(dev);
5497
5498 synchronize_net();
5499
5500 /* Shutdown queueing discipline. */
5501 dev_shutdown(dev);
5502
 5503 /* Notify protocols that we are about to destroy
 5504 this device. They should clean up all their state.
5505 */
5506 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5507
5508 /*
5509 * Flush the unicast and multicast chains
5510 */
Jiri Pirkoccffad252009-05-22 23:22:17 +00005511 dev_unicast_flush(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005512 dev_addr_discard(dev);
5513
Eric W. Biederman38918452008-10-27 17:51:47 -07005514 netdev_unregister_kobject(dev);
5515
Eric W. Biedermance286d32007-09-12 13:53:49 +02005516 /* Actually switch the network namespace */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09005517 dev_net_set(dev, net);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005518
5519 /* Assign the new device name */
5520 if (destname != dev->name)
5521 strcpy(dev->name, destname);
5522
5523 /* If there is an ifindex conflict assign a new one */
5524 if (__dev_get_by_index(net, dev->ifindex)) {
5525 int iflink = (dev->iflink == dev->ifindex);
5526 dev->ifindex = dev_new_index(net);
5527 if (iflink)
5528 dev->iflink = dev->ifindex;
5529 }
5530
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005531 /* Fixup kobjects */
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07005532 err = netdev_register_kobject(dev);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005533 WARN_ON(err);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005534
5535 /* Add the device back in the hashes */
5536 list_netdevice(dev);
5537
5538 /* Notify protocols, that a new device appeared. */
5539 call_netdevice_notifiers(NETDEV_REGISTER, dev);
5540
5541 synchronize_net();
5542 err = 0;
5543out:
5544 return err;
5545}
Johannes Berg463d0182009-07-14 00:33:35 +02005546EXPORT_SYMBOL_GPL(dev_change_net_namespace);
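/* Illustrative sketch: moving a device into another namespace under the
 * rtnl semaphore, with an "eth%d" pattern as fallback should its current
 * name already be taken there. "dev" and "newnet" are hypothetical,
 * valid pointers.
 *
 *	int err;
 *
 *	rtnl_lock();
 *	err = dev_change_net_namespace(dev, newnet, "eth%d");
 *	rtnl_unlock();
 *	if (err)
 *		printk(KERN_ERR "failed to move %s: %d\n", dev->name, err);
 */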
Eric W. Biedermance286d32007-09-12 13:53:49 +02005547
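/*
 * CPU hotplug notifier: when a CPU goes offline, splice its softnet
 * completion and output queues onto the current CPU, raise the TX
 * softirq, and re-inject any packets left on its input_pkt_queue.
 */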
Linus Torvalds1da177e2005-04-16 15:20:36 -07005548static int dev_cpu_callback(struct notifier_block *nfb,
5549 unsigned long action,
5550 void *ocpu)
5551{
5552 struct sk_buff **list_skb;
David S. Miller37437bb2008-07-16 02:15:04 -07005553 struct Qdisc **list_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005554 struct sk_buff *skb;
5555 unsigned int cpu, oldcpu = (unsigned long)ocpu;
5556 struct softnet_data *sd, *oldsd;
5557
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07005558 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005559 return NOTIFY_OK;
5560
5561 local_irq_disable();
5562 cpu = smp_processor_id();
5563 sd = &per_cpu(softnet_data, cpu);
5564 oldsd = &per_cpu(softnet_data, oldcpu);
5565
5566 /* Find end of our completion_queue. */
5567 list_skb = &sd->completion_queue;
5568 while (*list_skb)
5569 list_skb = &(*list_skb)->next;
5570 /* Append completion queue from offline CPU. */
5571 *list_skb = oldsd->completion_queue;
5572 oldsd->completion_queue = NULL;
5573
5574 /* Find end of our output_queue. */
5575 list_net = &sd->output_queue;
5576 while (*list_net)
5577 list_net = &(*list_net)->next_sched;
5578 /* Append output queue from offline CPU. */
5579 *list_net = oldsd->output_queue;
5580 oldsd->output_queue = NULL;
5581
5582 raise_softirq_irqoff(NET_TX_SOFTIRQ);
5583 local_irq_enable();
5584
5585 /* Process offline CPU's input_pkt_queue */
5586 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5587 netif_rx(skb);
5588
5589 return NOTIFY_OK;
5590}
Linus Torvalds1da177e2005-04-16 15:20:36 -07005591
5592
Herbert Xu7f353bf2007-08-10 15:47:58 -07005593/**
Herbert Xub63365a2008-10-23 01:11:29 -07005594 * netdev_increment_features - increment feature set by one device
5595 * @all: current feature set
5596 * @one: new feature set
5597 * @mask: mask feature set
Herbert Xu7f353bf2007-08-10 15:47:58 -07005598 *
5599 * Computes a new feature set after adding a device with feature set
Herbert Xub63365a2008-10-23 01:11:29 -07005600 * @one to the master device with current feature set @all. Will not
5601 * enable anything that is off in @mask. Returns the new feature set.
Herbert Xu7f353bf2007-08-10 15:47:58 -07005602 */
Herbert Xub63365a2008-10-23 01:11:29 -07005603unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5604 unsigned long mask)
Herbert Xu7f353bf2007-08-10 15:47:58 -07005605{
Herbert Xub63365a2008-10-23 01:11:29 -07005606 /* If device needs checksumming, downgrade to it. */
Eric Dumazetd1b19df2009-09-03 01:29:39 -07005607 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
Herbert Xub63365a2008-10-23 01:11:29 -07005608 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5609 else if (mask & NETIF_F_ALL_CSUM) {
5610 /* If one device supports v4/v6 checksumming, set for all. */
5611 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5612 !(all & NETIF_F_GEN_CSUM)) {
5613 all &= ~NETIF_F_ALL_CSUM;
5614 all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5615 }
Herbert Xu7f353bf2007-08-10 15:47:58 -07005616
Herbert Xub63365a2008-10-23 01:11:29 -07005617 /* If one device supports hw checksumming, set for all. */
5618 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5619 all &= ~NETIF_F_ALL_CSUM;
5620 all |= NETIF_F_HW_CSUM;
5621 }
5622 }
Herbert Xu7f353bf2007-08-10 15:47:58 -07005623
Herbert Xub63365a2008-10-23 01:11:29 -07005624 one |= NETIF_F_ALL_CSUM;
Herbert Xu7f353bf2007-08-10 15:47:58 -07005625
Herbert Xub63365a2008-10-23 01:11:29 -07005626 one |= all & NETIF_F_ONE_FOR_ALL;
Sridhar Samudralad9f59502009-10-07 12:24:25 +00005627 all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
Herbert Xub63365a2008-10-23 01:11:29 -07005628 all |= one & mask & NETIF_F_ONE_FOR_ALL;
Herbert Xu7f353bf2007-08-10 15:47:58 -07005629
5630 return all;
5631}
Herbert Xub63365a2008-10-23 01:11:29 -07005632EXPORT_SYMBOL(netdev_increment_features);
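/* Illustrative sketch: a master device (in the spirit of bonding or
 * bridging) folding each slave's feature set into its own. "master",
 * "slave1" and "slave2" are hypothetical net_device pointers, and
 * seeding "all" from the master's own features is one plausible choice.
 *
 *	unsigned long mask = master->features;
 *	unsigned long all = mask;
 *
 *	all = netdev_increment_features(all, slave1->features, mask);
 *	all = netdev_increment_features(all, slave2->features, mask);
 *	master->features = all;
 */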
Herbert Xu7f353bf2007-08-10 15:47:58 -07005633
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005634static struct hlist_head *netdev_create_hash(void)
5635{
5636 int i;
5637 struct hlist_head *hash;
5638
5639 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5640 if (hash != NULL)
5641 for (i = 0; i < NETDEV_HASHENTRIES; i++)
5642 INIT_HLIST_HEAD(&hash[i]);
5643
5644 return hash;
5645}
5646
Eric W. Biederman881d9662007-09-17 11:56:21 -07005647/* Initialize per network namespace state */
Pavel Emelyanov46650792007-10-08 20:38:39 -07005648static int __net_init netdev_init(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07005649{
Eric W. Biederman881d9662007-09-17 11:56:21 -07005650 INIT_LIST_HEAD(&net->dev_base_head);
Eric W. Biederman881d9662007-09-17 11:56:21 -07005651
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005652 net->dev_name_head = netdev_create_hash();
5653 if (net->dev_name_head == NULL)
5654 goto err_name;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005655
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005656 net->dev_index_head = netdev_create_hash();
5657 if (net->dev_index_head == NULL)
5658 goto err_idx;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005659
5660 return 0;
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07005661
5662err_idx:
5663 kfree(net->dev_name_head);
5664err_name:
5665 return -ENOMEM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07005666}
5667
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07005668/**
5669 * netdev_drivername - network driver for the device
5670 * @dev: network device
5671 * @buffer: buffer for resulting name
5672 * @len: size of buffer
5673 *
 5674 * Determine the network driver for @dev and copy its name into @buffer.
5675 */
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07005676char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
Arjan van de Ven6579e572008-07-21 13:31:48 -07005677{
Stephen Hemmingercf04a4c72008-09-30 02:22:14 -07005678 const struct device_driver *driver;
5679 const struct device *parent;
Arjan van de Ven6579e572008-07-21 13:31:48 -07005680
5681 if (len <= 0 || !buffer)
5682 return buffer;
5683 buffer[0] = 0;
5684
5685 parent = dev->dev.parent;
5686
5687 if (!parent)
5688 return buffer;
5689
5690 driver = parent->driver;
5691 if (driver && driver->name)
5692 strlcpy(buffer, driver->name, len);
5693 return buffer;
5694}
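/* Illustrative sketch: reporting the driver behind a misbehaving device,
 * much as the watchdog in sch_generic.c does. "dev" is any registered
 * net_device.
 *
 *	char drivername[64];
 *
 *	printk(KERN_WARNING "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
 *	       dev->name, netdev_drivername(dev, drivername, 64));
 */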
5695
Pavel Emelyanov46650792007-10-08 20:38:39 -07005696static void __net_exit netdev_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07005697{
5698 kfree(net->dev_name_head);
5699 kfree(net->dev_index_head);
5700}
5701
Denis V. Lunev022cbae2007-11-13 03:23:50 -08005702static struct pernet_operations __net_initdata netdev_net_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07005703 .init = netdev_init,
5704 .exit = netdev_exit,
5705};
5706
Pavel Emelyanov46650792007-10-08 20:38:39 -07005707static void __net_exit default_device_exit(struct net *net)
Eric W. Biedermance286d32007-09-12 13:53:49 +02005708{
Eric W. Biederman8eb79862008-12-29 18:21:48 -08005709 struct net_device *dev;
Eric W. Biedermance286d32007-09-12 13:53:49 +02005710 /*
 5711 * Push all migratable network devices back to the
5712 * initial network namespace
5713 */
5714 rtnl_lock();
Eric W. Biederman8eb79862008-12-29 18:21:48 -08005715restart:
5716 for_each_netdev(net, dev) {
Eric W. Biedermance286d32007-09-12 13:53:49 +02005717 int err;
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005718 char fb_name[IFNAMSIZ];
Eric W. Biedermance286d32007-09-12 13:53:49 +02005719
 5720 /* Ignore unmovable devices (e.g. loopback) */
5721 if (dev->features & NETIF_F_NETNS_LOCAL)
5722 continue;
5723
Eric W. Biedermand0c082c2008-11-05 15:59:38 -08005724 /* Delete virtual devices */
5725 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
Eric Dumazet23289a32009-10-27 07:06:36 +00005726 dev->rtnl_link_ops->dellink(dev, NULL);
Eric W. Biederman8eb79862008-12-29 18:21:48 -08005727 goto restart;
Eric W. Biedermand0c082c2008-11-05 15:59:38 -08005728 }
5729
Eric W. Biedermance286d32007-09-12 13:53:49 +02005730 /* Push remaining network devices to init_net */
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005731 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
5732 err = dev_change_net_namespace(dev, &init_net, fb_name);
Eric W. Biedermance286d32007-09-12 13:53:49 +02005733 if (err) {
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005734 printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
Eric W. Biedermance286d32007-09-12 13:53:49 +02005735 __func__, dev->name, err);
Pavel Emelyanovaca51392008-05-08 01:24:25 -07005736 BUG();
Eric W. Biedermance286d32007-09-12 13:53:49 +02005737 }
Eric W. Biederman8eb79862008-12-29 18:21:48 -08005738 goto restart;
Eric W. Biedermance286d32007-09-12 13:53:49 +02005739 }
5740 rtnl_unlock();
5741}
5742
Denis V. Lunev022cbae2007-11-13 03:23:50 -08005743static struct pernet_operations __net_initdata default_device_ops = {
Eric W. Biedermance286d32007-09-12 13:53:49 +02005744 .exit = default_device_exit,
5745};
5746
Linus Torvalds1da177e2005-04-16 15:20:36 -07005747/*
5748 * Initialize the DEV module. At boot time this walks the device list and
5749 * unhooks any devices that fail to initialise (normally hardware not
5750 * present) and leaves us with a valid list of present and active devices.
5751 *
5752 */
5753
5754/*
5755 * This is called single threaded during boot, so no need
5756 * to take the rtnl semaphore.
5757 */
5758static int __init net_dev_init(void)
5759{
5760 int i, rc = -ENOMEM;
5761
5762 BUG_ON(!dev_boot_phase);
5763
Linus Torvalds1da177e2005-04-16 15:20:36 -07005764 if (dev_proc_init())
5765 goto out;
5766
Eric W. Biederman8b41d182007-09-26 22:02:53 -07005767 if (netdev_kobject_init())
Linus Torvalds1da177e2005-04-16 15:20:36 -07005768 goto out;
5769
5770 INIT_LIST_HEAD(&ptype_all);
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08005771 for (i = 0; i < PTYPE_HASH_SIZE; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005772 INIT_LIST_HEAD(&ptype_base[i]);
5773
Eric W. Biederman881d9662007-09-17 11:56:21 -07005774 if (register_pernet_subsys(&netdev_net_ops))
5775 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005776
5777 /*
5778 * Initialise the packet receive queues.
5779 */
5780
KAMEZAWA Hiroyuki6f912042006-04-10 22:52:50 -07005781 for_each_possible_cpu(i) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005782 struct softnet_data *queue;
5783
5784 queue = &per_cpu(softnet_data, i);
5785 skb_queue_head_init(&queue->input_pkt_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005786 queue->completion_queue = NULL;
5787 INIT_LIST_HEAD(&queue->poll_list);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07005788
5789 queue->backlog.poll = process_backlog;
5790 queue->backlog.weight = weight_p;
Herbert Xud565b0a2008-12-15 23:38:52 -08005791 queue->backlog.gro_list = NULL;
Herbert Xu4ae55442009-02-08 18:00:36 +00005792 queue->backlog.gro_count = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005793 }
5794
Linus Torvalds1da177e2005-04-16 15:20:36 -07005795 dev_boot_phase = 0;
5796
Eric W. Biederman505d4f72008-11-07 22:54:20 -08005797 /* The loopback device is special: if any other network device
 5798 * is present in a network namespace, the loopback device must
 5799 * be present too. Since we now dynamically allocate and free the
 5800 * loopback device, ensure this invariant is maintained by
 5801 * keeping the loopback device as the first device on the
 5802 * list of network devices, so that it is the first device
 5803 * that appears and the last network device that disappears.
5805 */
5806 if (register_pernet_device(&loopback_net_ops))
5807 goto out;
5808
5809 if (register_pernet_device(&default_device_ops))
5810 goto out;
5811
Carlos R. Mafra962cf362008-05-15 11:15:37 -03005812 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
5813 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005814
5815 hotcpu_notifier(dev_cpu_callback, 0);
5816 dst_init();
5817 dev_mcast_init();
5818 rc = 0;
5819out:
5820 return rc;
5821}
5822
5823subsys_initcall(net_dev_init);
5824
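/*
 * Seed the random value mixed into skb_tx_hash() when spreading flows
 * across a device's transmit queues. Registered as a late initcall so
 * the entropy pool has had some time to fill.
 */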
Krishna Kumare88721f2009-02-18 17:55:02 -08005825static int __init initialize_hashrnd(void)
5826{
5827 get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
5828 return 0;
5829}
5830
5831late_initcall_sync(initialize_hashrnd);
5832