blob: 99b55e641e8d4d06ae02b91dc8ec47ce01b1b1d8 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
8 * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
9 *
Jesper Juhl02c30a82005-05-05 16:16:16 -070010 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 *
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090014 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070015 * Alan Cox : verify_area() now used correctly
16 * Alan Cox : new skbuff lists, look ma no backlogs!
17 * Alan Cox : tidied skbuff lists.
18 * Alan Cox : Now uses generic datagram routines I
19 * added. Also fixed the peek/read crash
20 * from all old Linux datagram code.
21 * Alan Cox : Uses the improved datagram code.
22 * Alan Cox : Added NULL's for socket options.
23 * Alan Cox : Re-commented the code.
24 * Alan Cox : Use new kernel side addressing
25 * Rob Janssen : Correct MTU usage.
26 * Dave Platt : Counter leaks caused by incorrect
27 * interrupt locking and some slightly
28 * dubious gcc output. Can you read
29 * compiler: it said _VOLATILE_
30 * Richard Kooijman : Timestamp fixes.
31 * Alan Cox : New buffers. Use sk->mac.raw.
32 * Alan Cox : sendmsg/recvmsg support.
33 * Alan Cox : Protocol setting support
34 * Alexey Kuznetsov : Untied from IPv4 stack.
35 * Cyrus Durgin : Fixed kerneld for kmod.
36 * Michal Ostrowski : Module initialization cleanup.
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090037 * Ulises Alonso : Frame number limit removal and
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 * packet_set_ring memory leak.
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070039 * Eric Biederman : Allow for > 8 byte hardware addresses.
40 * The convention is that longer addresses
41 * will simply extend the hardware address
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090042 * byte arrays at the end of sockaddr_ll
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070043 * and packet_mreq.
Linus Torvalds1da177e2005-04-16 15:20:36 -070044 *
45 * This program is free software; you can redistribute it and/or
46 * modify it under the terms of the GNU General Public License
47 * as published by the Free Software Foundation; either version
48 * 2 of the License, or (at your option) any later version.
49 *
50 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090051
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070053#include <linux/mm.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080054#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <linux/fcntl.h>
56#include <linux/socket.h>
57#include <linux/in.h>
58#include <linux/inet.h>
59#include <linux/netdevice.h>
60#include <linux/if_packet.h>
61#include <linux/wireless.h>
Herbert Xuffbc6112007-02-04 23:33:10 -080062#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070063#include <linux/kmod.h>
64#include <net/ip.h>
65#include <net/protocol.h>
66#include <linux/skbuff.h>
67#include <net/sock.h>
68#include <linux/errno.h>
69#include <linux/timer.h>
70#include <asm/system.h>
71#include <asm/uaccess.h>
72#include <asm/ioctls.h>
73#include <asm/page.h>
Al Viroa1f8e7f72006-10-19 16:08:53 -040074#include <asm/cacheflush.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070075#include <asm/io.h>
76#include <linux/proc_fs.h>
77#include <linux/seq_file.h>
78#include <linux/poll.h>
79#include <linux/module.h>
80#include <linux/init.h>
81
82#ifdef CONFIG_INET
83#include <net/inet_common.h>
84#endif
85
86#define CONFIG_SOCK_PACKET 1
87
88/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070089 Assumptions:
90 - if device has no dev->hard_header routine, it adds and removes ll header
91 inside itself. In this case ll header is invisible outside of device,
92 but higher levels still should reserve dev->hard_header_len.
93 Some devices are enough clever to reallocate skb, when header
94 will not fit to reserved space (tunnel), another ones are silly
95 (PPP).
96 - packet socket receives packets with pulled ll header,
97 so that SOCK_RAW should push it back.
98
99On receive:
100-----------
101
102Incoming, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700103 mac_header -> ll header
104 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105
106Outgoing, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700107 mac_header -> ll header
108 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109
110Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the ll
		 header. PPP does this, which is wrong because it introduces
		 asymmetry between the rx and tx paths.
114 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115
116Outgoing, dev->hard_header==NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700117 mac_header -> data. ll header is still not built!
118 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119
120Resume
121 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
122
123
124On transmit:
125------------
126
127dev->hard_header != NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700128 mac_header -> ll header
129 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130
131dev->hard_header == NULL (ll header is added by device, we cannot control it)
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700132 mac_header -> data
133 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134
   We should set nh.raw on output to the correct position,
   packet classifier depends on it.
137 */
138
/* List of all packet sockets. */
static HLIST_HEAD(packet_sklist);
/* Taken for writing (BH-disabled) when a socket is unlinked from packet_sklist. */
static DEFINE_RWLOCK(packet_sklist_lock);

/* Live-socket counter; decremented when a dead socket is destructed. */
static atomic_t packet_socks_nr;
144
145
146/* Private packet socket structures. */
147
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148struct packet_mclist
149{
150 struct packet_mclist *next;
151 int ifindex;
152 int count;
153 unsigned short type;
154 unsigned short alen;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700155 unsigned char addr[MAX_ADDR_LEN];
156};
157/* identical to struct packet_mreq except it has
158 * a longer address field.
159 */
160struct packet_mreq_max
161{
162 int mr_ifindex;
163 unsigned short mr_type;
164 unsigned short mr_alen;
165 unsigned char mr_address[MAX_ADDR_LEN];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166};
David S. Millera2efcfa2007-05-29 13:12:50 -0700167
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168#ifdef CONFIG_PACKET_MMAP
169static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
170#endif
171
172static void packet_flush_mclist(struct sock *sk);
173
174struct packet_sock {
175 /* struct sock has to be the first member of packet_sock */
176 struct sock sk;
177 struct tpacket_stats stats;
178#ifdef CONFIG_PACKET_MMAP
179 char * *pg_vec;
180 unsigned int head;
181 unsigned int frames_per_block;
182 unsigned int frame_size;
183 unsigned int frame_max;
184 int copy_thresh;
185#endif
186 struct packet_type prot_hook;
187 spinlock_t bind_lock;
Herbert Xu8dc41942007-02-04 23:31:32 -0800188 unsigned int running:1, /* prot_hook is attached*/
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700189 auxdata:1,
190 origdev:1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 int ifindex; /* bound device */
Al Viro0e11c912006-11-08 00:26:29 -0800192 __be16 num;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 struct packet_mclist *mclist;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194#ifdef CONFIG_PACKET_MMAP
195 atomic_t mapped;
196 unsigned int pg_vec_order;
197 unsigned int pg_vec_pages;
198 unsigned int pg_vec_len;
199#endif
200};
201
Herbert Xuffbc6112007-02-04 23:33:10 -0800202struct packet_skb_cb {
203 unsigned int origlen;
204 union {
205 struct sockaddr_pkt pkt;
206 struct sockaddr_ll ll;
207 } sa;
208};
209
210#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
Herbert Xu8dc41942007-02-04 23:31:32 -0800211
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212#ifdef CONFIG_PACKET_MMAP
213
Jason Lunzad930652007-02-20 23:19:54 -0800214static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215{
216 unsigned int pg_vec_pos, frame_offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217
218 pg_vec_pos = position / po->frames_per_block;
219 frame_offset = position % po->frames_per_block;
220
Jason Lunzad930652007-02-20 23:19:54 -0800221 return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222}
223#endif
224
/*
 * Convert a generic socket pointer into the packet-socket view of the
 * same object. Valid only because struct sock is the first member of
 * struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	struct packet_sock *po = (struct packet_sock *)sk;

	return po;
}
229
230static void packet_sock_destruct(struct sock *sk)
231{
232 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
233 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
234
235 if (!sock_flag(sk, SOCK_DEAD)) {
236 printk("Attempt to release alive packet socket: %p\n", sk);
237 return;
238 }
239
240 atomic_dec(&packet_socks_nr);
241#ifdef PACKET_REFCNT_DEBUG
242 printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
243#endif
244}
245
246
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800247static const struct proto_ops packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248
249#ifdef CONFIG_SOCK_PACKET
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800250static const struct proto_ops packet_ops_spkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251
David S. Millerf2ccd8f2005-08-09 19:34:12 -0700252static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253{
254 struct sock *sk;
255 struct sockaddr_pkt *spkt;
256
257 /*
258 * When we registered the protocol we saved the socket in the data
259 * field for just this event.
260 */
261
262 sk = pt->af_packet_priv;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900263
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 /*
265 * Yank back the headers [hope the device set this
266 * right or kerboom...]
267 *
268 * Incoming packets have ll header pulled,
269 * push it back.
270 *
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700271 * For outgoing ones skb->data == skb_mac_header(skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 * so that this procedure is noop.
273 */
274
275 if (skb->pkt_type == PACKET_LOOPBACK)
276 goto out;
277
278 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
279 goto oom;
280
281 /* drop any routing info */
282 dst_release(skb->dst);
283 skb->dst = NULL;
284
Phil Oester84531c22005-07-12 11:57:52 -0700285 /* drop conntrack reference */
286 nf_reset(skb);
287
Herbert Xuffbc6112007-02-04 23:33:10 -0800288 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700290 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291
292 /*
293 * The SOCK_PACKET socket receives _all_ frames.
294 */
295
296 spkt->spkt_family = dev->type;
297 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
298 spkt->spkt_protocol = skb->protocol;
299
300 /*
301 * Charge the memory to the socket. This is done specifically
302 * to prevent sockets using all the memory up.
303 */
304
305 if (sock_queue_rcv_skb(sk,skb) == 0)
306 return 0;
307
308out:
309 kfree_skb(skb);
310oom:
311 return 0;
312}
313
314
/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame
 *
 *	@iocb is not used. @sock is the sending socket, @msg carries the
 *	mandatory destination (a sockaddr_pkt naming the device) and the
 *	payload iovec, @len is the number of payload bytes.
 *
 *	Returns @len on success, otherwise a negative errno:
 *	-ENOTCONN (no address), -EINVAL (address shorter than a struct
 *	sockaddr), -ENODEV (no such device), -ENETDOWN (device not up),
 *	-EMSGSIZE (frame longer than mtu + hard_header_len), -ENOBUFS
 *	(no send buffer), or the error reported by memcpy_fromiovec().
 *	The result of dev_queue_xmit() is not examined.
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto=0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr)
	{
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return(-EINVAL);
		/* The protocol is only taken from a full-sized sockaddr_pkt; otherwise it stays 0. */
		if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
			proto=saddr->spkt_protocol;
	}
	else
		return(-ENOTCONN);	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	/*
	 * Force termination of the device name before the lookup.
	 * NOTE(review): 13 is presumably sizeof(spkt_device) - 1 - confirm
	 * against struct sockaddr_pkt.
	 */
	saddr->spkt_device[13] = 0;
	dev = dev_get_by_name(saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 *	You may not queue a frame bigger than the mtu. This is the lowest level
	 *	raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	err = -ENOBUFS;
	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

	/*
	 *	If the write buffer is full, then tough. At this level the user gets to
	 *	deal with the problem - do your own algorithmic backoffs. That's far
	 *	more flexible.
	 */

	if (skb == NULL)
		goto out_unlock;

	/*
	 *	Fill it in
	 */

	/* FIXME: Save some space for broken drivers that write a
	 * hard header at transmission time by themselves. PPP is the
	 * notable one here. This should really be fixed at the driver level.
	 */
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	/* Try to align data part correctly */
	if (dev->hard_header) {
		/*
		 * Step data/tail back by the header length, so the header the
		 * user supplies lands in front of the network header set above.
		 */
		skb->data -= dev->hard_header_len;
		skb->tail -= dev->hard_header_len;
		/* Frame shorter than a full header: point the network header at the data start. */
		if (len < dev->hard_header_len)
			skb_reset_network_header(skb);
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	if (err)
		goto out_free;

	/*
	 *	Now send it
	 */

	dev_queue_xmit(skb);
	dev_put(dev);
	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	/* dev is NULL only when the device lookup itself failed. */
	if (dev)
		dev_put(dev);
	return err;
}
421#endif
422
David S. Millerdbcb5852007-01-24 15:21:02 -0800423static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
424 unsigned int res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425{
426 struct sk_filter *filter;
427
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700428 rcu_read_lock_bh();
429 filter = rcu_dereference(sk->sk_filter);
David S. Millerdbcb5852007-01-24 15:21:02 -0800430 if (filter != NULL)
431 res = sk_run_filter(skb, filter->insns, filter->len);
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700432 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433
David S. Millerdbcb5852007-01-24 15:21:02 -0800434 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435}
436
437/*
438 This function makes lazy skb cloning in hope that most of packets
439 are discarded by BPF.
440
   Note the tricky part: we DO mangle the shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return the skb to its original state on exit,
   we will not harm anyone.
447 */
448
/*
 * Receive hook for non-mmap'ed packet sockets.
 *
 * Adjusts the data pointer to what the socket type expects, runs the
 * socket filter, and - if the filter accepts the packet and the receive
 * buffer has room - records a sockaddr_ll in the skb control buffer,
 * trims the packet to the snap length and queues it on the socket.
 *
 * skb->data and skb->len are saved on entry so that a shared skb can be
 * put back into its original state before this function lets go of it.
 *
 * Always returns 0. tp_packets is bumped for queued packets; tp_drops is
 * bumped for packets lost to a full buffer, a failed clone or a failed
 * trim. Filter rejections and loopback packets are not counted.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 * skb_head = skb->data;	/* original data pointer, for restore */
	int skb_len = skb->len;		/* original length, for restore */
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	skb->dev = dev;

	if (dev->hard_header) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides details of its frame
		   structure, so that the corresponding packet head
		   is never delivered to the user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	/* A zero filter result rejects the packet; a smaller one shortens the snap length. */
	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		/* Lazy clone: only packets that survived the filter get here. */
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		/* Undo our mangling of the shared original before releasing it. */
		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	/* Compile-time check: the control block plus the longest hardware address must fit in skb->cb. */
	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	/* With origdev set, packets addressed to this host report orig_dev's ifindex. */
	if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;
	sll->sll_halen = 0;

	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

	/* Remember the untrimmed length before cutting down to snaplen. */
	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	dst_release(skb->dst);
	skb->dst = NULL;

	/* drop conntrack reference */
	nf_reset(skb);

	/* The queue lock also covers the statistics update. */
	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
	/* Only a shared skb needs its data pointer and length restored. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;
}
558
559#ifdef CONFIG_PACKET_MMAP
/*
 * Receive hook for packet sockets that use the mmap'ed ring.
 *
 * Adjusts the data pointer as packet_rcv() does, runs the socket filter,
 * then copies up to snaplen bytes of the packet plus a tpacket_hdr and a
 * sockaddr_ll into the ring frame at po->head and marks that frame with
 * the computed status word.
 *
 * If the packet does not fit in a frame it is truncated; when
 * copy_thresh is set and the receive buffer has room, a reference to (or
 * clone of) the skb is additionally queued on the socket and the frame
 * is flagged TP_STATUS_COPY.
 *
 * Always returns 0. tp_drops is bumped when the head frame is still in
 * use (ring full).
 */
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	struct tpacket_hdr *h;
	u8 * skb_head = skb->data;	/* original data pointer, for restore */
	int skb_len = skb->len;		/* original length, for restore */
	unsigned int snaplen, res;
	/* LOSING is cleared further down if no drop has been counted so far. */
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (dev->hard_header) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	/* A zero filter result rejects the packet; a smaller one shortens the snap length. */
	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	/*
	 * Work out where, relative to the frame start, the link-level
	 * header (macoff) and the network header (netoff) will be stored.
	 * At least 16 bytes are left in front of the network header.
	 */
	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->frame_size) {
		/* Packet does not fit in one frame: optionally keep a full copy on the socket queue. */
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				/*
				 * Adopting the current data pointer makes the
				 * restore at drop_n_restore a no-op for this skb.
				 */
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		/* Truncate to what the frame can hold; never go negative. */
		snaplen = po->frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h = packet_lookup_frame(po, po->head);

	/* A non-zero status means the frame at head has not been released yet. */
	if (h->tp_status)
		goto ring_is_full;
	po->head = po->head != po->frame_max ? po->head+1 : 0;
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	/* The frame is filled outside the queue lock; head has already moved on. */
	skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);

	h->tp_len = skb->len;
	h->tp_snaplen = snaplen;
	h->tp_mac = macoff;
	h->tp_net = netoff;
	/* No timestamp on the skb yet: take one now and turn timestamping on for the socket. */
	if (skb->tstamp.tv64 == 0) {
		__net_timestamp(skb);
		sock_enable_timestamp(sk);
	}
	tv = ktime_to_timeval(skb->tstamp);
	h->tp_sec = tv.tv_sec;
	h->tp_usec = tv.tv_usec;

	/* The sockaddr_ll sits directly after the aligned tpacket_hdr. */
	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
	sll->sll_halen = 0;
	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	/* With origdev set, packets addressed to this host report orig_dev's ifindex. */
	if (unlikely(po->origdev) && skb->pkt_type == PACKET_HOST)
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	/* The status word is written last, after the rest of the frame is filled in. */
	h->tp_status = status;
	smp_mb();

	{
		/* Flush every page touched by this frame, from the header to the last copied byte. */
		struct page *p_start, *p_end;
		u8 *h_end = (u8 *)h + macoff + snaplen - 1;

		p_start = virt_to_page(h);
		p_end = virt_to_page(h_end);
		while (p_start <= p_end) {
			flush_dcache_page(p_start);
			p_start++;
		}
	}

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	/* Only a shared skb needs its data pointer and length restored. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	/* The copy was never queued, so its reference is released here. */
	if (copy_skb)
		kfree_skb(copy_skb);
	goto drop_n_restore;
}
703
704#endif
705
706
707static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
708 struct msghdr *msg, size_t len)
709{
710 struct sock *sk = sock->sk;
711 struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
712 struct sk_buff *skb;
713 struct net_device *dev;
Al Viro0e11c912006-11-08 00:26:29 -0800714 __be16 proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 unsigned char *addr;
716 int ifindex, err, reserve = 0;
717
718 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900719 * Get and verify the address.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900721
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 if (saddr == NULL) {
723 struct packet_sock *po = pkt_sk(sk);
724
725 ifindex = po->ifindex;
726 proto = po->num;
727 addr = NULL;
728 } else {
729 err = -EINVAL;
730 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
731 goto out;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700732 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
733 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 ifindex = saddr->sll_ifindex;
735 proto = saddr->sll_protocol;
736 addr = saddr->sll_addr;
737 }
738
739
740 dev = dev_get_by_index(ifindex);
741 err = -ENXIO;
742 if (dev == NULL)
743 goto out_unlock;
744 if (sock->type == SOCK_RAW)
745 reserve = dev->hard_header_len;
746
David S. Millerd5e76b02007-01-25 19:30:36 -0800747 err = -ENETDOWN;
748 if (!(dev->flags & IFF_UP))
749 goto out_unlock;
750
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 err = -EMSGSIZE;
752 if (len > dev->mtu+reserve)
753 goto out_unlock;
754
755 skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
756 msg->msg_flags & MSG_DONTWAIT, &err);
757 if (skb==NULL)
758 goto out_unlock;
759
760 skb_reserve(skb, LL_RESERVED_SPACE(dev));
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700761 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762
763 if (dev->hard_header) {
764 int res;
765 err = -EINVAL;
766 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
767 if (sock->type != SOCK_DGRAM) {
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -0700768 skb_reset_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769 skb->len = 0;
770 } else if (res < 0)
771 goto out_free;
772 }
773
774 /* Returns -EFAULT on error */
775 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
776 if (err)
777 goto out_free;
778
779 skb->protocol = proto;
780 skb->dev = dev;
781 skb->priority = sk->sk_priority;
782
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783 /*
784 * Now send it
785 */
786
787 err = dev_queue_xmit(skb);
788 if (err > 0 && (err = net_xmit_errno(err)) != 0)
789 goto out_unlock;
790
791 dev_put(dev);
792
793 return(len);
794
795out_free:
796 kfree_skb(skb);
797out_unlock:
798 if (dev)
799 dev_put(dev);
800out:
801 return err;
802}
803
/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 *
 *	Order of teardown: unlink from the global socket list, detach the
 *	receive hook, flush the mclist, tear down the mmap ring (if any),
 *	orphan the socket, purge its receive queue and drop the final
 *	reference. Returns 0 in all cases, including when the socket has no
 *	sock attached.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;

	if (!sk)
		return 0;

	po = pkt_sk(sk);

	write_lock_bh(&packet_sklist_lock);
	sk_del_node_init(sk);
	write_unlock_bh(&packet_sklist_lock);

	/*
	 *	Unhook packet receive handler.
	 */

	if (po->running) {
		/*
		 *	Remove the protocol hook
		 */
		dev_remove_pack(&po->prot_hook);
		po->running = 0;
		po->num = 0;
		/* Drops the reference that was taken when the hook was attached. */
		__sock_put(sk);
	}

	packet_flush_mclist(sk);

#ifdef CONFIG_PACKET_MMAP
	if (po->pg_vec) {
		/* An all-zero request with closing=1 asks packet_set_ring() to tear the ring down. */
		struct tpacket_req req;
		memset(&req, 0, sizeof(req));
		packet_set_ring(sk, &req, 1);
	}
#endif

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);

	sock_put(sk);
	return 0;
}
861
/*
 *	Attach a packet hook.
 *
 *	Rebinds @sk to @dev (NULL means no particular device) and
 *	@protocol (network byte order). Any hook that is currently
 *	attached is removed first. With a zero @protocol nothing is
 *	attached. If @dev is given but not up, the hook is not attached and
 *	ENETDOWN is recorded in sk->sk_err instead.
 *
 *	Always returns 0; the ENETDOWN condition is only reported through
 *	sk->sk_err and the socket's error_report callback.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/* Give back the reference held on behalf of the attached hook. */
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		/* bind_lock is released around dev_remove_pack() and re-taken afterwards. */
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (dev) {
		if (dev->flags&IFF_UP) {
			dev_add_pack(&po->prot_hook);
			/* The attached hook holds a reference on the socket. */
			sock_hold(sk);
			po->running = 1;
		} else {
			sk->sk_err = ENETDOWN;
			if (!sock_flag(sk, SOCK_DEAD))
				sk->sk_error_report(sk);
		}
	} else {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}
915
916/*
917 * Bind a packet socket to a device
918 */
919
920#ifdef CONFIG_SOCK_PACKET
921
922static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
923{
924 struct sock *sk=sock->sk;
925 char name[15];
926 struct net_device *dev;
927 int err = -ENODEV;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900928
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 /*
930 * Check legality
931 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900932
Kris Katterjohn8ae55f02006-01-23 16:28:02 -0800933 if (addr_len != sizeof(struct sockaddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 return -EINVAL;
935 strlcpy(name,uaddr->sa_data,sizeof(name));
936
937 dev = dev_get_by_name(name);
938 if (dev) {
939 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
940 dev_put(dev);
941 }
942 return err;
943}
944#endif
945
946static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
947{
948 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
949 struct sock *sk=sock->sk;
950 struct net_device *dev = NULL;
951 int err;
952
953
954 /*
955 * Check legality
956 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900957
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 if (addr_len < sizeof(struct sockaddr_ll))
959 return -EINVAL;
960 if (sll->sll_family != AF_PACKET)
961 return -EINVAL;
962
963 if (sll->sll_ifindex) {
964 err = -ENODEV;
965 dev = dev_get_by_index(sll->sll_ifindex);
966 if (dev == NULL)
967 goto out;
968 }
969 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
970 if (dev)
971 dev_put(dev);
972
973out:
974 return err;
975}
976
977static struct proto packet_proto = {
978 .name = "PACKET",
979 .owner = THIS_MODULE,
980 .obj_size = sizeof(struct packet_sock),
981};
982
983/*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900984 * Create a packet of type SOCK_PACKET.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 */
986
987static int packet_create(struct socket *sock, int protocol)
988{
989 struct sock *sk;
990 struct packet_sock *po;
Al Viro0e11c912006-11-08 00:26:29 -0800991 __be16 proto = (__force __be16)protocol; /* weird, but documented */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 int err;
993
994 if (!capable(CAP_NET_RAW))
995 return -EPERM;
996 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
997#ifdef CONFIG_SOCK_PACKET
998 && sock->type != SOCK_PACKET
999#endif
1000 )
1001 return -ESOCKTNOSUPPORT;
1002
1003 sock->state = SS_UNCONNECTED;
1004
1005 err = -ENOBUFS;
1006 sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
1007 if (sk == NULL)
1008 goto out;
1009
1010 sock->ops = &packet_ops;
1011#ifdef CONFIG_SOCK_PACKET
1012 if (sock->type == SOCK_PACKET)
1013 sock->ops = &packet_ops_spkt;
1014#endif
1015 sock_init_data(sock, sk);
1016
1017 po = pkt_sk(sk);
1018 sk->sk_family = PF_PACKET;
Al Viro0e11c912006-11-08 00:26:29 -08001019 po->num = proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020
1021 sk->sk_destruct = packet_sock_destruct;
1022 atomic_inc(&packet_socks_nr);
1023
1024 /*
1025 * Attach a protocol block
1026 */
1027
1028 spin_lock_init(&po->bind_lock);
1029 po->prot_hook.func = packet_rcv;
1030#ifdef CONFIG_SOCK_PACKET
1031 if (sock->type == SOCK_PACKET)
1032 po->prot_hook.func = packet_rcv_spkt;
1033#endif
1034 po->prot_hook.af_packet_priv = sk;
1035
Al Viro0e11c912006-11-08 00:26:29 -08001036 if (proto) {
1037 po->prot_hook.type = proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038 dev_add_pack(&po->prot_hook);
1039 sock_hold(sk);
1040 po->running = 1;
1041 }
1042
1043 write_lock_bh(&packet_sklist_lock);
1044 sk_add_node(sk, &packet_sklist);
1045 write_unlock_bh(&packet_sklist_lock);
1046 return(0);
1047out:
1048 return err;
1049}
1050
1051/*
1052 * Pull a packet from our receive queue and hand it to the user.
1053 * If necessary we block.
1054 */
1055
1056static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1057 struct msghdr *msg, size_t len, int flags)
1058{
1059 struct sock *sk = sock->sk;
1060 struct sk_buff *skb;
1061 int copied, err;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001062 struct sockaddr_ll *sll;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063
1064 err = -EINVAL;
1065 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1066 goto out;
1067
1068#if 0
1069 /* What error should we return now? EUNATTACH? */
1070 if (pkt_sk(sk)->ifindex < 0)
1071 return -ENODEV;
1072#endif
1073
1074 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 * Call the generic datagram receiver. This handles all sorts
1076 * of horrible races and re-entrancy so we can forget about it
1077 * in the protocol layers.
1078 *
1079 * Now it will return ENETDOWN, if device have just gone down,
1080 * but then it will block.
1081 */
1082
1083 skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1084
1085 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001086 * An error occurred so return it. Because skb_recv_datagram()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087 * handles the blocking we don't see and worry about blocking
1088 * retries.
1089 */
1090
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001091 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 goto out;
1093
1094 /*
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001095 * If the address length field is there to be filled in, we fill
1096 * it in now.
1097 */
1098
Herbert Xuffbc6112007-02-04 23:33:10 -08001099 sll = &PACKET_SKB_CB(skb)->sa.ll;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001100 if (sock->type == SOCK_PACKET)
1101 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1102 else
1103 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1104
1105 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 * You lose any data beyond the buffer you gave. If it worries a
1107 * user program they can ask the device for its MTU anyway.
1108 */
1109
1110 copied = skb->len;
1111 if (copied > len)
1112 {
1113 copied=len;
1114 msg->msg_flags|=MSG_TRUNC;
1115 }
1116
1117 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1118 if (err)
1119 goto out_free;
1120
1121 sock_recv_timestamp(msg, sk, skb);
1122
1123 if (msg->msg_name)
Herbert Xuffbc6112007-02-04 23:33:10 -08001124 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1125 msg->msg_namelen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126
Herbert Xu8dc41942007-02-04 23:31:32 -08001127 if (pkt_sk(sk)->auxdata) {
Herbert Xuffbc6112007-02-04 23:33:10 -08001128 struct tpacket_auxdata aux;
1129
1130 aux.tp_status = TP_STATUS_USER;
1131 if (skb->ip_summed == CHECKSUM_PARTIAL)
1132 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1133 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1134 aux.tp_snaplen = skb->len;
1135 aux.tp_mac = 0;
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001136 aux.tp_net = skb_network_offset(skb);
Herbert Xuffbc6112007-02-04 23:33:10 -08001137
1138 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
Herbert Xu8dc41942007-02-04 23:31:32 -08001139 }
1140
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141 /*
1142 * Free or return the buffer as appropriate. Again this
1143 * hides all the races and re-entrancy issues from us.
1144 */
1145 err = (flags&MSG_TRUNC) ? skb->len : copied;
1146
1147out_free:
1148 skb_free_datagram(sk, skb);
1149out:
1150 return err;
1151}
1152
1153#ifdef CONFIG_SOCK_PACKET
1154static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1155 int *uaddr_len, int peer)
1156{
1157 struct net_device *dev;
1158 struct sock *sk = sock->sk;
1159
1160 if (peer)
1161 return -EOPNOTSUPP;
1162
1163 uaddr->sa_family = AF_PACKET;
1164 dev = dev_get_by_index(pkt_sk(sk)->ifindex);
1165 if (dev) {
1166 strlcpy(uaddr->sa_data, dev->name, 15);
1167 dev_put(dev);
1168 } else
1169 memset(uaddr->sa_data, 0, 14);
1170 *uaddr_len = sizeof(*uaddr);
1171
1172 return 0;
1173}
1174#endif
1175
1176static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1177 int *uaddr_len, int peer)
1178{
1179 struct net_device *dev;
1180 struct sock *sk = sock->sk;
1181 struct packet_sock *po = pkt_sk(sk);
1182 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1183
1184 if (peer)
1185 return -EOPNOTSUPP;
1186
1187 sll->sll_family = AF_PACKET;
1188 sll->sll_ifindex = po->ifindex;
1189 sll->sll_protocol = po->num;
1190 dev = dev_get_by_index(po->ifindex);
1191 if (dev) {
1192 sll->sll_hatype = dev->type;
1193 sll->sll_halen = dev->addr_len;
1194 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1195 dev_put(dev);
1196 } else {
1197 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1198 sll->sll_halen = 0;
1199 }
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001200 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201
1202 return 0;
1203}
1204
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1206{
1207 switch (i->type) {
1208 case PACKET_MR_MULTICAST:
1209 if (what > 0)
1210 dev_mc_add(dev, i->addr, i->alen, 0);
1211 else
1212 dev_mc_delete(dev, i->addr, i->alen, 0);
1213 break;
1214 case PACKET_MR_PROMISC:
1215 dev_set_promiscuity(dev, what);
1216 break;
1217 case PACKET_MR_ALLMULTI:
1218 dev_set_allmulti(dev, what);
1219 break;
1220 default:;
1221 }
1222}
1223
1224static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1225{
1226 for ( ; i; i=i->next) {
1227 if (i->ifindex == dev->ifindex)
1228 packet_dev_mc(dev, i, what);
1229 }
1230}
1231
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001232static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233{
1234 struct packet_sock *po = pkt_sk(sk);
1235 struct packet_mclist *ml, *i;
1236 struct net_device *dev;
1237 int err;
1238
1239 rtnl_lock();
1240
1241 err = -ENODEV;
1242 dev = __dev_get_by_index(mreq->mr_ifindex);
1243 if (!dev)
1244 goto done;
1245
1246 err = -EINVAL;
1247 if (mreq->mr_alen > dev->addr_len)
1248 goto done;
1249
1250 err = -ENOBUFS;
Kris Katterjohn8b3a7002006-01-11 15:56:43 -08001251 i = kmalloc(sizeof(*i), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252 if (i == NULL)
1253 goto done;
1254
1255 err = 0;
1256 for (ml = po->mclist; ml; ml = ml->next) {
1257 if (ml->ifindex == mreq->mr_ifindex &&
1258 ml->type == mreq->mr_type &&
1259 ml->alen == mreq->mr_alen &&
1260 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1261 ml->count++;
1262 /* Free the new element ... */
1263 kfree(i);
1264 goto done;
1265 }
1266 }
1267
1268 i->type = mreq->mr_type;
1269 i->ifindex = mreq->mr_ifindex;
1270 i->alen = mreq->mr_alen;
1271 memcpy(i->addr, mreq->mr_address, i->alen);
1272 i->count = 1;
1273 i->next = po->mclist;
1274 po->mclist = i;
1275 packet_dev_mc(dev, i, +1);
1276
1277done:
1278 rtnl_unlock();
1279 return err;
1280}
1281
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001282static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283{
1284 struct packet_mclist *ml, **mlp;
1285
1286 rtnl_lock();
1287
1288 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1289 if (ml->ifindex == mreq->mr_ifindex &&
1290 ml->type == mreq->mr_type &&
1291 ml->alen == mreq->mr_alen &&
1292 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1293 if (--ml->count == 0) {
1294 struct net_device *dev;
1295 *mlp = ml->next;
1296 dev = dev_get_by_index(ml->ifindex);
1297 if (dev) {
1298 packet_dev_mc(dev, ml, -1);
1299 dev_put(dev);
1300 }
1301 kfree(ml);
1302 }
1303 rtnl_unlock();
1304 return 0;
1305 }
1306 }
1307 rtnl_unlock();
1308 return -EADDRNOTAVAIL;
1309}
1310
1311static void packet_flush_mclist(struct sock *sk)
1312{
1313 struct packet_sock *po = pkt_sk(sk);
1314 struct packet_mclist *ml;
1315
1316 if (!po->mclist)
1317 return;
1318
1319 rtnl_lock();
1320 while ((ml = po->mclist) != NULL) {
1321 struct net_device *dev;
1322
1323 po->mclist = ml->next;
1324 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1325 packet_dev_mc(dev, ml, -1);
1326 dev_put(dev);
1327 }
1328 kfree(ml);
1329 }
1330 rtnl_unlock();
1331}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332
1333static int
1334packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1335{
1336 struct sock *sk = sock->sk;
Herbert Xu8dc41942007-02-04 23:31:32 -08001337 struct packet_sock *po = pkt_sk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338 int ret;
1339
1340 if (level != SOL_PACKET)
1341 return -ENOPROTOOPT;
1342
1343 switch(optname) {
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001344 case PACKET_ADD_MEMBERSHIP:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345 case PACKET_DROP_MEMBERSHIP:
1346 {
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001347 struct packet_mreq_max mreq;
1348 int len = optlen;
1349 memset(&mreq, 0, sizeof(mreq));
1350 if (len < sizeof(struct packet_mreq))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351 return -EINVAL;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001352 if (len > sizeof(mreq))
1353 len = sizeof(mreq);
1354 if (copy_from_user(&mreq,optval,len))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355 return -EFAULT;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001356 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1357 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358 if (optname == PACKET_ADD_MEMBERSHIP)
1359 ret = packet_mc_add(sk, &mreq);
1360 else
1361 ret = packet_mc_drop(sk, &mreq);
1362 return ret;
1363 }
David S. Millera2efcfa2007-05-29 13:12:50 -07001364
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365#ifdef CONFIG_PACKET_MMAP
1366 case PACKET_RX_RING:
1367 {
1368 struct tpacket_req req;
1369
1370 if (optlen<sizeof(req))
1371 return -EINVAL;
1372 if (copy_from_user(&req,optval,sizeof(req)))
1373 return -EFAULT;
1374 return packet_set_ring(sk, &req, 0);
1375 }
1376 case PACKET_COPY_THRESH:
1377 {
1378 int val;
1379
1380 if (optlen!=sizeof(val))
1381 return -EINVAL;
1382 if (copy_from_user(&val,optval,sizeof(val)))
1383 return -EFAULT;
1384
1385 pkt_sk(sk)->copy_thresh = val;
1386 return 0;
1387 }
1388#endif
Herbert Xu8dc41942007-02-04 23:31:32 -08001389 case PACKET_AUXDATA:
1390 {
1391 int val;
1392
1393 if (optlen < sizeof(val))
1394 return -EINVAL;
1395 if (copy_from_user(&val, optval, sizeof(val)))
1396 return -EFAULT;
1397
1398 po->auxdata = !!val;
1399 return 0;
1400 }
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -07001401 case PACKET_ORIGDEV:
1402 {
1403 int val;
1404
1405 if (optlen < sizeof(val))
1406 return -EINVAL;
1407 if (copy_from_user(&val, optval, sizeof(val)))
1408 return -EFAULT;
1409
1410 po->origdev = !!val;
1411 return 0;
1412 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413 default:
1414 return -ENOPROTOOPT;
1415 }
1416}
1417
1418static int packet_getsockopt(struct socket *sock, int level, int optname,
1419 char __user *optval, int __user *optlen)
1420{
1421 int len;
Herbert Xu8dc41942007-02-04 23:31:32 -08001422 int val;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423 struct sock *sk = sock->sk;
1424 struct packet_sock *po = pkt_sk(sk);
Herbert Xu8dc41942007-02-04 23:31:32 -08001425 void *data;
1426 struct tpacket_stats st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427
1428 if (level != SOL_PACKET)
1429 return -ENOPROTOOPT;
1430
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001431 if (get_user(len, optlen))
1432 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433
1434 if (len < 0)
1435 return -EINVAL;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001436
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437 switch(optname) {
1438 case PACKET_STATISTICS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 if (len > sizeof(struct tpacket_stats))
1440 len = sizeof(struct tpacket_stats);
1441 spin_lock_bh(&sk->sk_receive_queue.lock);
1442 st = po->stats;
1443 memset(&po->stats, 0, sizeof(st));
1444 spin_unlock_bh(&sk->sk_receive_queue.lock);
1445 st.tp_packets += st.tp_drops;
1446
Herbert Xu8dc41942007-02-04 23:31:32 -08001447 data = &st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001448 break;
Herbert Xu8dc41942007-02-04 23:31:32 -08001449 case PACKET_AUXDATA:
1450 if (len > sizeof(int))
1451 len = sizeof(int);
1452 val = po->auxdata;
1453
1454 data = &val;
1455 break;
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -07001456 case PACKET_ORIGDEV:
1457 if (len > sizeof(int))
1458 len = sizeof(int);
1459 val = po->origdev;
1460
1461 data = &val;
1462 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463 default:
1464 return -ENOPROTOOPT;
1465 }
1466
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001467 if (put_user(len, optlen))
1468 return -EFAULT;
Herbert Xu8dc41942007-02-04 23:31:32 -08001469 if (copy_to_user(optval, data, len))
1470 return -EFAULT;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001471 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472}
1473
1474
1475static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1476{
1477 struct sock *sk;
1478 struct hlist_node *node;
Jason Lunzad930652007-02-20 23:19:54 -08001479 struct net_device *dev = data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480
1481 read_lock(&packet_sklist_lock);
1482 sk_for_each(sk, node, &packet_sklist) {
1483 struct packet_sock *po = pkt_sk(sk);
1484
1485 switch (msg) {
1486 case NETDEV_UNREGISTER:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487 if (po->mclist)
1488 packet_dev_mclist(dev, po->mclist, -1);
David S. Millera2efcfa2007-05-29 13:12:50 -07001489 /* fallthrough */
1490
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491 case NETDEV_DOWN:
1492 if (dev->ifindex == po->ifindex) {
1493 spin_lock(&po->bind_lock);
1494 if (po->running) {
1495 __dev_remove_pack(&po->prot_hook);
1496 __sock_put(sk);
1497 po->running = 0;
1498 sk->sk_err = ENETDOWN;
1499 if (!sock_flag(sk, SOCK_DEAD))
1500 sk->sk_error_report(sk);
1501 }
1502 if (msg == NETDEV_UNREGISTER) {
1503 po->ifindex = -1;
1504 po->prot_hook.dev = NULL;
1505 }
1506 spin_unlock(&po->bind_lock);
1507 }
1508 break;
1509 case NETDEV_UP:
1510 spin_lock(&po->bind_lock);
1511 if (dev->ifindex == po->ifindex && po->num &&
1512 !po->running) {
1513 dev_add_pack(&po->prot_hook);
1514 sock_hold(sk);
1515 po->running = 1;
1516 }
1517 spin_unlock(&po->bind_lock);
1518 break;
1519 }
1520 }
1521 read_unlock(&packet_sklist_lock);
1522 return NOTIFY_DONE;
1523}
1524
1525
1526static int packet_ioctl(struct socket *sock, unsigned int cmd,
1527 unsigned long arg)
1528{
1529 struct sock *sk = sock->sk;
1530
1531 switch(cmd) {
1532 case SIOCOUTQ:
1533 {
1534 int amount = atomic_read(&sk->sk_wmem_alloc);
1535 return put_user(amount, (int __user *)arg);
1536 }
1537 case SIOCINQ:
1538 {
1539 struct sk_buff *skb;
1540 int amount = 0;
1541
1542 spin_lock_bh(&sk->sk_receive_queue.lock);
1543 skb = skb_peek(&sk->sk_receive_queue);
1544 if (skb)
1545 amount = skb->len;
1546 spin_unlock_bh(&sk->sk_receive_queue.lock);
1547 return put_user(amount, (int __user *)arg);
1548 }
1549 case SIOCGSTAMP:
1550 return sock_get_timestamp(sk, (struct timeval __user *)arg);
Eric Dumazetae40eb12007-03-18 17:33:16 -07001551 case SIOCGSTAMPNS:
1552 return sock_get_timestampns(sk, (struct timespec __user *)arg);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001553
Linus Torvalds1da177e2005-04-16 15:20:36 -07001554#ifdef CONFIG_INET
1555 case SIOCADDRT:
1556 case SIOCDELRT:
1557 case SIOCDARP:
1558 case SIOCGARP:
1559 case SIOCSARP:
1560 case SIOCGIFADDR:
1561 case SIOCSIFADDR:
1562 case SIOCGIFBRDADDR:
1563 case SIOCSIFBRDADDR:
1564 case SIOCGIFNETMASK:
1565 case SIOCSIFNETMASK:
1566 case SIOCGIFDSTADDR:
1567 case SIOCSIFDSTADDR:
1568 case SIOCSIFFLAGS:
1569 return inet_dgram_ops.ioctl(sock, cmd, arg);
1570#endif
1571
1572 default:
Christoph Hellwigb5e5fa52006-01-03 14:18:33 -08001573 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 }
1575 return 0;
1576}
1577
1578#ifndef CONFIG_PACKET_MMAP
1579#define packet_mmap sock_no_mmap
1580#define packet_poll datagram_poll
1581#else
1582
1583static unsigned int packet_poll(struct file * file, struct socket *sock,
1584 poll_table *wait)
1585{
1586 struct sock *sk = sock->sk;
1587 struct packet_sock *po = pkt_sk(sk);
1588 unsigned int mask = datagram_poll(file, sock, wait);
1589
1590 spin_lock_bh(&sk->sk_receive_queue.lock);
1591 if (po->pg_vec) {
1592 unsigned last = po->head ? po->head-1 : po->frame_max;
1593 struct tpacket_hdr *h;
1594
Jason Lunzad930652007-02-20 23:19:54 -08001595 h = packet_lookup_frame(po, last);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596
1597 if (h->tp_status)
1598 mask |= POLLIN | POLLRDNORM;
1599 }
1600 spin_unlock_bh(&sk->sk_receive_queue.lock);
1601 return mask;
1602}
1603
1604
1605/* Dirty? Well, I still did not learn better way to account
1606 * for user mmaps.
1607 */
1608
1609static void packet_mm_open(struct vm_area_struct *vma)
1610{
1611 struct file *file = vma->vm_file;
Eric Dumazetb69aee02005-09-06 14:42:45 -07001612 struct socket * sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001614
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615 if (sk)
1616 atomic_inc(&pkt_sk(sk)->mapped);
1617}
1618
1619static void packet_mm_close(struct vm_area_struct *vma)
1620{
1621 struct file *file = vma->vm_file;
Eric Dumazetb69aee02005-09-06 14:42:45 -07001622 struct socket * sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001624
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625 if (sk)
1626 atomic_dec(&pkt_sk(sk)->mapped);
1627}
1628
1629static struct vm_operations_struct packet_mmap_ops = {
1630 .open = packet_mm_open,
1631 .close =packet_mm_close,
1632};
1633
1634static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
1635{
1636 return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
1637}
1638
/*
 *	Free a block vector: every non-NULL block of the given order,
 *	then the pointer array itself.  "len" is the number of slots.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	unsigned int idx;

	for (idx = 0; idx < len; idx++) {
		char *block = pg_vec[idx];

		/* slots may be empty after a partial allocation */
		if (likely(block))
			free_pages((unsigned long) block, order);
	}
	kfree(pg_vec);
}
1649
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001650static inline char *alloc_one_pg_vec_page(unsigned long order)
1651{
1652 return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1653 order);
1654}
1655
1656static char **alloc_pg_vec(struct tpacket_req *req, int order)
1657{
1658 unsigned int block_nr = req->tp_block_nr;
1659 char **pg_vec;
1660 int i;
1661
1662 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1663 if (unlikely(!pg_vec))
1664 goto out;
1665
1666 for (i = 0; i < block_nr; i++) {
1667 pg_vec[i] = alloc_one_pg_vec_page(order);
1668 if (unlikely(!pg_vec[i]))
1669 goto out_free_pgvec;
1670 }
1671
1672out:
1673 return pg_vec;
1674
1675out_free_pgvec:
1676 free_pg_vec(pg_vec, order, block_nr);
1677 pg_vec = NULL;
1678 goto out;
1679}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680
1681static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1682{
1683 char **pg_vec = NULL;
1684 struct packet_sock *po = pkt_sk(sk);
Al Viro0e11c912006-11-08 00:26:29 -08001685 int was_running, order = 0;
1686 __be16 num;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687 int err = 0;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001688
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689 if (req->tp_block_nr) {
1690 int i, l;
1691
1692 /* Sanity tests and some calculations */
1693
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001694 if (unlikely(po->pg_vec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 return -EBUSY;
1696
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001697 if (unlikely((int)req->tp_block_size <= 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001699 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001701 if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001703 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 return -EINVAL;
1705
1706 po->frames_per_block = req->tp_block_size/req->tp_frame_size;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001707 if (unlikely(po->frames_per_block <= 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001709 if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1710 req->tp_frame_nr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712
1713 err = -ENOMEM;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001714 order = get_order(req->tp_block_size);
1715 pg_vec = alloc_pg_vec(req, order);
1716 if (unlikely(!pg_vec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718
1719 l = 0;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001720 for (i = 0; i < req->tp_block_nr; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721 char *ptr = pg_vec[i];
1722 struct tpacket_hdr *header;
1723 int k;
1724
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001725 for (k = 0; k < po->frames_per_block; k++) {
1726 header = (struct tpacket_hdr *) ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727 header->tp_status = TP_STATUS_KERNEL;
1728 ptr += req->tp_frame_size;
1729 }
1730 }
1731 /* Done */
1732 } else {
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001733 if (unlikely(req->tp_frame_nr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 return -EINVAL;
1735 }
1736
1737 lock_sock(sk);
1738
1739 /* Detach socket from network */
1740 spin_lock(&po->bind_lock);
1741 was_running = po->running;
1742 num = po->num;
1743 if (was_running) {
1744 __dev_remove_pack(&po->prot_hook);
1745 po->num = 0;
1746 po->running = 0;
1747 __sock_put(sk);
1748 }
1749 spin_unlock(&po->bind_lock);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001750
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751 synchronize_net();
1752
1753 err = -EBUSY;
1754 if (closing || atomic_read(&po->mapped) == 0) {
1755 err = 0;
1756#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1757
1758 spin_lock_bh(&sk->sk_receive_queue.lock);
1759 pg_vec = XC(po->pg_vec, pg_vec);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001760 po->frame_max = (req->tp_frame_nr - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 po->head = 0;
1762 po->frame_size = req->tp_frame_size;
1763 spin_unlock_bh(&sk->sk_receive_queue.lock);
1764
1765 order = XC(po->pg_vec_order, order);
1766 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1767
1768 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1769 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1770 skb_queue_purge(&sk->sk_receive_queue);
1771#undef XC
1772 if (atomic_read(&po->mapped))
1773 printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1774 }
1775
1776 spin_lock(&po->bind_lock);
1777 if (was_running && !po->running) {
1778 sock_hold(sk);
1779 po->running = 1;
1780 po->num = num;
1781 dev_add_pack(&po->prot_hook);
1782 }
1783 spin_unlock(&po->bind_lock);
1784
1785 release_sock(sk);
1786
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 if (pg_vec)
1788 free_pg_vec(pg_vec, order, req->tp_block_nr);
1789out:
1790 return err;
1791}
1792
1793static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1794{
1795 struct sock *sk = sock->sk;
1796 struct packet_sock *po = pkt_sk(sk);
1797 unsigned long size;
1798 unsigned long start;
1799 int err = -EINVAL;
1800 int i;
1801
1802 if (vma->vm_pgoff)
1803 return -EINVAL;
1804
1805 size = vma->vm_end - vma->vm_start;
1806
1807 lock_sock(sk);
1808 if (po->pg_vec == NULL)
1809 goto out;
1810 if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1811 goto out;
1812
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813 start = vma->vm_start;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001814 for (i = 0; i < po->pg_vec_len; i++) {
1815 struct page *page = virt_to_page(po->pg_vec[i]);
1816 int pg_num;
1817
1818 for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1819 err = vm_insert_page(vma, start, page);
1820 if (unlikely(err))
1821 goto out;
1822 start += PAGE_SIZE;
1823 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824 }
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001825 atomic_inc(&po->mapped);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826 vma->vm_ops = &packet_mmap_ops;
1827 err = 0;
1828
1829out:
1830 release_sock(sk);
1831 return err;
1832}
1833#endif
1834
1835
1836#ifdef CONFIG_SOCK_PACKET
Eric Dumazet90ddc4f2005-12-22 12:49:22 -08001837static const struct proto_ops packet_ops_spkt = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838 .family = PF_PACKET,
1839 .owner = THIS_MODULE,
1840 .release = packet_release,
1841 .bind = packet_bind_spkt,
1842 .connect = sock_no_connect,
1843 .socketpair = sock_no_socketpair,
1844 .accept = sock_no_accept,
1845 .getname = packet_getname_spkt,
1846 .poll = datagram_poll,
1847 .ioctl = packet_ioctl,
1848 .listen = sock_no_listen,
1849 .shutdown = sock_no_shutdown,
1850 .setsockopt = sock_no_setsockopt,
1851 .getsockopt = sock_no_getsockopt,
1852 .sendmsg = packet_sendmsg_spkt,
1853 .recvmsg = packet_recvmsg,
1854 .mmap = sock_no_mmap,
1855 .sendpage = sock_no_sendpage,
1856};
1857#endif
1858
Eric Dumazet90ddc4f2005-12-22 12:49:22 -08001859static const struct proto_ops packet_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860 .family = PF_PACKET,
1861 .owner = THIS_MODULE,
1862 .release = packet_release,
1863 .bind = packet_bind,
1864 .connect = sock_no_connect,
1865 .socketpair = sock_no_socketpair,
1866 .accept = sock_no_accept,
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001867 .getname = packet_getname,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868 .poll = packet_poll,
1869 .ioctl = packet_ioctl,
1870 .listen = sock_no_listen,
1871 .shutdown = sock_no_shutdown,
1872 .setsockopt = packet_setsockopt,
1873 .getsockopt = packet_getsockopt,
1874 .sendmsg = packet_sendmsg,
1875 .recvmsg = packet_recvmsg,
1876 .mmap = packet_mmap,
1877 .sendpage = sock_no_sendpage,
1878};
1879
1880static struct net_proto_family packet_family_ops = {
1881 .family = PF_PACKET,
1882 .create = packet_create,
1883 .owner = THIS_MODULE,
1884};
1885
1886static struct notifier_block packet_netdev_notifier = {
1887 .notifier_call =packet_notifier,
1888};
1889
1890#ifdef CONFIG_PROC_FS
1891static inline struct sock *packet_seq_idx(loff_t off)
1892{
1893 struct sock *s;
1894 struct hlist_node *node;
1895
1896 sk_for_each(s, node, &packet_sklist) {
1897 if (!off--)
1898 return s;
1899 }
1900 return NULL;
1901}
1902
1903static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1904{
1905 read_lock(&packet_sklist_lock);
1906 return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
1907}
1908
1909static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1910{
1911 ++*pos;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001912 return (v == SEQ_START_TOKEN)
1913 ? sk_head(&packet_sklist)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 : sk_next((struct sock*)v) ;
1915}
1916
1917static void packet_seq_stop(struct seq_file *seq, void *v)
1918{
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001919 read_unlock(&packet_sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920}
1921
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001922static int packet_seq_show(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923{
1924 if (v == SEQ_START_TOKEN)
1925 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
1926 else {
1927 struct sock *s = v;
1928 const struct packet_sock *po = pkt_sk(s);
1929
1930 seq_printf(seq,
1931 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
1932 s,
1933 atomic_read(&s->sk_refcnt),
1934 s->sk_type,
1935 ntohs(po->num),
1936 po->ifindex,
1937 po->running,
1938 atomic_read(&s->sk_rmem_alloc),
1939 sock_i_uid(s),
1940 sock_i_ino(s) );
1941 }
1942
1943 return 0;
1944}
1945
1946static struct seq_operations packet_seq_ops = {
1947 .start = packet_seq_start,
1948 .next = packet_seq_next,
1949 .stop = packet_seq_stop,
1950 .show = packet_seq_show,
1951};
1952
1953static int packet_seq_open(struct inode *inode, struct file *file)
1954{
1955 return seq_open(file, &packet_seq_ops);
1956}
1957
Arjan van de Venda7071d2007-02-12 00:55:36 -08001958static const struct file_operations packet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959 .owner = THIS_MODULE,
1960 .open = packet_seq_open,
1961 .read = seq_read,
1962 .llseek = seq_lseek,
1963 .release = seq_release,
1964};
1965
1966#endif
1967
1968static void __exit packet_exit(void)
1969{
1970 proc_net_remove("packet");
1971 unregister_netdevice_notifier(&packet_netdev_notifier);
1972 sock_unregister(PF_PACKET);
1973 proto_unregister(&packet_proto);
1974}
1975
1976static int __init packet_init(void)
1977{
1978 int rc = proto_register(&packet_proto, 0);
1979
1980 if (rc != 0)
1981 goto out;
1982
1983 sock_register(&packet_family_ops);
1984 register_netdevice_notifier(&packet_netdev_notifier);
1985 proc_net_fops_create("packet", 0, &packet_seq_fops);
1986out:
1987 return rc;
1988}
1989
1990module_init(packet_init);
1991module_exit(packet_exit);
1992MODULE_LICENSE("GPL");
1993MODULE_ALIAS_NETPROTO(PF_PACKET);