blob: efc1174af7169a8eff2d7d89e576daf8c8e0e918 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 *
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090012 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 * Alan Cox : verify_area() now used correctly
14 * Alan Cox : new skbuff lists, look ma no backlogs!
15 * Alan Cox : tidied skbuff lists.
16 * Alan Cox : Now uses generic datagram routines I
17 * added. Also fixed the peek/read crash
18 * from all old Linux datagram code.
19 * Alan Cox : Uses the improved datagram code.
20 * Alan Cox : Added NULL's for socket options.
21 * Alan Cox : Re-commented the code.
22 * Alan Cox : Use new kernel side addressing
23 * Rob Janssen : Correct MTU usage.
24 * Dave Platt : Counter leaks caused by incorrect
25 * interrupt locking and some slightly
26 * dubious gcc output. Can you read
27 * compiler: it said _VOLATILE_
28 * Richard Kooijman : Timestamp fixes.
29 * Alan Cox : New buffers. Use sk->mac.raw.
30 * Alan Cox : sendmsg/recvmsg support.
31 * Alan Cox : Protocol setting support
32 * Alexey Kuznetsov : Untied from IPv4 stack.
33 * Cyrus Durgin : Fixed kerneld for kmod.
34 * Michal Ostrowski : Module initialization cleanup.
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090035 * Ulises Alonso : Frame number limit removal and
Linus Torvalds1da177e2005-04-16 15:20:36 -070036 * packet_set_ring memory leak.
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070037 * Eric Biederman : Allow for > 8 byte hardware addresses.
38 * The convention is that longer addresses
39 * will simply extend the hardware address
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090040 * byte arrays at the end of sockaddr_ll
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070041 * and packet_mreq.
Johann Baudy69e3c752009-05-18 22:11:22 -070042 * Johann Baudy : Added TX RING.
Linus Torvalds1da177e2005-04-16 15:20:36 -070043 *
44 * This program is free software; you can redistribute it and/or
45 * modify it under the terms of the GNU General Public License
46 * as published by the Free Software Foundation; either version
47 * 2 of the License, or (at your option) any later version.
48 *
49 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090050
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <linux/mm.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080053#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070054#include <linux/fcntl.h>
55#include <linux/socket.h>
56#include <linux/in.h>
57#include <linux/inet.h>
58#include <linux/netdevice.h>
59#include <linux/if_packet.h>
60#include <linux/wireless.h>
Herbert Xuffbc6112007-02-04 23:33:10 -080061#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070062#include <linux/kmod.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020063#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070064#include <net/ip.h>
65#include <net/protocol.h>
66#include <linux/skbuff.h>
67#include <net/sock.h>
68#include <linux/errno.h>
69#include <linux/timer.h>
70#include <asm/system.h>
71#include <asm/uaccess.h>
72#include <asm/ioctls.h>
73#include <asm/page.h>
Al Viroa1f8e7f72006-10-19 16:08:53 -040074#include <asm/cacheflush.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070075#include <asm/io.h>
76#include <linux/proc_fs.h>
77#include <linux/seq_file.h>
78#include <linux/poll.h>
79#include <linux/module.h>
80#include <linux/init.h>
Herbert Xu905db442009-01-30 14:12:06 -080081#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070082
83#ifdef CONFIG_INET
84#include <net/inet_common.h>
85#endif
86
Linus Torvalds1da177e2005-04-16 15:20:36 -070087/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070088 Assumptions:
89 - if device has no dev->hard_header routine, it adds and removes ll header
90 inside itself. In this case ll header is invisible outside of device,
91 but higher levels still should reserve dev->hard_header_len.
92 Some devices are enough clever to reallocate skb, when header
93 will not fit to reserved space (tunnel), another ones are silly
94 (PPP).
95 - packet socket receives packets with pulled ll header,
96 so that SOCK_RAW should push it back.
97
98On receive:
99-----------
100
101Incoming, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700102 mac_header -> ll header
103 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104
105Outgoing, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700106 mac_header -> ll header
107 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108
109Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the ll
		 header.  PPP does this, which is wrong, because it introduces
		 asymmetry between rx and tx paths.
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700113 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114
115Outgoing, dev->hard_header==NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700116 mac_header -> data. ll header is still not built!
117 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700118
119Resume
120 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
121
122
123On transmit:
124------------
125
126dev->hard_header != NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700127 mac_header -> ll header
128 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129
130dev->hard_header == NULL (ll header is added by device, we cannot control it)
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700131 mac_header -> data
132 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133
   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
136 */
137
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138/* Private packet socket structures. */
139
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000140struct packet_mclist {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 struct packet_mclist *next;
142 int ifindex;
143 int count;
144 unsigned short type;
145 unsigned short alen;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700146 unsigned char addr[MAX_ADDR_LEN];
147};
148/* identical to struct packet_mreq except it has
149 * a longer address field.
150 */
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000151struct packet_mreq_max {
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700152 int mr_ifindex;
153 unsigned short mr_type;
154 unsigned short mr_alen;
155 unsigned char mr_address[MAX_ADDR_LEN];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156};
David S. Millera2efcfa2007-05-29 13:12:50 -0700157
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158#ifdef CONFIG_PACKET_MMAP
Johann Baudy69e3c752009-05-18 22:11:22 -0700159static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
160 int closing, int tx_ring);
161
162struct packet_ring_buffer {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000163 char **pg_vec;
Johann Baudy69e3c752009-05-18 22:11:22 -0700164 unsigned int head;
165 unsigned int frames_per_block;
166 unsigned int frame_size;
167 unsigned int frame_max;
168
169 unsigned int pg_vec_order;
170 unsigned int pg_vec_pages;
171 unsigned int pg_vec_len;
172
173 atomic_t pending;
174};
175
176struct packet_sock;
177static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178#endif
179
180static void packet_flush_mclist(struct sock *sk);
181
182struct packet_sock {
183 /* struct sock has to be the first member of packet_sock */
184 struct sock sk;
185 struct tpacket_stats stats;
186#ifdef CONFIG_PACKET_MMAP
Johann Baudy69e3c752009-05-18 22:11:22 -0700187 struct packet_ring_buffer rx_ring;
188 struct packet_ring_buffer tx_ring;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 int copy_thresh;
190#endif
191 struct packet_type prot_hook;
192 spinlock_t bind_lock;
Herbert Xu905db442009-01-30 14:12:06 -0800193 struct mutex pg_vec_lock;
Herbert Xu8dc41942007-02-04 23:31:32 -0800194 unsigned int running:1, /* prot_hook is attached*/
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700195 auxdata:1,
196 origdev:1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 int ifindex; /* bound device */
Al Viro0e11c912006-11-08 00:26:29 -0800198 __be16 num;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 struct packet_mclist *mclist;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200#ifdef CONFIG_PACKET_MMAP
201 atomic_t mapped;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700202 enum tpacket_versions tp_version;
203 unsigned int tp_hdrlen;
Patrick McHardy8913336a2008-07-18 18:05:19 -0700204 unsigned int tp_reserve;
Johann Baudy69e3c752009-05-18 22:11:22 -0700205 unsigned int tp_loss:1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206#endif
207};
208
Herbert Xuffbc6112007-02-04 23:33:10 -0800209struct packet_skb_cb {
210 unsigned int origlen;
211 union {
212 struct sockaddr_pkt pkt;
213 struct sockaddr_ll ll;
214 } sa;
215};
216
/* View the control buffer of @skb as a struct packet_skb_cb. */
#define PACKET_SKB_CB(skb)	((struct packet_skb_cb *)((skb)->cb))
Herbert Xu8dc41942007-02-04 23:31:32 -0800218
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219#ifdef CONFIG_PACKET_MMAP
220
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700221static void __packet_set_status(struct packet_sock *po, void *frame, int status)
222{
223 union {
224 struct tpacket_hdr *h1;
225 struct tpacket2_hdr *h2;
226 void *raw;
227 } h;
228
229 h.raw = frame;
230 switch (po->tp_version) {
231 case TPACKET_V1:
232 h.h1->tp_status = status;
Johann Baudy69e3c752009-05-18 22:11:22 -0700233 flush_dcache_page(virt_to_page(&h.h1->tp_status));
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700234 break;
235 case TPACKET_V2:
236 h.h2->tp_status = status;
Johann Baudy69e3c752009-05-18 22:11:22 -0700237 flush_dcache_page(virt_to_page(&h.h2->tp_status));
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700238 break;
Johann Baudy69e3c752009-05-18 22:11:22 -0700239 default:
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000240 pr_err("TPACKET version not supported\n");
Johann Baudy69e3c752009-05-18 22:11:22 -0700241 BUG();
242 }
243
244 smp_wmb();
245}
246
247static int __packet_get_status(struct packet_sock *po, void *frame)
248{
249 union {
250 struct tpacket_hdr *h1;
251 struct tpacket2_hdr *h2;
252 void *raw;
253 } h;
254
255 smp_rmb();
256
257 h.raw = frame;
258 switch (po->tp_version) {
259 case TPACKET_V1:
260 flush_dcache_page(virt_to_page(&h.h1->tp_status));
261 return h.h1->tp_status;
262 case TPACKET_V2:
263 flush_dcache_page(virt_to_page(&h.h2->tp_status));
264 return h.h2->tp_status;
265 default:
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000266 pr_err("TPACKET version not supported\n");
Johann Baudy69e3c752009-05-18 22:11:22 -0700267 BUG();
268 return 0;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700269 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700270}
Johann Baudy69e3c752009-05-18 22:11:22 -0700271
272static void *packet_lookup_frame(struct packet_sock *po,
273 struct packet_ring_buffer *rb,
274 unsigned int position,
275 int status)
276{
277 unsigned int pg_vec_pos, frame_offset;
278 union {
279 struct tpacket_hdr *h1;
280 struct tpacket2_hdr *h2;
281 void *raw;
282 } h;
283
284 pg_vec_pos = position / rb->frames_per_block;
285 frame_offset = position % rb->frames_per_block;
286
287 h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
288
289 if (status != __packet_get_status(po, h.raw))
290 return NULL;
291
292 return h.raw;
293}
294
295static inline void *packet_current_frame(struct packet_sock *po,
296 struct packet_ring_buffer *rb,
297 int status)
298{
299 return packet_lookup_frame(po, rb, rb->head, status);
300}
301
302static inline void *packet_previous_frame(struct packet_sock *po,
303 struct packet_ring_buffer *rb,
304 int status)
305{
306 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
307 return packet_lookup_frame(po, rb, previous, status);
308}
309
310static inline void packet_increment_head(struct packet_ring_buffer *buff)
311{
312 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
313}
314
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315#endif
316
317static inline struct packet_sock *pkt_sk(struct sock *sk)
318{
319 return (struct packet_sock *)sk;
320}
321
322static void packet_sock_destruct(struct sock *sk)
323{
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700324 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
325 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326
327 if (!sock_flag(sk, SOCK_DEAD)) {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000328 pr_err("Attempt to release alive packet socket: %p\n", sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 return;
330 }
331
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -0800332 sk_refcnt_debug_dec(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333}
334
335
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800336static const struct proto_ops packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800338static const struct proto_ops packet_ops_spkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000340static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
341 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342{
343 struct sock *sk;
344 struct sockaddr_pkt *spkt;
345
346 /*
347 * When we registered the protocol we saved the socket in the data
348 * field for just this event.
349 */
350
351 sk = pt->af_packet_priv;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900352
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 /*
354 * Yank back the headers [hope the device set this
355 * right or kerboom...]
356 *
357 * Incoming packets have ll header pulled,
358 * push it back.
359 *
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700360 * For outgoing ones skb->data == skb_mac_header(skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 * so that this procedure is noop.
362 */
363
364 if (skb->pkt_type == PACKET_LOOPBACK)
365 goto out;
366
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900367 if (dev_net(dev) != sock_net(sk))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800368 goto out;
369
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000370 skb = skb_share_check(skb, GFP_ATOMIC);
371 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372 goto oom;
373
374 /* drop any routing info */
Eric Dumazetadf30902009-06-02 05:19:30 +0000375 skb_dst_drop(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376
Phil Oester84531c22005-07-12 11:57:52 -0700377 /* drop conntrack reference */
378 nf_reset(skb);
379
Herbert Xuffbc6112007-02-04 23:33:10 -0800380 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700382 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383
384 /*
385 * The SOCK_PACKET socket receives _all_ frames.
386 */
387
388 spkt->spkt_family = dev->type;
389 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
390 spkt->spkt_protocol = skb->protocol;
391
392 /*
393 * Charge the memory to the socket. This is done specifically
394 * to prevent sockets using all the memory up.
395 */
396
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000397 if (sock_queue_rcv_skb(sk, skb) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 return 0;
399
400out:
401 kfree_skb(skb);
402oom:
403 return 0;
404}
405
406
407/*
408 * Output a raw packet to a device layer. This bypasses all the other
409 * protocol layers and you must therefore supply it with a complete frame
410 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900411
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
413 struct msghdr *msg, size_t len)
414{
415 struct sock *sk = sock->sk;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000416 struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417 struct sk_buff *skb;
418 struct net_device *dev;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000419 __be16 proto = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 int err;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900421
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900423 * Get and verify the address.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 */
425
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000426 if (saddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 if (msg->msg_namelen < sizeof(struct sockaddr))
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000428 return -EINVAL;
429 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
430 proto = saddr->spkt_protocol;
431 } else
432 return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433
434 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900435 * Find the device first to size check it
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 */
437
438 saddr->spkt_device[13] = 0;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900439 dev = dev_get_by_name(sock_net(sk), saddr->spkt_device);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440 err = -ENODEV;
441 if (dev == NULL)
442 goto out_unlock;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900443
David S. Millerd5e76b02007-01-25 19:30:36 -0800444 err = -ENETDOWN;
445 if (!(dev->flags & IFF_UP))
446 goto out_unlock;
447
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 /*
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000449 * You may not queue a frame bigger than the mtu. This is the lowest level
450 * raw protocol and you must do your own fragmentation at this level.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900452
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 err = -EMSGSIZE;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -0800454 if (len > dev->mtu + dev->hard_header_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 goto out_unlock;
456
457 err = -ENOBUFS;
458 skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
459
460 /*
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000461 * If the write buffer is full, then tough. At this level the user
462 * gets to deal with the problem - do your own algorithmic backoffs.
463 * That's far more flexible.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900465
466 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 goto out_unlock;
468
469 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900470 * Fill it in
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900472
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 /* FIXME: Save some space for broken drivers that write a
474 * hard header at transmission time by themselves. PPP is the
475 * notable one here. This should really be fixed at the driver level.
476 */
477 skb_reserve(skb, LL_RESERVED_SPACE(dev));
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700478 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479
480 /* Try to align data part correctly */
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700481 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 skb->data -= dev->hard_header_len;
483 skb->tail -= dev->hard_header_len;
484 if (len < dev->hard_header_len)
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700485 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486 }
487
488 /* Returns -EFAULT on error */
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000489 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 skb->protocol = proto;
491 skb->dev = dev;
492 skb->priority = sk->sk_priority;
Eric Dumazet2d37a182009-10-01 19:14:46 +0000493 skb->mark = sk->sk_mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494 if (err)
495 goto out_free;
496
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497 /*
498 * Now send it
499 */
500
501 dev_queue_xmit(skb);
502 dev_put(dev);
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000503 return len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504
505out_free:
506 kfree_skb(skb);
507out_unlock:
508 if (dev)
509 dev_put(dev);
510 return err;
511}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512
David S. Millerdbcb5852007-01-24 15:21:02 -0800513static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
514 unsigned int res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515{
516 struct sk_filter *filter;
517
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700518 rcu_read_lock_bh();
519 filter = rcu_dereference(sk->sk_filter);
David S. Millerdbcb5852007-01-24 15:21:02 -0800520 if (filter != NULL)
521 res = sk_run_filter(skb, filter->insns, filter->len);
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700522 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523
David S. Millerdbcb5852007-01-24 15:21:02 -0800524 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525}
526
527/*
Neil Horman97775002009-10-02 06:56:41 +0000528 * If we've lost frames since the last time we queued one to the
529 * sk_receive_queue, we need to record it here.
530 * This must be called under the protection of the socket lock
531 * to prevent racing with other softirqs and user space
532 */
533static inline void record_packet_gap(struct sk_buff *skb,
534 struct packet_sock *po)
535{
536 /*
537 * We overload the mark field here, since we're about
538 * to enqueue to a receive queue and no body else will
539 * use this field at this point
540 */
541 skb->mark = po->stats.tp_gap;
542 po->stats.tp_gap = 0;
543 return;
544
545}
546
547static inline __u32 check_packet_gap(struct sk_buff *skb)
548{
549 return skb->mark;
550}
551
552/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553 This function makes lazy skb cloning in hope that most of packets
554 are discarded by BPF.
555
556 Note tricky part: we DO mangle shared skb! skb->data, skb->len
557 and skb->cb are mangled. It works because (and until) packets
558 falling here are owned by current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return skb to original state on exit,
561 we will not harm anyone.
562 */
563
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000564static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
565 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566{
567 struct sock *sk;
568 struct sockaddr_ll *sll;
569 struct packet_sock *po;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000570 u8 *skb_head = skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800572 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573
574 if (skb->pkt_type == PACKET_LOOPBACK)
575 goto drop;
576
577 sk = pt->af_packet_priv;
578 po = pkt_sk(sk);
579
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900580 if (dev_net(dev) != sock_net(sk))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800581 goto drop;
582
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 skb->dev = dev;
584
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700585 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 /* The device has an explicit notion of ll header,
587 exported to higher levels.
588
589 Otherwise, the device hides datails of it frame
590 structure, so that corresponding packet head
591 never delivered to user.
592 */
593 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700594 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 else if (skb->pkt_type == PACKET_OUTGOING) {
596 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300597 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 }
599 }
600
601 snaplen = skb->len;
602
David S. Millerdbcb5852007-01-24 15:21:02 -0800603 res = run_filter(skb, sk, snaplen);
604 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700605 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800606 if (snaplen > res)
607 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608
609 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
610 (unsigned)sk->sk_rcvbuf)
611 goto drop_n_acct;
612
613 if (skb_shared(skb)) {
614 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
615 if (nskb == NULL)
616 goto drop_n_acct;
617
618 if (skb_head != skb->data) {
619 skb->data = skb_head;
620 skb->len = skb_len;
621 }
622 kfree_skb(skb);
623 skb = nskb;
624 }
625
Herbert Xuffbc6112007-02-04 23:33:10 -0800626 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
627 sizeof(skb->cb));
628
629 sll = &PACKET_SKB_CB(skb)->sa.ll;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 sll->sll_family = AF_PACKET;
631 sll->sll_hatype = dev->type;
632 sll->sll_protocol = skb->protocol;
633 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800634 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700635 sll->sll_ifindex = orig_dev->ifindex;
636 else
637 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700639 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640
Herbert Xuffbc6112007-02-04 23:33:10 -0800641 PACKET_SKB_CB(skb)->origlen = skb->len;
Herbert Xu8dc41942007-02-04 23:31:32 -0800642
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643 if (pskb_trim(skb, snaplen))
644 goto drop_n_acct;
645
646 skb_set_owner_r(skb, sk);
647 skb->dev = NULL;
Eric Dumazetadf30902009-06-02 05:19:30 +0000648 skb_dst_drop(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649
Phil Oester84531c22005-07-12 11:57:52 -0700650 /* drop conntrack reference */
651 nf_reset(skb);
652
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653 spin_lock(&sk->sk_receive_queue.lock);
654 po->stats.tp_packets++;
Neil Horman97775002009-10-02 06:56:41 +0000655 record_packet_gap(skb, po);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656 __skb_queue_tail(&sk->sk_receive_queue, skb);
657 spin_unlock(&sk->sk_receive_queue.lock);
658 sk->sk_data_ready(sk, skb->len);
659 return 0;
660
661drop_n_acct:
662 spin_lock(&sk->sk_receive_queue.lock);
663 po->stats.tp_drops++;
Neil Horman97775002009-10-02 06:56:41 +0000664 po->stats.tp_gap++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 spin_unlock(&sk->sk_receive_queue.lock);
666
667drop_n_restore:
668 if (skb_head != skb->data && skb_shared(skb)) {
669 skb->data = skb_head;
670 skb->len = skb_len;
671 }
672drop:
Neil Hormanead2ceb2009-03-11 09:49:55 +0000673 consume_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 return 0;
675}
676
677#ifdef CONFIG_PACKET_MMAP
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000678static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
679 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680{
681 struct sock *sk;
682 struct packet_sock *po;
683 struct sockaddr_ll *sll;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700684 union {
685 struct tpacket_hdr *h1;
686 struct tpacket2_hdr *h2;
687 void *raw;
688 } h;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000689 u8 *skb_head = skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800691 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700693 unsigned short macoff, netoff, hdrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694 struct sk_buff *copy_skb = NULL;
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -0700695 struct timeval tv;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700696 struct timespec ts;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697
698 if (skb->pkt_type == PACKET_LOOPBACK)
699 goto drop;
700
701 sk = pt->af_packet_priv;
702 po = pkt_sk(sk);
703
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +0900704 if (dev_net(dev) != sock_net(sk))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800705 goto drop;
706
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700707 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700709 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 else if (skb->pkt_type == PACKET_OUTGOING) {
711 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300712 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 }
714 }
715
Herbert Xu8dc41942007-02-04 23:31:32 -0800716 if (skb->ip_summed == CHECKSUM_PARTIAL)
717 status |= TP_STATUS_CSUMNOTREADY;
718
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 snaplen = skb->len;
720
David S. Millerdbcb5852007-01-24 15:21:02 -0800721 res = run_filter(skb, sk, snaplen);
722 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700723 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800724 if (snaplen > res)
725 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726
727 if (sk->sk_type == SOCK_DGRAM) {
Patrick McHardy8913336a2008-07-18 18:05:19 -0700728 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
729 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 } else {
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300731 unsigned maclen = skb_network_offset(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700732 netoff = TPACKET_ALIGN(po->tp_hdrlen +
Patrick McHardy8913336a2008-07-18 18:05:19 -0700733 (maclen < 16 ? 16 : maclen)) +
734 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 macoff = netoff - maclen;
736 }
737
Johann Baudy69e3c752009-05-18 22:11:22 -0700738 if (macoff + snaplen > po->rx_ring.frame_size) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 if (po->copy_thresh &&
740 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
741 (unsigned)sk->sk_rcvbuf) {
742 if (skb_shared(skb)) {
743 copy_skb = skb_clone(skb, GFP_ATOMIC);
744 } else {
745 copy_skb = skb_get(skb);
746 skb_head = skb->data;
747 }
748 if (copy_skb)
749 skb_set_owner_r(copy_skb, sk);
750 }
Johann Baudy69e3c752009-05-18 22:11:22 -0700751 snaplen = po->rx_ring.frame_size - macoff;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 if ((int)snaplen < 0)
753 snaplen = 0;
754 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755
756 spin_lock(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -0700757 h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700758 if (!h.raw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 goto ring_is_full;
Johann Baudy69e3c752009-05-18 22:11:22 -0700760 packet_increment_head(&po->rx_ring);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 po->stats.tp_packets++;
762 if (copy_skb) {
763 status |= TP_STATUS_COPY;
764 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
765 }
766 if (!po->stats.tp_drops)
767 status &= ~TP_STATUS_LOSING;
768 spin_unlock(&sk->sk_receive_queue.lock);
769
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700770 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700772 switch (po->tp_version) {
773 case TPACKET_V1:
774 h.h1->tp_len = skb->len;
775 h.h1->tp_snaplen = snaplen;
776 h.h1->tp_mac = macoff;
777 h.h1->tp_net = netoff;
778 if (skb->tstamp.tv64)
779 tv = ktime_to_timeval(skb->tstamp);
780 else
781 do_gettimeofday(&tv);
782 h.h1->tp_sec = tv.tv_sec;
783 h.h1->tp_usec = tv.tv_usec;
784 hdrlen = sizeof(*h.h1);
785 break;
786 case TPACKET_V2:
787 h.h2->tp_len = skb->len;
788 h.h2->tp_snaplen = snaplen;
789 h.h2->tp_mac = macoff;
790 h.h2->tp_net = netoff;
791 if (skb->tstamp.tv64)
792 ts = ktime_to_timespec(skb->tstamp);
793 else
794 getnstimeofday(&ts);
795 h.h2->tp_sec = ts.tv_sec;
796 h.h2->tp_nsec = ts.tv_nsec;
Patrick McHardy393e52e2008-07-14 22:50:39 -0700797 h.h2->tp_vlan_tci = skb->vlan_tci;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700798 hdrlen = sizeof(*h.h2);
799 break;
800 default:
801 BUG();
802 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700804 sll = h.raw + TPACKET_ALIGN(hdrlen);
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700805 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 sll->sll_family = AF_PACKET;
807 sll->sll_hatype = dev->type;
808 sll->sll_protocol = skb->protocol;
809 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800810 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700811 sll->sll_ifindex = orig_dev->ifindex;
812 else
813 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700815 __packet_set_status(po, h.raw, status);
Ralf Baechlee16aa202006-12-07 00:11:33 -0800816 smp_mb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 {
818 struct page *p_start, *p_end;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700819 u8 *h_end = h.raw + macoff + snaplen - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700821 p_start = virt_to_page(h.raw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 p_end = virt_to_page(h_end);
823 while (p_start <= p_end) {
824 flush_dcache_page(p_start);
825 p_start++;
826 }
827 }
828
829 sk->sk_data_ready(sk, 0);
830
831drop_n_restore:
832 if (skb_head != skb->data && skb_shared(skb)) {
833 skb->data = skb_head;
834 skb->len = skb_len;
835 }
836drop:
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900837 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838 return 0;
839
840ring_is_full:
841 po->stats.tp_drops++;
Neil Horman97775002009-10-02 06:56:41 +0000842 po->stats.tp_gap++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 spin_unlock(&sk->sk_receive_queue.lock);
844
845 sk->sk_data_ready(sk, 0);
Wei Yongjunacb5d752009-02-25 00:36:42 +0000846 kfree_skb(copy_skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 goto drop_n_restore;
848}
849
Johann Baudy69e3c752009-05-18 22:11:22 -0700850static void tpacket_destruct_skb(struct sk_buff *skb)
851{
852 struct packet_sock *po = pkt_sk(skb->sk);
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000853 void *ph;
Johann Baudy69e3c752009-05-18 22:11:22 -0700854
855 BUG_ON(skb == NULL);
856
857 if (likely(po->tx_ring.pg_vec)) {
858 ph = skb_shinfo(skb)->destructor_arg;
859 BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
860 BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
861 atomic_dec(&po->tx_ring.pending);
862 __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
863 }
864
865 sock_wfree(skb);
866}
867
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000868static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
869 void *frame, struct net_device *dev, int size_max,
870 __be16 proto, unsigned char *addr)
Johann Baudy69e3c752009-05-18 22:11:22 -0700871{
872 union {
873 struct tpacket_hdr *h1;
874 struct tpacket2_hdr *h2;
875 void *raw;
876 } ph;
877 int to_write, offset, len, tp_len, nr_frags, len_max;
878 struct socket *sock = po->sk.sk_socket;
879 struct page *page;
880 void *data;
881 int err;
882
883 ph.raw = frame;
884
885 skb->protocol = proto;
886 skb->dev = dev;
887 skb->priority = po->sk.sk_priority;
Eric Dumazet2d37a182009-10-01 19:14:46 +0000888 skb->mark = po->sk.sk_mark;
Johann Baudy69e3c752009-05-18 22:11:22 -0700889 skb_shinfo(skb)->destructor_arg = ph.raw;
890
891 switch (po->tp_version) {
892 case TPACKET_V2:
893 tp_len = ph.h2->tp_len;
894 break;
895 default:
896 tp_len = ph.h1->tp_len;
897 break;
898 }
899 if (unlikely(tp_len > size_max)) {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000900 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
Johann Baudy69e3c752009-05-18 22:11:22 -0700901 return -EMSGSIZE;
902 }
903
904 skb_reserve(skb, LL_RESERVED_SPACE(dev));
905 skb_reset_network_header(skb);
906
907 data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
908 to_write = tp_len;
909
910 if (sock->type == SOCK_DGRAM) {
911 err = dev_hard_header(skb, dev, ntohs(proto), addr,
912 NULL, tp_len);
913 if (unlikely(err < 0))
914 return -EINVAL;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000915 } else if (dev->hard_header_len) {
Johann Baudy69e3c752009-05-18 22:11:22 -0700916 /* net device doesn't like empty head */
917 if (unlikely(tp_len <= dev->hard_header_len)) {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000918 pr_err("packet size is too short (%d < %d)\n",
919 tp_len, dev->hard_header_len);
Johann Baudy69e3c752009-05-18 22:11:22 -0700920 return -EINVAL;
921 }
922
923 skb_push(skb, dev->hard_header_len);
924 err = skb_store_bits(skb, 0, data,
925 dev->hard_header_len);
926 if (unlikely(err))
927 return err;
928
929 data += dev->hard_header_len;
930 to_write -= dev->hard_header_len;
931 }
932
933 err = -EFAULT;
934 page = virt_to_page(data);
935 offset = offset_in_page(data);
936 len_max = PAGE_SIZE - offset;
937 len = ((to_write > len_max) ? len_max : to_write);
938
939 skb->data_len = to_write;
940 skb->len += to_write;
941 skb->truesize += to_write;
942 atomic_add(to_write, &po->sk.sk_wmem_alloc);
943
944 while (likely(to_write)) {
945 nr_frags = skb_shinfo(skb)->nr_frags;
946
947 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000948 pr_err("Packet exceed the number of skb frags(%lu)\n",
949 MAX_SKB_FRAGS);
Johann Baudy69e3c752009-05-18 22:11:22 -0700950 return -EFAULT;
951 }
952
953 flush_dcache_page(page);
954 get_page(page);
955 skb_fill_page_desc(skb,
956 nr_frags,
957 page++, offset, len);
958 to_write -= len;
959 offset = 0;
960 len_max = PAGE_SIZE;
961 len = ((to_write > len_max) ? len_max : to_write);
962 }
963
964 return tp_len;
965}
966
967static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
968{
969 struct socket *sock;
970 struct sk_buff *skb;
971 struct net_device *dev;
972 __be16 proto;
973 int ifindex, err, reserve = 0;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000974 void *ph;
975 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
Johann Baudy69e3c752009-05-18 22:11:22 -0700976 int tp_len, size_max;
977 unsigned char *addr;
978 int len_sum = 0;
979 int status = 0;
980
981 sock = po->sk.sk_socket;
982
983 mutex_lock(&po->pg_vec_lock);
984
985 err = -EBUSY;
986 if (saddr == NULL) {
987 ifindex = po->ifindex;
988 proto = po->num;
989 addr = NULL;
990 } else {
991 err = -EINVAL;
992 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
993 goto out;
994 if (msg->msg_namelen < (saddr->sll_halen
995 + offsetof(struct sockaddr_ll,
996 sll_addr)))
997 goto out;
998 ifindex = saddr->sll_ifindex;
999 proto = saddr->sll_protocol;
1000 addr = saddr->sll_addr;
1001 }
1002
1003 dev = dev_get_by_index(sock_net(&po->sk), ifindex);
1004 err = -ENXIO;
1005 if (unlikely(dev == NULL))
1006 goto out;
1007
1008 reserve = dev->hard_header_len;
1009
1010 err = -ENETDOWN;
1011 if (unlikely(!(dev->flags & IFF_UP)))
1012 goto out_put;
1013
1014 size_max = po->tx_ring.frame_size
1015 - sizeof(struct skb_shared_info)
1016 - po->tp_hdrlen
1017 - LL_ALLOCATED_SPACE(dev)
1018 - sizeof(struct sockaddr_ll);
1019
1020 if (size_max > dev->mtu + reserve)
1021 size_max = dev->mtu + reserve;
1022
1023 do {
1024 ph = packet_current_frame(po, &po->tx_ring,
1025 TP_STATUS_SEND_REQUEST);
1026
1027 if (unlikely(ph == NULL)) {
1028 schedule();
1029 continue;
1030 }
1031
1032 status = TP_STATUS_SEND_REQUEST;
1033 skb = sock_alloc_send_skb(&po->sk,
1034 LL_ALLOCATED_SPACE(dev)
1035 + sizeof(struct sockaddr_ll),
1036 0, &err);
1037
1038 if (unlikely(skb == NULL))
1039 goto out_status;
1040
1041 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
1042 addr);
1043
1044 if (unlikely(tp_len < 0)) {
1045 if (po->tp_loss) {
1046 __packet_set_status(po, ph,
1047 TP_STATUS_AVAILABLE);
1048 packet_increment_head(&po->tx_ring);
1049 kfree_skb(skb);
1050 continue;
1051 } else {
1052 status = TP_STATUS_WRONG_FORMAT;
1053 err = tp_len;
1054 goto out_status;
1055 }
1056 }
1057
1058 skb->destructor = tpacket_destruct_skb;
1059 __packet_set_status(po, ph, TP_STATUS_SENDING);
1060 atomic_inc(&po->tx_ring.pending);
1061
1062 status = TP_STATUS_SEND_REQUEST;
1063 err = dev_queue_xmit(skb);
1064 if (unlikely(err > 0 && (err = net_xmit_errno(err)) != 0))
1065 goto out_xmit;
1066 packet_increment_head(&po->tx_ring);
1067 len_sum += tp_len;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001068 } while (likely((ph != NULL) || ((!(msg->msg_flags & MSG_DONTWAIT))
Johann Baudy69e3c752009-05-18 22:11:22 -07001069 && (atomic_read(&po->tx_ring.pending))))
1070 );
1071
1072 err = len_sum;
1073 goto out_put;
1074
1075out_xmit:
1076 skb->destructor = sock_wfree;
1077 atomic_dec(&po->tx_ring.pending);
1078out_status:
1079 __packet_set_status(po, ph, status);
1080 kfree_skb(skb);
1081out_put:
1082 dev_put(dev);
1083out:
1084 mutex_unlock(&po->pg_vec_lock);
1085 return err;
1086}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087#endif
1088
Johann Baudy69e3c752009-05-18 22:11:22 -07001089static int packet_snd(struct socket *sock,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 struct msghdr *msg, size_t len)
1091{
1092 struct sock *sk = sock->sk;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001093 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094 struct sk_buff *skb;
1095 struct net_device *dev;
Al Viro0e11c912006-11-08 00:26:29 -08001096 __be16 proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097 unsigned char *addr;
1098 int ifindex, err, reserve = 0;
1099
1100 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001101 * Get and verify the address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001103
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 if (saddr == NULL) {
1105 struct packet_sock *po = pkt_sk(sk);
1106
1107 ifindex = po->ifindex;
1108 proto = po->num;
1109 addr = NULL;
1110 } else {
1111 err = -EINVAL;
1112 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
1113 goto out;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001114 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
1115 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116 ifindex = saddr->sll_ifindex;
1117 proto = saddr->sll_protocol;
1118 addr = saddr->sll_addr;
1119 }
1120
1121
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001122 dev = dev_get_by_index(sock_net(sk), ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 err = -ENXIO;
1124 if (dev == NULL)
1125 goto out_unlock;
1126 if (sock->type == SOCK_RAW)
1127 reserve = dev->hard_header_len;
1128
David S. Millerd5e76b02007-01-25 19:30:36 -08001129 err = -ENETDOWN;
1130 if (!(dev->flags & IFF_UP))
1131 goto out_unlock;
1132
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 err = -EMSGSIZE;
1134 if (len > dev->mtu+reserve)
1135 goto out_unlock;
1136
Johannes Bergf5184d22008-05-12 20:48:31 -07001137 skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev),
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 msg->msg_flags & MSG_DONTWAIT, &err);
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001139 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 goto out_unlock;
1141
1142 skb_reserve(skb, LL_RESERVED_SPACE(dev));
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07001143 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144
Stephen Hemminger0c4e8582007-10-09 01:36:32 -07001145 err = -EINVAL;
1146 if (sock->type == SOCK_DGRAM &&
1147 dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
1148 goto out_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149
1150 /* Returns -EFAULT on error */
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001151 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152 if (err)
1153 goto out_free;
1154
1155 skb->protocol = proto;
1156 skb->dev = dev;
1157 skb->priority = sk->sk_priority;
Eric Dumazet2d37a182009-10-01 19:14:46 +00001158 skb->mark = sk->sk_mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160 /*
1161 * Now send it
1162 */
1163
1164 err = dev_queue_xmit(skb);
1165 if (err > 0 && (err = net_xmit_errno(err)) != 0)
1166 goto out_unlock;
1167
1168 dev_put(dev);
1169
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001170 return len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171
1172out_free:
1173 kfree_skb(skb);
1174out_unlock:
1175 if (dev)
1176 dev_put(dev);
1177out:
1178 return err;
1179}
1180
Johann Baudy69e3c752009-05-18 22:11:22 -07001181static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1182 struct msghdr *msg, size_t len)
1183{
1184#ifdef CONFIG_PACKET_MMAP
1185 struct sock *sk = sock->sk;
1186 struct packet_sock *po = pkt_sk(sk);
1187 if (po->tx_ring.pg_vec)
1188 return tpacket_snd(po, msg);
1189 else
1190#endif
1191 return packet_snd(sock, msg, len);
1192}
1193
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194/*
1195 * Close a PACKET socket. This is fairly simple. We immediately go
1196 * to 'closed' state and remove our protocol entry in the device list.
1197 */
1198
1199static int packet_release(struct socket *sock)
1200{
1201 struct sock *sk = sock->sk;
1202 struct packet_sock *po;
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08001203 struct net *net;
Johann Baudy69e3c752009-05-18 22:11:22 -07001204#ifdef CONFIG_PACKET_MMAP
1205 struct tpacket_req req;
1206#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207
1208 if (!sk)
1209 return 0;
1210
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001211 net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 po = pkt_sk(sk);
1213
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08001214 write_lock_bh(&net->packet.sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 sk_del_node_init(sk);
Eric Dumazet920de802008-11-24 00:09:29 -08001216 sock_prot_inuse_add(net, sk->sk_prot, -1);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08001217 write_unlock_bh(&net->packet.sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218
1219 /*
1220 * Unhook packet receive handler.
1221 */
1222
1223 if (po->running) {
1224 /*
1225 * Remove the protocol hook
1226 */
1227 dev_remove_pack(&po->prot_hook);
1228 po->running = 0;
1229 po->num = 0;
1230 __sock_put(sk);
1231 }
1232
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233 packet_flush_mclist(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234
1235#ifdef CONFIG_PACKET_MMAP
Johann Baudy69e3c752009-05-18 22:11:22 -07001236 memset(&req, 0, sizeof(req));
1237
1238 if (po->rx_ring.pg_vec)
1239 packet_set_ring(sk, &req, 1, 0);
1240
1241 if (po->tx_ring.pg_vec)
1242 packet_set_ring(sk, &req, 1, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243#endif
1244
1245 /*
1246 * Now the socket is dead. No more input will appear.
1247 */
1248
1249 sock_orphan(sk);
1250 sock->sk = NULL;
1251
1252 /* Purge queues */
1253
1254 skb_queue_purge(&sk->sk_receive_queue);
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -08001255 sk_refcnt_debug_release(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256
1257 sock_put(sk);
1258 return 0;
1259}
1260
1261/*
1262 * Attach a packet hook.
1263 */
1264
Al Viro0e11c912006-11-08 00:26:29 -08001265static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266{
1267 struct packet_sock *po = pkt_sk(sk);
1268 /*
1269 * Detach an existing hook if present.
1270 */
1271
1272 lock_sock(sk);
1273
1274 spin_lock(&po->bind_lock);
1275 if (po->running) {
1276 __sock_put(sk);
1277 po->running = 0;
1278 po->num = 0;
1279 spin_unlock(&po->bind_lock);
1280 dev_remove_pack(&po->prot_hook);
1281 spin_lock(&po->bind_lock);
1282 }
1283
1284 po->num = protocol;
1285 po->prot_hook.type = protocol;
1286 po->prot_hook.dev = dev;
1287
1288 po->ifindex = dev ? dev->ifindex : 0;
1289
1290 if (protocol == 0)
1291 goto out_unlock;
1292
Urs Thuermannbe85d4a2007-11-12 21:05:20 -08001293 if (!dev || (dev->flags & IFF_UP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294 dev_add_pack(&po->prot_hook);
1295 sock_hold(sk);
1296 po->running = 1;
Urs Thuermannbe85d4a2007-11-12 21:05:20 -08001297 } else {
1298 sk->sk_err = ENETDOWN;
1299 if (!sock_flag(sk, SOCK_DEAD))
1300 sk->sk_error_report(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 }
1302
1303out_unlock:
1304 spin_unlock(&po->bind_lock);
1305 release_sock(sk);
1306 return 0;
1307}
1308
1309/*
1310 * Bind a packet socket to a device
1311 */
1312
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001313static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1314 int addr_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001316 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317 char name[15];
1318 struct net_device *dev;
1319 int err = -ENODEV;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001320
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321 /*
1322 * Check legality
1323 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001324
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001325 if (addr_len != sizeof(struct sockaddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326 return -EINVAL;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001327 strlcpy(name, uaddr->sa_data, sizeof(name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001329 dev = dev_get_by_name(sock_net(sk), name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330 if (dev) {
1331 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1332 dev_put(dev);
1333 }
1334 return err;
1335}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336
1337static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1338{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001339 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1340 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 struct net_device *dev = NULL;
1342 int err;
1343
1344
1345 /*
1346 * Check legality
1347 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001348
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349 if (addr_len < sizeof(struct sockaddr_ll))
1350 return -EINVAL;
1351 if (sll->sll_family != AF_PACKET)
1352 return -EINVAL;
1353
1354 if (sll->sll_ifindex) {
1355 err = -ENODEV;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001356 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 if (dev == NULL)
1358 goto out;
1359 }
1360 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1361 if (dev)
1362 dev_put(dev);
1363
1364out:
1365 return err;
1366}
1367
1368static struct proto packet_proto = {
1369 .name = "PACKET",
1370 .owner = THIS_MODULE,
1371 .obj_size = sizeof(struct packet_sock),
1372};
1373
1374/*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001375 * Create a packet of type SOCK_PACKET.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 */
1377
Eric W. Biederman1b8d7ae2007-10-08 23:24:22 -07001378static int packet_create(struct net *net, struct socket *sock, int protocol)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379{
1380 struct sock *sk;
1381 struct packet_sock *po;
Al Viro0e11c912006-11-08 00:26:29 -08001382 __be16 proto = (__force __be16)protocol; /* weird, but documented */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383 int err;
1384
1385 if (!capable(CAP_NET_RAW))
1386 return -EPERM;
David S. Millerbe020972007-05-29 13:16:31 -07001387 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
1388 sock->type != SOCK_PACKET)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389 return -ESOCKTNOSUPPORT;
1390
1391 sock->state = SS_UNCONNECTED;
1392
1393 err = -ENOBUFS;
Pavel Emelyanov6257ff22007-11-01 00:39:31 -07001394 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395 if (sk == NULL)
1396 goto out;
1397
1398 sock->ops = &packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399 if (sock->type == SOCK_PACKET)
1400 sock->ops = &packet_ops_spkt;
David S. Millerbe020972007-05-29 13:16:31 -07001401
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 sock_init_data(sock, sk);
1403
1404 po = pkt_sk(sk);
1405 sk->sk_family = PF_PACKET;
Al Viro0e11c912006-11-08 00:26:29 -08001406 po->num = proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407
1408 sk->sk_destruct = packet_sock_destruct;
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -08001409 sk_refcnt_debug_inc(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410
1411 /*
1412 * Attach a protocol block
1413 */
1414
1415 spin_lock_init(&po->bind_lock);
Herbert Xu905db442009-01-30 14:12:06 -08001416 mutex_init(&po->pg_vec_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 po->prot_hook.func = packet_rcv;
David S. Millerbe020972007-05-29 13:16:31 -07001418
Linus Torvalds1da177e2005-04-16 15:20:36 -07001419 if (sock->type == SOCK_PACKET)
1420 po->prot_hook.func = packet_rcv_spkt;
David S. Millerbe020972007-05-29 13:16:31 -07001421
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 po->prot_hook.af_packet_priv = sk;
1423
Al Viro0e11c912006-11-08 00:26:29 -08001424 if (proto) {
1425 po->prot_hook.type = proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426 dev_add_pack(&po->prot_hook);
1427 sock_hold(sk);
1428 po->running = 1;
1429 }
1430
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08001431 write_lock_bh(&net->packet.sklist_lock);
1432 sk_add_node(sk, &net->packet.sklist);
Eric Dumazet36804532008-11-19 14:25:35 -08001433 sock_prot_inuse_add(net, &packet_proto, 1);
Eric Dumazet920de802008-11-24 00:09:29 -08001434 write_unlock_bh(&net->packet.sklist_lock);
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001435 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436out:
1437 return err;
1438}
1439
1440/*
1441 * Pull a packet from our receive queue and hand it to the user.
1442 * If necessary we block.
1443 */
1444
1445static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1446 struct msghdr *msg, size_t len, int flags)
1447{
1448 struct sock *sk = sock->sk;
1449 struct sk_buff *skb;
1450 int copied, err;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001451 struct sockaddr_ll *sll;
Neil Horman97775002009-10-02 06:56:41 +00001452 __u32 gap;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453
1454 err = -EINVAL;
1455 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1456 goto out;
1457
1458#if 0
1459 /* What error should we return now? EUNATTACH? */
1460 if (pkt_sk(sk)->ifindex < 0)
1461 return -ENODEV;
1462#endif
1463
1464 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001465 * Call the generic datagram receiver. This handles all sorts
1466 * of horrible races and re-entrancy so we can forget about it
1467 * in the protocol layers.
1468 *
1469 * Now it will return ENETDOWN, if device have just gone down,
1470 * but then it will block.
1471 */
1472
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001473 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474
1475 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001476 * An error occurred so return it. Because skb_recv_datagram()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 * handles the blocking we don't see and worry about blocking
1478 * retries.
1479 */
1480
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001481 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482 goto out;
1483
1484 /*
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001485 * If the address length field is there to be filled in, we fill
1486 * it in now.
1487 */
1488
Herbert Xuffbc6112007-02-04 23:33:10 -08001489 sll = &PACKET_SKB_CB(skb)->sa.ll;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001490 if (sock->type == SOCK_PACKET)
1491 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1492 else
1493 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1494
1495 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496 * You lose any data beyond the buffer you gave. If it worries a
1497 * user program they can ask the device for its MTU anyway.
1498 */
1499
1500 copied = skb->len;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001501 if (copied > len) {
1502 copied = len;
1503 msg->msg_flags |= MSG_TRUNC;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504 }
1505
1506 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1507 if (err)
1508 goto out_free;
1509
1510 sock_recv_timestamp(msg, sk, skb);
1511
1512 if (msg->msg_name)
Herbert Xuffbc6112007-02-04 23:33:10 -08001513 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1514 msg->msg_namelen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001515
Herbert Xu8dc41942007-02-04 23:31:32 -08001516 if (pkt_sk(sk)->auxdata) {
Herbert Xuffbc6112007-02-04 23:33:10 -08001517 struct tpacket_auxdata aux;
1518
1519 aux.tp_status = TP_STATUS_USER;
1520 if (skb->ip_summed == CHECKSUM_PARTIAL)
1521 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1522 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1523 aux.tp_snaplen = skb->len;
1524 aux.tp_mac = 0;
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001525 aux.tp_net = skb_network_offset(skb);
Patrick McHardy393e52e2008-07-14 22:50:39 -07001526 aux.tp_vlan_tci = skb->vlan_tci;
Herbert Xuffbc6112007-02-04 23:33:10 -08001527
1528 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
Herbert Xu8dc41942007-02-04 23:31:32 -08001529 }
1530
Neil Horman97775002009-10-02 06:56:41 +00001531 gap = check_packet_gap(skb);
1532 if (gap)
1533 put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u32), &gap);
1534
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535 /*
1536 * Free or return the buffer as appropriate. Again this
1537 * hides all the races and re-entrancy issues from us.
1538 */
1539 err = (flags&MSG_TRUNC) ? skb->len : copied;
1540
1541out_free:
1542 skb_free_datagram(sk, skb);
1543out:
1544 return err;
1545}
1546
Linus Torvalds1da177e2005-04-16 15:20:36 -07001547static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1548 int *uaddr_len, int peer)
1549{
1550 struct net_device *dev;
1551 struct sock *sk = sock->sk;
1552
1553 if (peer)
1554 return -EOPNOTSUPP;
1555
1556 uaddr->sa_family = AF_PACKET;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001557 dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 if (dev) {
1559 strlcpy(uaddr->sa_data, dev->name, 15);
1560 dev_put(dev);
1561 } else
1562 memset(uaddr->sa_data, 0, 14);
1563 *uaddr_len = sizeof(*uaddr);
1564
1565 return 0;
1566}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567
1568static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1569 int *uaddr_len, int peer)
1570{
1571 struct net_device *dev;
1572 struct sock *sk = sock->sk;
1573 struct packet_sock *po = pkt_sk(sk);
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001574 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575
1576 if (peer)
1577 return -EOPNOTSUPP;
1578
1579 sll->sll_family = AF_PACKET;
1580 sll->sll_ifindex = po->ifindex;
1581 sll->sll_protocol = po->num;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001582 dev = dev_get_by_index(sock_net(sk), po->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583 if (dev) {
1584 sll->sll_hatype = dev->type;
1585 sll->sll_halen = dev->addr_len;
1586 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1587 dev_put(dev);
1588 } else {
1589 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1590 sll->sll_halen = 0;
1591 }
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001592 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593
1594 return 0;
1595}
1596
Wang Chen2aeb0b82008-07-14 20:49:46 -07001597static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1598 int what)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599{
1600 switch (i->type) {
1601 case PACKET_MR_MULTICAST:
1602 if (what > 0)
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001603 return dev_mc_add(dev, i->addr, i->alen, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001604 else
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001605 return dev_mc_delete(dev, i->addr, i->alen, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 break;
1607 case PACKET_MR_PROMISC:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001608 return dev_set_promiscuity(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 break;
1610 case PACKET_MR_ALLMULTI:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001611 return dev_set_allmulti(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 break;
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001613 case PACKET_MR_UNICAST:
1614 if (what > 0)
Jiri Pirkoccffad252009-05-22 23:22:17 +00001615 return dev_unicast_add(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001616 else
Jiri Pirkoccffad252009-05-22 23:22:17 +00001617 return dev_unicast_delete(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001618 break;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001619 default:
1620 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621 }
Wang Chen2aeb0b82008-07-14 20:49:46 -07001622 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623}
1624
1625static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1626{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001627 for ( ; i; i = i->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 if (i->ifindex == dev->ifindex)
1629 packet_dev_mc(dev, i, what);
1630 }
1631}
1632
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001633static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634{
1635 struct packet_sock *po = pkt_sk(sk);
1636 struct packet_mclist *ml, *i;
1637 struct net_device *dev;
1638 int err;
1639
1640 rtnl_lock();
1641
1642 err = -ENODEV;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001643 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001644 if (!dev)
1645 goto done;
1646
1647 err = -EINVAL;
1648 if (mreq->mr_alen > dev->addr_len)
1649 goto done;
1650
1651 err = -ENOBUFS;
Kris Katterjohn8b3a7002006-01-11 15:56:43 -08001652 i = kmalloc(sizeof(*i), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653 if (i == NULL)
1654 goto done;
1655
1656 err = 0;
1657 for (ml = po->mclist; ml; ml = ml->next) {
1658 if (ml->ifindex == mreq->mr_ifindex &&
1659 ml->type == mreq->mr_type &&
1660 ml->alen == mreq->mr_alen &&
1661 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1662 ml->count++;
1663 /* Free the new element ... */
1664 kfree(i);
1665 goto done;
1666 }
1667 }
1668
1669 i->type = mreq->mr_type;
1670 i->ifindex = mreq->mr_ifindex;
1671 i->alen = mreq->mr_alen;
1672 memcpy(i->addr, mreq->mr_address, i->alen);
1673 i->count = 1;
1674 i->next = po->mclist;
1675 po->mclist = i;
Wang Chen2aeb0b82008-07-14 20:49:46 -07001676 err = packet_dev_mc(dev, i, 1);
1677 if (err) {
1678 po->mclist = i->next;
1679 kfree(i);
1680 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681
1682done:
1683 rtnl_unlock();
1684 return err;
1685}
1686
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001687static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688{
1689 struct packet_mclist *ml, **mlp;
1690
1691 rtnl_lock();
1692
1693 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1694 if (ml->ifindex == mreq->mr_ifindex &&
1695 ml->type == mreq->mr_type &&
1696 ml->alen == mreq->mr_alen &&
1697 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1698 if (--ml->count == 0) {
1699 struct net_device *dev;
1700 *mlp = ml->next;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001701 dev = dev_get_by_index(sock_net(sk), ml->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 if (dev) {
1703 packet_dev_mc(dev, ml, -1);
1704 dev_put(dev);
1705 }
1706 kfree(ml);
1707 }
1708 rtnl_unlock();
1709 return 0;
1710 }
1711 }
1712 rtnl_unlock();
1713 return -EADDRNOTAVAIL;
1714}
1715
1716static void packet_flush_mclist(struct sock *sk)
1717{
1718 struct packet_sock *po = pkt_sk(sk);
1719 struct packet_mclist *ml;
1720
1721 if (!po->mclist)
1722 return;
1723
1724 rtnl_lock();
1725 while ((ml = po->mclist) != NULL) {
1726 struct net_device *dev;
1727
1728 po->mclist = ml->next;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001729 dev = dev_get_by_index(sock_net(sk), ml->ifindex);
1730 if (dev != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731 packet_dev_mc(dev, ml, -1);
1732 dev_put(dev);
1733 }
1734 kfree(ml);
1735 }
1736 rtnl_unlock();
1737}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738
1739static int
David S. Millerb7058842009-09-30 16:12:20 -07001740packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741{
1742 struct sock *sk = sock->sk;
Herbert Xu8dc41942007-02-04 23:31:32 -08001743 struct packet_sock *po = pkt_sk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744 int ret;
1745
1746 if (level != SOL_PACKET)
1747 return -ENOPROTOOPT;
1748
Johann Baudy69e3c752009-05-18 22:11:22 -07001749 switch (optname) {
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001750 case PACKET_ADD_MEMBERSHIP:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751 case PACKET_DROP_MEMBERSHIP:
1752 {
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001753 struct packet_mreq_max mreq;
1754 int len = optlen;
1755 memset(&mreq, 0, sizeof(mreq));
1756 if (len < sizeof(struct packet_mreq))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757 return -EINVAL;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001758 if (len > sizeof(mreq))
1759 len = sizeof(mreq);
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001760 if (copy_from_user(&mreq, optval, len))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 return -EFAULT;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001762 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1763 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 if (optname == PACKET_ADD_MEMBERSHIP)
1765 ret = packet_mc_add(sk, &mreq);
1766 else
1767 ret = packet_mc_drop(sk, &mreq);
1768 return ret;
1769 }
David S. Millera2efcfa2007-05-29 13:12:50 -07001770
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771#ifdef CONFIG_PACKET_MMAP
1772 case PACKET_RX_RING:
Johann Baudy69e3c752009-05-18 22:11:22 -07001773 case PACKET_TX_RING:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001774 {
1775 struct tpacket_req req;
1776
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001777 if (optlen < sizeof(req))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778 return -EINVAL;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001779 if (copy_from_user(&req, optval, sizeof(req)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780 return -EFAULT;
Johann Baudy69e3c752009-05-18 22:11:22 -07001781 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782 }
1783 case PACKET_COPY_THRESH:
1784 {
1785 int val;
1786
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001787 if (optlen != sizeof(val))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 return -EINVAL;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001789 if (copy_from_user(&val, optval, sizeof(val)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790 return -EFAULT;
1791
1792 pkt_sk(sk)->copy_thresh = val;
1793 return 0;
1794 }
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001795 case PACKET_VERSION:
1796 {
1797 int val;
1798
1799 if (optlen != sizeof(val))
1800 return -EINVAL;
Johann Baudy69e3c752009-05-18 22:11:22 -07001801 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001802 return -EBUSY;
1803 if (copy_from_user(&val, optval, sizeof(val)))
1804 return -EFAULT;
1805 switch (val) {
1806 case TPACKET_V1:
1807 case TPACKET_V2:
1808 po->tp_version = val;
1809 return 0;
1810 default:
1811 return -EINVAL;
1812 }
1813 }
Patrick McHardy8913336a2008-07-18 18:05:19 -07001814 case PACKET_RESERVE:
1815 {
1816 unsigned int val;
1817
1818 if (optlen != sizeof(val))
1819 return -EINVAL;
Johann Baudy69e3c752009-05-18 22:11:22 -07001820 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
Patrick McHardy8913336a2008-07-18 18:05:19 -07001821 return -EBUSY;
1822 if (copy_from_user(&val, optval, sizeof(val)))
1823 return -EFAULT;
1824 po->tp_reserve = val;
1825 return 0;
1826 }
Johann Baudy69e3c752009-05-18 22:11:22 -07001827 case PACKET_LOSS:
1828 {
1829 unsigned int val;
1830
1831 if (optlen != sizeof(val))
1832 return -EINVAL;
1833 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1834 return -EBUSY;
1835 if (copy_from_user(&val, optval, sizeof(val)))
1836 return -EFAULT;
1837 po->tp_loss = !!val;
1838 return 0;
1839 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840#endif
Herbert Xu8dc41942007-02-04 23:31:32 -08001841 case PACKET_AUXDATA:
1842 {
1843 int val;
1844
1845 if (optlen < sizeof(val))
1846 return -EINVAL;
1847 if (copy_from_user(&val, optval, sizeof(val)))
1848 return -EFAULT;
1849
1850 po->auxdata = !!val;
1851 return 0;
1852 }
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -07001853 case PACKET_ORIGDEV:
1854 {
1855 int val;
1856
1857 if (optlen < sizeof(val))
1858 return -EINVAL;
1859 if (copy_from_user(&val, optval, sizeof(val)))
1860 return -EFAULT;
1861
1862 po->origdev = !!val;
1863 return 0;
1864 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865 default:
1866 return -ENOPROTOOPT;
1867 }
1868}
1869
1870static int packet_getsockopt(struct socket *sock, int level, int optname,
1871 char __user *optval, int __user *optlen)
1872{
1873 int len;
Herbert Xu8dc41942007-02-04 23:31:32 -08001874 int val;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875 struct sock *sk = sock->sk;
1876 struct packet_sock *po = pkt_sk(sk);
Herbert Xu8dc41942007-02-04 23:31:32 -08001877 void *data;
1878 struct tpacket_stats st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879
1880 if (level != SOL_PACKET)
1881 return -ENOPROTOOPT;
1882
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001883 if (get_user(len, optlen))
1884 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885
1886 if (len < 0)
1887 return -EINVAL;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001888
Johann Baudy69e3c752009-05-18 22:11:22 -07001889 switch (optname) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001890 case PACKET_STATISTICS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001891 if (len > sizeof(struct tpacket_stats))
1892 len = sizeof(struct tpacket_stats);
1893 spin_lock_bh(&sk->sk_receive_queue.lock);
1894 st = po->stats;
1895 memset(&po->stats, 0, sizeof(st));
1896 spin_unlock_bh(&sk->sk_receive_queue.lock);
1897 st.tp_packets += st.tp_drops;
1898
Herbert Xu8dc41942007-02-04 23:31:32 -08001899 data = &st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900 break;
Herbert Xu8dc41942007-02-04 23:31:32 -08001901 case PACKET_AUXDATA:
1902 if (len > sizeof(int))
1903 len = sizeof(int);
1904 val = po->auxdata;
1905
1906 data = &val;
1907 break;
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -07001908 case PACKET_ORIGDEV:
1909 if (len > sizeof(int))
1910 len = sizeof(int);
1911 val = po->origdev;
1912
1913 data = &val;
1914 break;
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001915#ifdef CONFIG_PACKET_MMAP
1916 case PACKET_VERSION:
1917 if (len > sizeof(int))
1918 len = sizeof(int);
1919 val = po->tp_version;
1920 data = &val;
1921 break;
1922 case PACKET_HDRLEN:
1923 if (len > sizeof(int))
1924 len = sizeof(int);
1925 if (copy_from_user(&val, optval, len))
1926 return -EFAULT;
1927 switch (val) {
1928 case TPACKET_V1:
1929 val = sizeof(struct tpacket_hdr);
1930 break;
1931 case TPACKET_V2:
1932 val = sizeof(struct tpacket2_hdr);
1933 break;
1934 default:
1935 return -EINVAL;
1936 }
1937 data = &val;
1938 break;
Patrick McHardy8913336a2008-07-18 18:05:19 -07001939 case PACKET_RESERVE:
1940 if (len > sizeof(unsigned int))
1941 len = sizeof(unsigned int);
1942 val = po->tp_reserve;
1943 data = &val;
1944 break;
Johann Baudy69e3c752009-05-18 22:11:22 -07001945 case PACKET_LOSS:
1946 if (len > sizeof(unsigned int))
1947 len = sizeof(unsigned int);
1948 val = po->tp_loss;
1949 data = &val;
1950 break;
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001951#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952 default:
1953 return -ENOPROTOOPT;
1954 }
1955
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001956 if (put_user(len, optlen))
1957 return -EFAULT;
Herbert Xu8dc41942007-02-04 23:31:32 -08001958 if (copy_to_user(optval, data, len))
1959 return -EFAULT;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001960 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961}
1962
1963
1964static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1965{
1966 struct sock *sk;
1967 struct hlist_node *node;
Jason Lunzad930652007-02-20 23:19:54 -08001968 struct net_device *dev = data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001969 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08001971 read_lock(&net->packet.sklist_lock);
1972 sk_for_each(sk, node, &net->packet.sklist) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973 struct packet_sock *po = pkt_sk(sk);
1974
1975 switch (msg) {
1976 case NETDEV_UNREGISTER:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977 if (po->mclist)
1978 packet_dev_mclist(dev, po->mclist, -1);
David S. Millera2efcfa2007-05-29 13:12:50 -07001979 /* fallthrough */
1980
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981 case NETDEV_DOWN:
1982 if (dev->ifindex == po->ifindex) {
1983 spin_lock(&po->bind_lock);
1984 if (po->running) {
1985 __dev_remove_pack(&po->prot_hook);
1986 __sock_put(sk);
1987 po->running = 0;
1988 sk->sk_err = ENETDOWN;
1989 if (!sock_flag(sk, SOCK_DEAD))
1990 sk->sk_error_report(sk);
1991 }
1992 if (msg == NETDEV_UNREGISTER) {
1993 po->ifindex = -1;
1994 po->prot_hook.dev = NULL;
1995 }
1996 spin_unlock(&po->bind_lock);
1997 }
1998 break;
1999 case NETDEV_UP:
2000 spin_lock(&po->bind_lock);
2001 if (dev->ifindex == po->ifindex && po->num &&
2002 !po->running) {
2003 dev_add_pack(&po->prot_hook);
2004 sock_hold(sk);
2005 po->running = 1;
2006 }
2007 spin_unlock(&po->bind_lock);
2008 break;
2009 }
2010 }
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002011 read_unlock(&net->packet.sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002012 return NOTIFY_DONE;
2013}
2014
2015
2016static int packet_ioctl(struct socket *sock, unsigned int cmd,
2017 unsigned long arg)
2018{
2019 struct sock *sk = sock->sk;
2020
Johann Baudy69e3c752009-05-18 22:11:22 -07002021 switch (cmd) {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002022 case SIOCOUTQ:
2023 {
2024 int amount = sk_wmem_alloc_get(sk);
Eric Dumazet31e6d362009-06-17 19:05:41 -07002025
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002026 return put_user(amount, (int __user *)arg);
2027 }
2028 case SIOCINQ:
2029 {
2030 struct sk_buff *skb;
2031 int amount = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002032
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002033 spin_lock_bh(&sk->sk_receive_queue.lock);
2034 skb = skb_peek(&sk->sk_receive_queue);
2035 if (skb)
2036 amount = skb->len;
2037 spin_unlock_bh(&sk->sk_receive_queue.lock);
2038 return put_user(amount, (int __user *)arg);
2039 }
2040 case SIOCGSTAMP:
2041 return sock_get_timestamp(sk, (struct timeval __user *)arg);
2042 case SIOCGSTAMPNS:
2043 return sock_get_timestampns(sk, (struct timespec __user *)arg);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002044
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045#ifdef CONFIG_INET
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002046 case SIOCADDRT:
2047 case SIOCDELRT:
2048 case SIOCDARP:
2049 case SIOCGARP:
2050 case SIOCSARP:
2051 case SIOCGIFADDR:
2052 case SIOCSIFADDR:
2053 case SIOCGIFBRDADDR:
2054 case SIOCSIFBRDADDR:
2055 case SIOCGIFNETMASK:
2056 case SIOCSIFNETMASK:
2057 case SIOCGIFDSTADDR:
2058 case SIOCSIFDSTADDR:
2059 case SIOCSIFFLAGS:
2060 if (!net_eq(sock_net(sk), &init_net))
2061 return -ENOIOCTLCMD;
2062 return inet_dgram_ops.ioctl(sock, cmd, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063#endif
2064
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002065 default:
2066 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067 }
2068 return 0;
2069}
2070
2071#ifndef CONFIG_PACKET_MMAP
2072#define packet_mmap sock_no_mmap
2073#define packet_poll datagram_poll
2074#else
2075
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002076static unsigned int packet_poll(struct file *file, struct socket *sock,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077 poll_table *wait)
2078{
2079 struct sock *sk = sock->sk;
2080 struct packet_sock *po = pkt_sk(sk);
2081 unsigned int mask = datagram_poll(file, sock, wait);
2082
2083 spin_lock_bh(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -07002084 if (po->rx_ring.pg_vec) {
2085 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002086 mask |= POLLIN | POLLRDNORM;
2087 }
2088 spin_unlock_bh(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -07002089 spin_lock_bh(&sk->sk_write_queue.lock);
2090 if (po->tx_ring.pg_vec) {
2091 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2092 mask |= POLLOUT | POLLWRNORM;
2093 }
2094 spin_unlock_bh(&sk->sk_write_queue.lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095 return mask;
2096}
2097
2098
/* Dirty? Well, I still have not learned a better way to account
 * for user mmaps.
 */
2102
2103static void packet_mm_open(struct vm_area_struct *vma)
2104{
2105 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002106 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002108
Linus Torvalds1da177e2005-04-16 15:20:36 -07002109 if (sk)
2110 atomic_inc(&pkt_sk(sk)->mapped);
2111}
2112
2113static void packet_mm_close(struct vm_area_struct *vma)
2114{
2115 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002116 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002118
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119 if (sk)
2120 atomic_dec(&pkt_sk(sk)->mapped);
2121}
2122
Alexey Dobriyanf0f37e22009-09-27 22:29:37 +04002123static const struct vm_operations_struct packet_mmap_ops = {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002124 .open = packet_mm_open,
2125 .close = packet_mm_close,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126};
2127
/*
 * Free a block vector: every non-NULL entry is released with
 * free_pages() at the given @order, then the vector itself is kfree()d.
 * @len is the number of entries in @pg_vec.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	unsigned int i;	/* same type as @len: no signed/unsigned compare */

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i]))
			free_pages((unsigned long) pg_vec[i], order);
	}
	kfree(pg_vec);
}
2138
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002139static inline char *alloc_one_pg_vec_page(unsigned long order)
2140{
Eric Dumazet719bfea2009-04-15 03:39:52 -07002141 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
2142
2143 return (char *) __get_free_pages(gfp_flags, order);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002144}
2145
2146static char **alloc_pg_vec(struct tpacket_req *req, int order)
2147{
2148 unsigned int block_nr = req->tp_block_nr;
2149 char **pg_vec;
2150 int i;
2151
2152 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
2153 if (unlikely(!pg_vec))
2154 goto out;
2155
2156 for (i = 0; i < block_nr; i++) {
2157 pg_vec[i] = alloc_one_pg_vec_page(order);
2158 if (unlikely(!pg_vec[i]))
2159 goto out_free_pgvec;
2160 }
2161
2162out:
2163 return pg_vec;
2164
2165out_free_pgvec:
2166 free_pg_vec(pg_vec, order, block_nr);
2167 pg_vec = NULL;
2168 goto out;
2169}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002170
Johann Baudy69e3c752009-05-18 22:11:22 -07002171static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2172 int closing, int tx_ring)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173{
2174 char **pg_vec = NULL;
2175 struct packet_sock *po = pkt_sk(sk);
Al Viro0e11c912006-11-08 00:26:29 -08002176 int was_running, order = 0;
Johann Baudy69e3c752009-05-18 22:11:22 -07002177 struct packet_ring_buffer *rb;
2178 struct sk_buff_head *rb_queue;
Al Viro0e11c912006-11-08 00:26:29 -08002179 __be16 num;
Johann Baudy69e3c752009-05-18 22:11:22 -07002180 int err;
2181
2182 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2183 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
2184
2185 err = -EBUSY;
2186 if (!closing) {
2187 if (atomic_read(&po->mapped))
2188 goto out;
2189 if (atomic_read(&rb->pending))
2190 goto out;
2191 }
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002192
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193 if (req->tp_block_nr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194 /* Sanity tests and some calculations */
Johann Baudy69e3c752009-05-18 22:11:22 -07002195 err = -EBUSY;
2196 if (unlikely(rb->pg_vec))
2197 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002198
Patrick McHardybbd6ef82008-07-14 22:50:15 -07002199 switch (po->tp_version) {
2200 case TPACKET_V1:
2201 po->tp_hdrlen = TPACKET_HDRLEN;
2202 break;
2203 case TPACKET_V2:
2204 po->tp_hdrlen = TPACKET2_HDRLEN;
2205 break;
2206 }
2207
Johann Baudy69e3c752009-05-18 22:11:22 -07002208 err = -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002209 if (unlikely((int)req->tp_block_size <= 0))
Johann Baudy69e3c752009-05-18 22:11:22 -07002210 goto out;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002211 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
Johann Baudy69e3c752009-05-18 22:11:22 -07002212 goto out;
Patrick McHardy8913336a2008-07-18 18:05:19 -07002213 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
Johann Baudy69e3c752009-05-18 22:11:22 -07002214 po->tp_reserve))
2215 goto out;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002216 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
Johann Baudy69e3c752009-05-18 22:11:22 -07002217 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218
Johann Baudy69e3c752009-05-18 22:11:22 -07002219 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2220 if (unlikely(rb->frames_per_block <= 0))
2221 goto out;
2222 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2223 req->tp_frame_nr))
2224 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225
2226 err = -ENOMEM;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002227 order = get_order(req->tp_block_size);
2228 pg_vec = alloc_pg_vec(req, order);
2229 if (unlikely(!pg_vec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 goto out;
Johann Baudy69e3c752009-05-18 22:11:22 -07002231 }
2232 /* Done */
2233 else {
2234 err = -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002235 if (unlikely(req->tp_frame_nr))
Johann Baudy69e3c752009-05-18 22:11:22 -07002236 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237 }
2238
2239 lock_sock(sk);
2240
2241 /* Detach socket from network */
2242 spin_lock(&po->bind_lock);
2243 was_running = po->running;
2244 num = po->num;
2245 if (was_running) {
2246 __dev_remove_pack(&po->prot_hook);
2247 po->num = 0;
2248 po->running = 0;
2249 __sock_put(sk);
2250 }
2251 spin_unlock(&po->bind_lock);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002252
Linus Torvalds1da177e2005-04-16 15:20:36 -07002253 synchronize_net();
2254
2255 err = -EBUSY;
Herbert Xu905db442009-01-30 14:12:06 -08002256 mutex_lock(&po->pg_vec_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002257 if (closing || atomic_read(&po->mapped) == 0) {
2258 err = 0;
2259#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
Johann Baudy69e3c752009-05-18 22:11:22 -07002260 spin_lock_bh(&rb_queue->lock);
2261 pg_vec = XC(rb->pg_vec, pg_vec);
2262 rb->frame_max = (req->tp_frame_nr - 1);
2263 rb->head = 0;
2264 rb->frame_size = req->tp_frame_size;
2265 spin_unlock_bh(&rb_queue->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266
Johann Baudy69e3c752009-05-18 22:11:22 -07002267 order = XC(rb->pg_vec_order, order);
2268 req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269
Johann Baudy69e3c752009-05-18 22:11:22 -07002270 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2271 po->prot_hook.func = (po->rx_ring.pg_vec) ?
2272 tpacket_rcv : packet_rcv;
2273 skb_queue_purge(rb_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274#undef XC
2275 if (atomic_read(&po->mapped))
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002276 pr_err("packet_mmap: vma is busy: %d\n",
2277 atomic_read(&po->mapped));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 }
Herbert Xu905db442009-01-30 14:12:06 -08002279 mutex_unlock(&po->pg_vec_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280
2281 spin_lock(&po->bind_lock);
2282 if (was_running && !po->running) {
2283 sock_hold(sk);
2284 po->running = 1;
2285 po->num = num;
2286 dev_add_pack(&po->prot_hook);
2287 }
2288 spin_unlock(&po->bind_lock);
2289
2290 release_sock(sk);
2291
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 if (pg_vec)
2293 free_pg_vec(pg_vec, order, req->tp_block_nr);
2294out:
2295 return err;
2296}
2297
Johann Baudy69e3c752009-05-18 22:11:22 -07002298static int packet_mmap(struct file *file, struct socket *sock,
2299 struct vm_area_struct *vma)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300{
2301 struct sock *sk = sock->sk;
2302 struct packet_sock *po = pkt_sk(sk);
Johann Baudy69e3c752009-05-18 22:11:22 -07002303 unsigned long size, expected_size;
2304 struct packet_ring_buffer *rb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 unsigned long start;
2306 int err = -EINVAL;
2307 int i;
2308
2309 if (vma->vm_pgoff)
2310 return -EINVAL;
2311
Herbert Xu905db442009-01-30 14:12:06 -08002312 mutex_lock(&po->pg_vec_lock);
Johann Baudy69e3c752009-05-18 22:11:22 -07002313
2314 expected_size = 0;
2315 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2316 if (rb->pg_vec) {
2317 expected_size += rb->pg_vec_len
2318 * rb->pg_vec_pages
2319 * PAGE_SIZE;
2320 }
2321 }
2322
2323 if (expected_size == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324 goto out;
Johann Baudy69e3c752009-05-18 22:11:22 -07002325
2326 size = vma->vm_end - vma->vm_start;
2327 if (size != expected_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 goto out;
2329
Linus Torvalds1da177e2005-04-16 15:20:36 -07002330 start = vma->vm_start;
Johann Baudy69e3c752009-05-18 22:11:22 -07002331 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2332 if (rb->pg_vec == NULL)
2333 continue;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002334
Johann Baudy69e3c752009-05-18 22:11:22 -07002335 for (i = 0; i < rb->pg_vec_len; i++) {
2336 struct page *page = virt_to_page(rb->pg_vec[i]);
2337 int pg_num;
2338
2339 for (pg_num = 0; pg_num < rb->pg_vec_pages;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002340 pg_num++, page++) {
Johann Baudy69e3c752009-05-18 22:11:22 -07002341 err = vm_insert_page(vma, start, page);
2342 if (unlikely(err))
2343 goto out;
2344 start += PAGE_SIZE;
2345 }
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002346 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347 }
Johann Baudy69e3c752009-05-18 22:11:22 -07002348
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002349 atomic_inc(&po->mapped);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002350 vma->vm_ops = &packet_mmap_ops;
2351 err = 0;
2352
2353out:
Herbert Xu905db442009-01-30 14:12:06 -08002354 mutex_unlock(&po->pg_vec_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355 return err;
2356}
2357#endif
2358
2359
Eric Dumazet90ddc4f2005-12-22 12:49:22 -08002360static const struct proto_ops packet_ops_spkt = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361 .family = PF_PACKET,
2362 .owner = THIS_MODULE,
2363 .release = packet_release,
2364 .bind = packet_bind_spkt,
2365 .connect = sock_no_connect,
2366 .socketpair = sock_no_socketpair,
2367 .accept = sock_no_accept,
2368 .getname = packet_getname_spkt,
2369 .poll = datagram_poll,
2370 .ioctl = packet_ioctl,
2371 .listen = sock_no_listen,
2372 .shutdown = sock_no_shutdown,
2373 .setsockopt = sock_no_setsockopt,
2374 .getsockopt = sock_no_getsockopt,
2375 .sendmsg = packet_sendmsg_spkt,
2376 .recvmsg = packet_recvmsg,
2377 .mmap = sock_no_mmap,
2378 .sendpage = sock_no_sendpage,
2379};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002380
Eric Dumazet90ddc4f2005-12-22 12:49:22 -08002381static const struct proto_ops packet_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382 .family = PF_PACKET,
2383 .owner = THIS_MODULE,
2384 .release = packet_release,
2385 .bind = packet_bind,
2386 .connect = sock_no_connect,
2387 .socketpair = sock_no_socketpair,
2388 .accept = sock_no_accept,
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002389 .getname = packet_getname,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002390 .poll = packet_poll,
2391 .ioctl = packet_ioctl,
2392 .listen = sock_no_listen,
2393 .shutdown = sock_no_shutdown,
2394 .setsockopt = packet_setsockopt,
2395 .getsockopt = packet_getsockopt,
2396 .sendmsg = packet_sendmsg,
2397 .recvmsg = packet_recvmsg,
2398 .mmap = packet_mmap,
2399 .sendpage = sock_no_sendpage,
2400};
2401
2402static struct net_proto_family packet_family_ops = {
2403 .family = PF_PACKET,
2404 .create = packet_create,
2405 .owner = THIS_MODULE,
2406};
2407
2408static struct notifier_block packet_netdev_notifier = {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002409 .notifier_call = packet_notifier,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410};
2411
2412#ifdef CONFIG_PROC_FS
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002413static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414{
2415 struct sock *s;
2416 struct hlist_node *node;
2417
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002418 sk_for_each(s, node, &net->packet.sklist) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419 if (!off--)
2420 return s;
2421 }
2422 return NULL;
2423}
2424
2425static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet40ccbf52008-01-07 22:39:57 -08002426 __acquires(seq_file_net(seq)->packet.sklist_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002427{
Denis V. Luneve372c412007-11-19 22:31:54 -08002428 struct net *net = seq_file_net(seq);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002429 read_lock(&net->packet.sklist_lock);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002430 return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002431}
2432
2433static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2434{
Herbert Xu1bf40952007-12-16 14:04:02 -08002435 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436 ++*pos;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002437 return (v == SEQ_START_TOKEN)
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002438 ? sk_head(&net->packet.sklist)
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002439 : sk_next((struct sock *)v) ;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002440}
2441
2442static void packet_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet40ccbf52008-01-07 22:39:57 -08002443 __releases(seq_file_net(seq)->packet.sklist_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444{
Herbert Xu1bf40952007-12-16 14:04:02 -08002445 struct net *net = seq_file_net(seq);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002446 read_unlock(&net->packet.sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002447}
2448
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002449static int packet_seq_show(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450{
2451 if (v == SEQ_START_TOKEN)
2452 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2453 else {
2454 struct sock *s = v;
2455 const struct packet_sock *po = pkt_sk(s);
2456
2457 seq_printf(seq,
2458 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2459 s,
2460 atomic_read(&s->sk_refcnt),
2461 s->sk_type,
2462 ntohs(po->num),
2463 po->ifindex,
2464 po->running,
2465 atomic_read(&s->sk_rmem_alloc),
2466 sock_i_uid(s),
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002467 sock_i_ino(s));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468 }
2469
2470 return 0;
2471}
2472
Philippe De Muyter56b3d972007-07-10 23:07:31 -07002473static const struct seq_operations packet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474 .start = packet_seq_start,
2475 .next = packet_seq_next,
2476 .stop = packet_seq_stop,
2477 .show = packet_seq_show,
2478};
2479
2480static int packet_seq_open(struct inode *inode, struct file *file)
2481{
Denis V. Luneve372c412007-11-19 22:31:54 -08002482 return seq_open_net(inode, file, &packet_seq_ops,
2483 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484}
2485
Arjan van de Venda7071d2007-02-12 00:55:36 -08002486static const struct file_operations packet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487 .owner = THIS_MODULE,
2488 .open = packet_seq_open,
2489 .read = seq_read,
2490 .llseek = seq_lseek,
Denis V. Luneve372c412007-11-19 22:31:54 -08002491 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002492};
2493
2494#endif
2495
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002496static int packet_net_init(struct net *net)
2497{
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002498 rwlock_init(&net->packet.sklist_lock);
2499 INIT_HLIST_HEAD(&net->packet.sklist);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002500
2501 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2502 return -ENOMEM;
2503
2504 return 0;
2505}
2506
2507static void packet_net_exit(struct net *net)
2508{
2509 proc_net_remove(net, "packet");
2510}
2511
2512static struct pernet_operations packet_net_ops = {
2513 .init = packet_net_init,
2514 .exit = packet_net_exit,
2515};
2516
2517
Linus Torvalds1da177e2005-04-16 15:20:36 -07002518static void __exit packet_exit(void)
2519{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520 unregister_netdevice_notifier(&packet_netdev_notifier);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002521 unregister_pernet_subsys(&packet_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522 sock_unregister(PF_PACKET);
2523 proto_unregister(&packet_proto);
2524}
2525
2526static int __init packet_init(void)
2527{
2528 int rc = proto_register(&packet_proto, 0);
2529
2530 if (rc != 0)
2531 goto out;
2532
2533 sock_register(&packet_family_ops);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002534 register_pernet_subsys(&packet_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535 register_netdevice_notifier(&packet_netdev_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536out:
2537 return rc;
2538}
2539
2540module_init(packet_init);
2541module_exit(packet_exit);
2542MODULE_LICENSE("GPL");
2543MODULE_ALIAS_NETPROTO(PF_PACKET);