blob: 10f7295bcefb1c735452a5706b442e4a19f1024d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 *
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090012 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 * Alan Cox : verify_area() now used correctly
14 * Alan Cox : new skbuff lists, look ma no backlogs!
15 * Alan Cox : tidied skbuff lists.
16 * Alan Cox : Now uses generic datagram routines I
17 * added. Also fixed the peek/read crash
18 * from all old Linux datagram code.
19 * Alan Cox : Uses the improved datagram code.
20 * Alan Cox : Added NULL's for socket options.
21 * Alan Cox : Re-commented the code.
22 * Alan Cox : Use new kernel side addressing
23 * Rob Janssen : Correct MTU usage.
24 * Dave Platt : Counter leaks caused by incorrect
25 * interrupt locking and some slightly
26 * dubious gcc output. Can you read
27 * compiler: it said _VOLATILE_
28 * Richard Kooijman : Timestamp fixes.
29 * Alan Cox : New buffers. Use sk->mac.raw.
30 * Alan Cox : sendmsg/recvmsg support.
31 * Alan Cox : Protocol setting support
32 * Alexey Kuznetsov : Untied from IPv4 stack.
33 * Cyrus Durgin : Fixed kerneld for kmod.
34 * Michal Ostrowski : Module initialization cleanup.
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090035 * Ulises Alonso : Frame number limit removal and
Linus Torvalds1da177e2005-04-16 15:20:36 -070036 * packet_set_ring memory leak.
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070037 * Eric Biederman : Allow for > 8 byte hardware addresses.
38 * The convention is that longer addresses
39 * will simply extend the hardware address
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090040 * byte arrays at the end of sockaddr_ll
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070041 * and packet_mreq.
Johann Baudy69e3c752009-05-18 22:11:22 -070042 * Johann Baudy : Added TX RING.
Linus Torvalds1da177e2005-04-16 15:20:36 -070043 *
44 * This program is free software; you can redistribute it and/or
45 * modify it under the terms of the GNU General Public License
46 * as published by the Free Software Foundation; either version
47 * 2 of the License, or (at your option) any later version.
48 *
49 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090050
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <linux/mm.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080053#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070054#include <linux/fcntl.h>
55#include <linux/socket.h>
56#include <linux/in.h>
57#include <linux/inet.h>
58#include <linux/netdevice.h>
59#include <linux/if_packet.h>
60#include <linux/wireless.h>
Herbert Xuffbc6112007-02-04 23:33:10 -080061#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070062#include <linux/kmod.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020063#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070064#include <net/ip.h>
65#include <net/protocol.h>
66#include <linux/skbuff.h>
67#include <net/sock.h>
68#include <linux/errno.h>
69#include <linux/timer.h>
70#include <asm/system.h>
71#include <asm/uaccess.h>
72#include <asm/ioctls.h>
73#include <asm/page.h>
Al Viroa1f8e7f72006-10-19 16:08:53 -040074#include <asm/cacheflush.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070075#include <asm/io.h>
76#include <linux/proc_fs.h>
77#include <linux/seq_file.h>
78#include <linux/poll.h>
79#include <linux/module.h>
80#include <linux/init.h>
Herbert Xu905db442009-01-30 14:12:06 -080081#include <linux/mutex.h>
Eric Dumazet05423b22009-10-26 18:40:35 -070082#include <linux/if_vlan.h>
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -080083#include <linux/virtio_net.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85#ifdef CONFIG_INET
86#include <net/inet_common.h>
87#endif
88
Linus Torvalds1da177e2005-04-16 15:20:36 -070089/*
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 Assumptions:
91 - if device has no dev->hard_header routine, it adds and removes ll header
92 inside itself. In this case ll header is invisible outside of device,
93 but higher levels still should reserve dev->hard_header_len.
   Some devices are clever enough to reallocate the skb when the header
   will not fit in the reserved space (tunnel); other ones are silly
96 (PPP).
97 - packet socket receives packets with pulled ll header,
98 so that SOCK_RAW should push it back.
99
100On receive:
101-----------
102
103Incoming, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700104 mac_header -> ll header
105 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106
107Outgoing, dev->hard_header!=NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700108 mac_header -> ll header
109 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110
111Incoming, dev->hard_header==NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700112 mac_header -> UNKNOWN position. It is very likely, that it points to ll
   header. PPP does this, which is wrong, because it introduces
   asymmetry between rx and tx paths.
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700115 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
117Outgoing, dev->hard_header==NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700118 mac_header -> data. ll header is still not built!
119 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700120
121Resume
122 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
123
124
125On transmit:
126------------
127
128dev->hard_header != NULL
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700129 mac_header -> ll header
130 data -> ll header
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131
132dev->hard_header == NULL (ll header is added by device, we cannot control it)
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700133 mac_header -> data
134 data -> data
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135
   We should set nh.raw on output to the correct position,
137 packet classifier depends on it.
138 */
139
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140/* Private packet socket structures. */
141
/* One entry in a packet socket's list of multicast/promiscuous-mode
 * requests (the socket's list is torn down by packet_flush_mclist()).
 */
struct packet_mclist {
	struct packet_mclist	*next;		/* singly-linked list */
	int			ifindex;	/* device the request applies to */
	int			count;		/* NOTE(review): appears to refcount duplicate requests — confirm in packet_mc_add */
	unsigned short		type;
	unsigned short		alen;		/* presumably the valid length of addr[] */
	unsigned char		addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field: MAX_ADDR_LEN instead of 8 bytes, to allow
 * for > 8 byte hardware addresses (see the file header comment).
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};
David S. Millera2efcfa2007-05-29 13:12:50 -0700159
Johann Baudy69e3c752009-05-18 22:11:22 -0700160static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
161 int closing, int tx_ring);
162
/* One packet ring (rx or tx) of fixed-size frames, backed by an array
 * of contiguous kernel memory blocks in pg_vec. */
struct packet_ring_buffer {
	char			**pg_vec;	/* pg_vec_len blocks, each holding frames_per_block frames */

	unsigned int		head;		/* index of the current frame (see packet_current_frame) */
	unsigned int		frames_per_block;
	unsigned int		frame_size;	/* bytes per frame */
	unsigned int		frame_max;	/* highest valid frame index, i.e. frame count - 1 */

	unsigned int		pg_vec_order;	/* presumably the page allocation order per block — confirm */
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;	/* number of entries in pg_vec */

	atomic_t		pending;	/* NOTE(review): appears to track in-flight frames — confirm against tx path */
};
176
177struct packet_sock;
178static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179
180static void packet_flush_mclist(struct sock *sk);
181
/*
 * Per-socket state of an AF_PACKET socket.  pkt_sk() depends on
 * struct sock being the first member.
 */
struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;		/* tp_packets / tp_drops counters */
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	int			copy_thresh;	/* rx ring: also queue a clone when the frame must be truncated */
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;	/* presumably serializes ring (re)configuration — confirm in packet_set_ring */
	unsigned int		running:1,	/* prot_hook is attached*/
				auxdata:1,
				origdev:1,	/* report orig_dev's ifindex instead of dev's (see *_rcv) */
				has_vnet_hdr:1;
	int			ifindex;	/* bound device */
	__be16			num;		/* bound protocol number (network byte order) */
	struct packet_mclist	*mclist;	/* multicast/promisc requests; freed by packet_flush_mclist() */
	atomic_t		mapped;
	enum tpacket_versions	tp_version;	/* selects tpacket_hdr vs tpacket2_hdr ring layout */
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;	/* extra headroom inside each ring frame */
	unsigned int		tp_loss:1;
	/* own cacheline: keeps the hot receive hook away from the fields above */
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};
205
/* Private per-skb state, overlaid on skb->cb while the packet sits on
 * an af_packet receive queue.  A BUILD_BUG_ON in packet_rcv() checks
 * that it fits in skb->cb. */
struct packet_skb_cb {
	unsigned int origlen;	/* skb->len before pskb_trim() to the snap length */
	union {			/* address handed to recvmsg(); variant depends on socket type */
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
Herbert Xu8dc41942007-02-04 23:31:32 -0800215
/*
 * Publish the status word of a ring frame (hands the frame to user
 * space or back to the kernel, depending on @status).
 *
 * The dcache flush keeps the store visible on architectures with
 * aliasing caches (the ring is mmap()ed by user space); smp_wmb()
 * orders the status store before any later stores by this CPU and
 * pairs with the smp_rmb() in __packet_get_status().
 */
static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {			/* overlay of the version-specific header layouts */
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(virt_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(virt_to_page(&h.h2->tp_status));
		break;
	default:
		/* tp_version is validated at setsockopt time; anything else is a bug */
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}
241
/*
 * Read the current status word of a ring frame.
 *
 * smp_rmb() pairs with the smp_wmb() in __packet_set_status(); the
 * dcache flush keeps the read coherent with user space's mmap()ed
 * view on aliasing-cache architectures.
 */
static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {			/* overlay of the version-specific header layouts */
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(virt_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(virt_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;	/* not reached; keeps the compiler happy */
	}
}
Johann Baudy69e3c752009-05-18 22:11:22 -0700266
267static void *packet_lookup_frame(struct packet_sock *po,
268 struct packet_ring_buffer *rb,
269 unsigned int position,
270 int status)
271{
272 unsigned int pg_vec_pos, frame_offset;
273 union {
274 struct tpacket_hdr *h1;
275 struct tpacket2_hdr *h2;
276 void *raw;
277 } h;
278
279 pg_vec_pos = position / rb->frames_per_block;
280 frame_offset = position % rb->frames_per_block;
281
282 h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
283
284 if (status != __packet_get_status(po, h.raw))
285 return NULL;
286
287 return h.raw;
288}
289
290static inline void *packet_current_frame(struct packet_sock *po,
291 struct packet_ring_buffer *rb,
292 int status)
293{
294 return packet_lookup_frame(po, rb, rb->head, status);
295}
296
297static inline void *packet_previous_frame(struct packet_sock *po,
298 struct packet_ring_buffer *rb,
299 int status)
300{
301 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
302 return packet_lookup_frame(po, rb, previous, status);
303}
304
305static inline void packet_increment_head(struct packet_ring_buffer *buff)
306{
307 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
308}
309
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310static inline struct packet_sock *pkt_sk(struct sock *sk)
311{
312 return (struct packet_sock *)sk;
313}
314
315static void packet_sock_destruct(struct sock *sk)
316{
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700317 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
318 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319
320 if (!sock_flag(sk, SOCK_DEAD)) {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000321 pr_err("Attempt to release alive packet socket: %p\n", sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 return;
323 }
324
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -0800325 sk_refcnt_debug_dec(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326}
327
328
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800329static const struct proto_ops packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800331static const struct proto_ops packet_ops_spkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332
/*
 * SOCK_PACKET receive hook.  Called for every frame on the protocol
 * the prot_hook was registered for; fills in a sockaddr_pkt in
 * skb->cb and charges the frame to the socket's receive queue.
 * Always returns 0: the skb is either queued or freed here.
 */
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	/* Only deliver within the socket's network namespace. */
	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	/* We are about to mangle the skb; get a private copy if shared. */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}
398
399
400/*
401 * Output a raw packet to a device layer. This bypasses all the other
402 * protocol layers and you must therefore supply it with a complete frame
403 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900404
/*
 * Transmit one complete, caller-built frame on a SOCK_PACKET socket,
 * bypassing all protocol layers.  The destination device is named in
 * the sockaddr_pkt and the frame may not exceed mtu + hard_header_len.
 * Returns len on success or a negative errno.
 *
 * Allocation may not sleep under rcu_read_lock(), so the first pass
 * drops the lock, allocates and fills the skb with GFP_KERNEL, then
 * jumps back to "retry" to look the device up again (it may have gone
 * away in the meantime).
 */
static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	/* NOTE(review): forces NUL termination of the device name —
	 * spkt_device is presumably 14 bytes; confirm against the uapi. */
	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		/* Cannot block inside the RCU read side: unlock, allocate,
		 * copy the payload, then redo the lookup from "retry". */
		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}


	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);	/* kfree_skb(NULL) is a no-op on the first pass */
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496
/*
 * Run the socket's attached BPF filter (if any) over @skb.
 * Returns the filter verdict: 0 means drop, otherwise the number of
 * bytes to keep.  @res is returned unchanged when no filter is
 * attached.  The filter is read under rcu_read_lock_bh() since it can
 * be replaced concurrently via setsockopt.
 */
static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns, filter->len);
	rcu_read_unlock_bh();

	return res;
}
510
511/*
512 This function makes lazy skb cloning in hope that most of packets
513 are discarded by BPF.
514
515 Note tricky part: we DO mangle shared skb! skb->data, skb->len
516 and skb->cb are mangled. It works because (and until) packets
517 falling here are owned by current CPU. Output packets are cloned
518 by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return skb to original state on exit,
520 we will not harm anyone.
521 */
522
/*
 * Receive hook for bound AF_PACKET sockets (non-ring path): run the
 * socket filter, record a sockaddr_ll and the original length in
 * skb->cb, trim to the filter's snap length and queue the skb on
 * sk_receive_queue.  Always returns 0; the skb is consumed, or
 * restored to its original state and freed (see the comment above).
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;	/* saved so a mangled shared skb can be restored */
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	/* Only deliver within the socket's network namespace. */
	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to the user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	/* Filter verdict: 0 drops, otherwise caps the bytes delivered. */
	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	/* We are going to queue and further mangle the skb; take a
	 * private clone if it is shared, restoring the original first. */
	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	/* sockaddr_ll (minus its 8-byte addr field) plus origlen must fit
	 * in the 48-byte skb->cb. */
	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	/* Remember the pre-trim length for PACKET_AUXDATA consumers. */
	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	/* Undo our header pushes/pulls if others still hold this skb. */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}
632
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000633static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
634 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635{
636 struct sock *sk;
637 struct packet_sock *po;
638 struct sockaddr_ll *sll;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700639 union {
640 struct tpacket_hdr *h1;
641 struct tpacket2_hdr *h2;
642 void *raw;
643 } h;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000644 u8 *skb_head = skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800646 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700648 unsigned short macoff, netoff, hdrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649 struct sk_buff *copy_skb = NULL;
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -0700650 struct timeval tv;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700651 struct timespec ts;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
653 if (skb->pkt_type == PACKET_LOOPBACK)
654 goto drop;
655
656 sk = pt->af_packet_priv;
657 po = pkt_sk(sk);
658
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800659 if (!net_eq(dev_net(dev), sock_net(sk)))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800660 goto drop;
661
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700662 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700664 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 else if (skb->pkt_type == PACKET_OUTGOING) {
666 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300667 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 }
669 }
670
Herbert Xu8dc41942007-02-04 23:31:32 -0800671 if (skb->ip_summed == CHECKSUM_PARTIAL)
672 status |= TP_STATUS_CSUMNOTREADY;
673
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 snaplen = skb->len;
675
David S. Millerdbcb5852007-01-24 15:21:02 -0800676 res = run_filter(skb, sk, snaplen);
677 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700678 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800679 if (snaplen > res)
680 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681
682 if (sk->sk_type == SOCK_DGRAM) {
Patrick McHardy8913336a2008-07-18 18:05:19 -0700683 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
684 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 } else {
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300686 unsigned maclen = skb_network_offset(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700687 netoff = TPACKET_ALIGN(po->tp_hdrlen +
Patrick McHardy8913336a2008-07-18 18:05:19 -0700688 (maclen < 16 ? 16 : maclen)) +
689 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 macoff = netoff - maclen;
691 }
692
Johann Baudy69e3c752009-05-18 22:11:22 -0700693 if (macoff + snaplen > po->rx_ring.frame_size) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694 if (po->copy_thresh &&
695 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
696 (unsigned)sk->sk_rcvbuf) {
697 if (skb_shared(skb)) {
698 copy_skb = skb_clone(skb, GFP_ATOMIC);
699 } else {
700 copy_skb = skb_get(skb);
701 skb_head = skb->data;
702 }
703 if (copy_skb)
704 skb_set_owner_r(copy_skb, sk);
705 }
Johann Baudy69e3c752009-05-18 22:11:22 -0700706 snaplen = po->rx_ring.frame_size - macoff;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 if ((int)snaplen < 0)
708 snaplen = 0;
709 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710
711 spin_lock(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -0700712 h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700713 if (!h.raw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700714 goto ring_is_full;
Johann Baudy69e3c752009-05-18 22:11:22 -0700715 packet_increment_head(&po->rx_ring);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 po->stats.tp_packets++;
717 if (copy_skb) {
718 status |= TP_STATUS_COPY;
719 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
720 }
721 if (!po->stats.tp_drops)
722 status &= ~TP_STATUS_LOSING;
723 spin_unlock(&sk->sk_receive_queue.lock);
724
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700725 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700727 switch (po->tp_version) {
728 case TPACKET_V1:
729 h.h1->tp_len = skb->len;
730 h.h1->tp_snaplen = snaplen;
731 h.h1->tp_mac = macoff;
732 h.h1->tp_net = netoff;
733 if (skb->tstamp.tv64)
734 tv = ktime_to_timeval(skb->tstamp);
735 else
736 do_gettimeofday(&tv);
737 h.h1->tp_sec = tv.tv_sec;
738 h.h1->tp_usec = tv.tv_usec;
739 hdrlen = sizeof(*h.h1);
740 break;
741 case TPACKET_V2:
742 h.h2->tp_len = skb->len;
743 h.h2->tp_snaplen = snaplen;
744 h.h2->tp_mac = macoff;
745 h.h2->tp_net = netoff;
746 if (skb->tstamp.tv64)
747 ts = ktime_to_timespec(skb->tstamp);
748 else
749 getnstimeofday(&ts);
750 h.h2->tp_sec = ts.tv_sec;
751 h.h2->tp_nsec = ts.tv_nsec;
Eric Dumazet05423b22009-10-26 18:40:35 -0700752 h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700753 hdrlen = sizeof(*h.h2);
754 break;
755 default:
756 BUG();
757 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700759 sll = h.raw + TPACKET_ALIGN(hdrlen);
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700760 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 sll->sll_family = AF_PACKET;
762 sll->sll_hatype = dev->type;
763 sll->sll_protocol = skb->protocol;
764 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800765 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700766 sll->sll_ifindex = orig_dev->ifindex;
767 else
768 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700770 __packet_set_status(po, h.raw, status);
Ralf Baechlee16aa202006-12-07 00:11:33 -0800771 smp_mb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 {
773 struct page *p_start, *p_end;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700774 u8 *h_end = h.raw + macoff + snaplen - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700776 p_start = virt_to_page(h.raw);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 p_end = virt_to_page(h_end);
778 while (p_start <= p_end) {
779 flush_dcache_page(p_start);
780 p_start++;
781 }
782 }
783
784 sk->sk_data_ready(sk, 0);
785
786drop_n_restore:
787 if (skb_head != skb->data && skb_shared(skb)) {
788 skb->data = skb_head;
789 skb->len = skb_len;
790 }
791drop:
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900792 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793 return 0;
794
795ring_is_full:
796 po->stats.tp_drops++;
797 spin_unlock(&sk->sk_receive_queue.lock);
798
799 sk->sk_data_ready(sk, 0);
Wei Yongjunacb5d752009-02-25 00:36:42 +0000800 kfree_skb(copy_skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801 goto drop_n_restore;
802}
803
Johann Baudy69e3c752009-05-18 22:11:22 -0700804static void tpacket_destruct_skb(struct sk_buff *skb)
805{
806 struct packet_sock *po = pkt_sk(skb->sk);
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000807 void *ph;
Johann Baudy69e3c752009-05-18 22:11:22 -0700808
809 BUG_ON(skb == NULL);
810
811 if (likely(po->tx_ring.pg_vec)) {
812 ph = skb_shinfo(skb)->destructor_arg;
813 BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
814 BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
815 atomic_dec(&po->tx_ring.pending);
816 __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
817 }
818
819 sock_wfree(skb);
820}
821
/*
 * Build an skb for one TX ring frame.
 *
 * @po:       sending packet socket (owns the TX ring)
 * @skb:      freshly allocated skb to fill
 * @frame:    ring frame to transmit (tpacket_hdr/tpacket2_hdr + payload)
 * @dev:      output device
 * @size_max: upper bound on the packet length for this device
 * @proto:    link-level protocol number for the skb
 * @addr:     destination hardware address (SOCK_DGRAM), or NULL
 *
 * The payload is not copied: the ring pages are attached to the skb as
 * page fragments (zero-copy), so the frame must not be reused by user
 * space until tpacket_destruct_skb() marks it available again.
 *
 * Returns the packet length (tp_len) on success or a negative errno.
 */
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	/* Header layout differs between TPACKET_V1 and V2; access via union. */
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	/* Remember the frame so the destructor can flip its status later. */
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	/* Payload starts right after the sockaddr_ll stored in the frame. */
	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		/* SOCK_DGRAM: the kernel builds the link header from @addr. */
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		/* SOCK_RAW: copy the user-supplied link header into the
		 * linear part; only the rest is attached as fragments. */
		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	page = virt_to_page(data);
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	/* First fragment may start mid-page. */
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	/* Attach the remaining payload page by page as frags. */
	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		flush_dcache_page(page);
		get_page(page);	/* each frag holds its own page reference */
		skb_fill_page_desc(skb,
				nr_frags,
				page++, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}
920
/*
 * Transmit all TX ring frames currently marked TP_STATUS_SEND_REQUEST.
 *
 * Walks the ring, turning each pending frame into an skb via
 * tpacket_fill_skb() and handing it to dev_queue_xmit(). Without
 * MSG_DONTWAIT the loop also waits (by yielding the CPU) until all
 * in-flight skbs have been destructed.
 *
 * Returns the total number of payload bytes queued, or a negative errno.
 */
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	/* Serializes against ring (re)configuration and mmap. */
	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		/* Unaddressed send: use the device/protocol we are bound to. */
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	/* Largest payload a frame can carry after the tpacket header,
	 * further capped by the device MTU (+ link header). */
	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			/* No frame pending: yield and re-check; the loop
			 * condition below handles the MSG_DONTWAIT exit. */
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				/* PACKET_LOSS set: silently drop the bad
				 * frame and keep going. */
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001049static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
1050 size_t reserve, size_t len,
1051 size_t linear, int noblock,
1052 int *err)
1053{
1054 struct sk_buff *skb;
1055
1056 /* Under a page? Don't bother with paged skb. */
1057 if (prepad + len < PAGE_SIZE || !linear)
1058 linear = len;
1059
1060 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1061 err);
1062 if (!skb)
1063 return NULL;
1064
1065 skb_reserve(skb, reserve);
1066 skb_put(skb, linear);
1067 skb->data_len = len - linear;
1068 skb->len += len - linear;
1069
1070 return skb;
1071}
1072
/*
 * Plain (non-ring) transmit path for PF_PACKET sockets.
 *
 * Copies the user data into a freshly allocated skb and queues it on
 * the chosen device. Sockets with PACKET_VNET_HDR enabled prepend a
 * struct virtio_net_hdr describing checksum/GSO requirements.
 *
 * Returns the number of user bytes consumed (including the virtio
 * header, if any) or a negative errno.
 */
static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		/* Unaddressed send: use the bound device/protocol. */
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		/* Grow hdr_len so the region to be checksummed is fully
		 * contained in the advertised header. */
		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		      vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			/* Map virtio GSO types onto SKB_GSO_* flags. */
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	err = -EMSGSIZE;
	/* GSO packets may legitimately exceed the device MTU. */
	if (!gso_type && (len > dev->mtu+reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		/* Report the virtio header as consumed, too. */
		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
1235
Johann Baudy69e3c752009-05-18 22:11:22 -07001236static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1237 struct msghdr *msg, size_t len)
1238{
Johann Baudy69e3c752009-05-18 22:11:22 -07001239 struct sock *sk = sock->sk;
1240 struct packet_sock *po = pkt_sk(sk);
1241 if (po->tx_ring.pg_vec)
1242 return tpacket_snd(po, msg);
1243 else
Johann Baudy69e3c752009-05-18 22:11:22 -07001244 return packet_snd(sock, msg, len);
1245}
1246
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247/*
1248 * Close a PACKET socket. This is fairly simple. We immediately go
1249 * to 'closed' state and remove our protocol entry in the device list.
1250 */
1251
/*
 * Release a PF_PACKET socket: unhash it, detach the protocol hook,
 * drop multicast memberships, tear down any mapped rings, then orphan
 * and free the sock. Always returns 0.
 */
static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	/* Take ourselves off the per-namespace socket list first. */
	write_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	write_unlock_bh(&net->packet.sklist_lock);

	/*
	 * Unhook packet receive handler.
	 */

	if (po->running) {
		/*
		 * Remove the protocol hook
		 */
		dev_remove_pack(&po->prot_hook);
		po->running = 0;
		po->num = 0;
		__sock_put(sk);	/* drop the reference the hook held */
	}

	packet_flush_mclist(sk);

	/* A zeroed tpacket_req tears down a mapped ring. */
	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	/*
	 * Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}
1309
1310/*
1311 * Attach a packet hook.
1312 */
1313
/*
 * (Re)bind the socket's protocol hook to @dev / @protocol.
 *
 * Any existing hook is detached first. A protocol of 0 leaves the
 * socket unbound. If the chosen device exists but is down, the bind
 * still succeeds and ENETDOWN is reported asynchronously via sk_err.
 * Always returns 0.
 */
static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		/* NOTE(review): bind_lock is deliberately dropped around
		 * dev_remove_pack() — presumably because it may block;
		 * confirm before changing this locking pattern. */
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	/* Protocol 0 means "unbound": leave the hook detached. */
	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);	/* the registered hook pins the sock */
		po->running = 1;
	} else {
		/* Device is down: flag the error for the next socket call. */
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}
1357
1358/*
1359 * Bind a packet socket to a device
1360 */
1361
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001362static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1363 int addr_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001365 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 char name[15];
1367 struct net_device *dev;
1368 int err = -ENODEV;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001369
Linus Torvalds1da177e2005-04-16 15:20:36 -07001370 /*
1371 * Check legality
1372 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001373
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001374 if (addr_len != sizeof(struct sockaddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 return -EINVAL;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001376 strlcpy(name, uaddr->sa_data, sizeof(name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001378 dev = dev_get_by_name(sock_net(sk), name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 if (dev) {
1380 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1381 dev_put(dev);
1382 }
1383 return err;
1384}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385
1386static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1387{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001388 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1389 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390 struct net_device *dev = NULL;
1391 int err;
1392
1393
1394 /*
1395 * Check legality
1396 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001397
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 if (addr_len < sizeof(struct sockaddr_ll))
1399 return -EINVAL;
1400 if (sll->sll_family != AF_PACKET)
1401 return -EINVAL;
1402
1403 if (sll->sll_ifindex) {
1404 err = -ENODEV;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001405 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406 if (dev == NULL)
1407 goto out;
1408 }
1409 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1410 if (dev)
1411 dev_put(dev);
1412
1413out:
1414 return err;
1415}
1416
/* Protocol descriptor backing every PF_PACKET sock allocation. */
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};
1422
1423/*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001424 * Create a packet of type SOCK_PACKET.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001425 */
1426
/*
 * Create a PF_PACKET socket (SOCK_RAW, SOCK_DGRAM or legacy
 * SOCK_PACKET). Requires CAP_NET_RAW. A non-zero protocol starts
 * packet reception immediately; protocol 0 waits for bind().
 */
static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	/* Raw packet access is privileged. */
	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	/* Legacy SOCK_PACKET sockets use their own ops table. */
	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		/* Non-zero protocol: register the hook right away. */
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);	/* the registered hook pins the sock */
		po->running = 1;
	}

	/* Hash the new socket into the per-namespace list. */
	write_lock_bh(&net->packet.sklist_lock);
	sk_add_node(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	write_unlock_bh(&net->packet.sklist_lock);
	return 0;
out:
	return err;
}
1489
1490/*
1491 * Pull a packet from our receive queue and hand it to the user.
1492 * If necessary we block.
1493 */
1494
/*
 * recvmsg for PF_PACKET sockets (non-mmap path).
 *
 * Dequeues one packet via the generic datagram receiver and copies it
 * to user space. With PACKET_VNET_HDR enabled, a virtio_net_hdr
 * describing the packet's checksum/GSO state is emitted first; with
 * PACKET_AUXDATA, a tpacket_auxdata control message is attached.
 *
 * Returns the number of bytes delivered (virtio header included), or
 * the full packet length when MSG_TRUNC is set, or a negative errno.
 */
static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if device have just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		/* The virtio header is consumed from the user buffer
		 * before the packet data. */
		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if ((len -= vnet_hdr_len) < 0)
			goto out_free;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb->csum_start -
							skb_headroom(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		/* PACKET_AUXDATA: attach per-packet metadata as a cmsg. */
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = vlan_tx_tag_get(skb);

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
1634
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1636 int *uaddr_len, int peer)
1637{
1638 struct net_device *dev;
1639 struct sock *sk = sock->sk;
1640
1641 if (peer)
1642 return -EOPNOTSUPP;
1643
1644 uaddr->sa_family = AF_PACKET;
Eric Dumazet654d1f82009-11-02 10:43:32 +01001645 rcu_read_lock();
1646 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
1647 if (dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648 strlcpy(uaddr->sa_data, dev->name, 15);
Eric Dumazet654d1f82009-11-02 10:43:32 +01001649 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650 memset(uaddr->sa_data, 0, 14);
Eric Dumazet654d1f82009-11-02 10:43:32 +01001651 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652 *uaddr_len = sizeof(*uaddr);
1653
1654 return 0;
1655}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656
1657static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1658 int *uaddr_len, int peer)
1659{
1660 struct net_device *dev;
1661 struct sock *sk = sock->sk;
1662 struct packet_sock *po = pkt_sk(sk);
Cyrill Gorcunov13cfa972009-11-08 05:51:19 +00001663 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664
1665 if (peer)
1666 return -EOPNOTSUPP;
1667
1668 sll->sll_family = AF_PACKET;
1669 sll->sll_ifindex = po->ifindex;
1670 sll->sll_protocol = po->num;
Eric Dumazet654d1f82009-11-02 10:43:32 +01001671 rcu_read_lock();
1672 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673 if (dev) {
1674 sll->sll_hatype = dev->type;
1675 sll->sll_halen = dev->addr_len;
1676 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677 } else {
1678 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1679 sll->sll_halen = 0;
1680 }
Eric Dumazet654d1f82009-11-02 10:43:32 +01001681 rcu_read_unlock();
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001682 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683
1684 return 0;
1685}
1686
Wang Chen2aeb0b82008-07-14 20:49:46 -07001687static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1688 int what)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689{
1690 switch (i->type) {
1691 case PACKET_MR_MULTICAST:
1692 if (what > 0)
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001693 return dev_mc_add(dev, i->addr, i->alen, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694 else
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001695 return dev_mc_delete(dev, i->addr, i->alen, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 break;
1697 case PACKET_MR_PROMISC:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001698 return dev_set_promiscuity(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699 break;
1700 case PACKET_MR_ALLMULTI:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001701 return dev_set_allmulti(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 break;
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001703 case PACKET_MR_UNICAST:
1704 if (what > 0)
Jiri Pirkoccffad252009-05-22 23:22:17 +00001705 return dev_unicast_add(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001706 else
Jiri Pirkoccffad252009-05-22 23:22:17 +00001707 return dev_unicast_delete(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001708 break;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001709 default:
1710 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 }
Wang Chen2aeb0b82008-07-14 20:49:46 -07001712 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713}
1714
1715static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1716{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001717 for ( ; i; i = i->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 if (i->ifindex == dev->ifindex)
1719 packet_dev_mc(dev, i, what);
1720 }
1721}
1722
/*
 * PACKET_ADD_MEMBERSHIP: record a membership (multicast/promisc/allmulti/
 * unicast) on the socket and program it into the device.  A duplicate
 * request (same ifindex/type/alen/address) only bumps the existing entry's
 * refcount.  The whole operation runs under RTNL, which keeps the device
 * alive and po->mclist stable.
 */
static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	/* Address cannot be longer than the device's hardware address. */
	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	/* Allocate up front so the duplicate scan below needs no unwind. */
	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	/* Link first, then program the device; unlink again on failure. */
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}
1776
/*
 * PACKET_DROP_MEMBERSHIP: find the matching membership entry, drop one
 * reference, and when the count hits zero unlink it and deprogram the
 * device (if it still exists).  Returns -EADDRNOTAVAIL when no matching
 * entry is found.  Runs under RTNL; note the early rtnl_unlock()+return
 * inside the loop on a successful match.
 */
static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}
1803
1804static void packet_flush_mclist(struct sock *sk)
1805{
1806 struct packet_sock *po = pkt_sk(sk);
1807 struct packet_mclist *ml;
1808
1809 if (!po->mclist)
1810 return;
1811
1812 rtnl_lock();
1813 while ((ml = po->mclist) != NULL) {
1814 struct net_device *dev;
1815
1816 po->mclist = ml->next;
Eric Dumazetad959e72009-10-16 06:38:46 +00001817 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1818 if (dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819 packet_dev_mc(dev, ml, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820 kfree(ml);
1821 }
1822 rtnl_unlock();
1823}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824
/*
 * setsockopt(SOL_PACKET) handler.  Each option validates optlen, copies
 * the value from userspace, then updates socket state; ring-related
 * options are rejected while a ring is mapped/configured where noted.
 */
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		/* Accept the old, shorter packet_mreq too; copy only what
		 * the caller provided (mreq was pre-zeroed above). */
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		/* The claimed address length must fit in what was copied. */
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		/* Rings are incompatible with virtio net headers. */
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		/* The header format cannot change while a ring exists. */
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		/* Reserved headroom is baked into ring frame layout. */
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		/* vnet headers only make sense on SOCK_RAW and without
		 * a configured ring (mirrors the ring-side check). */
		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
1971
1972static int packet_getsockopt(struct socket *sock, int level, int optname,
1973 char __user *optval, int __user *optlen)
1974{
1975 int len;
Herbert Xu8dc41942007-02-04 23:31:32 -08001976 int val;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977 struct sock *sk = sock->sk;
1978 struct packet_sock *po = pkt_sk(sk);
Herbert Xu8dc41942007-02-04 23:31:32 -08001979 void *data;
1980 struct tpacket_stats st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981
1982 if (level != SOL_PACKET)
1983 return -ENOPROTOOPT;
1984
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001985 if (get_user(len, optlen))
1986 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987
1988 if (len < 0)
1989 return -EINVAL;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001990
Johann Baudy69e3c752009-05-18 22:11:22 -07001991 switch (optname) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992 case PACKET_STATISTICS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993 if (len > sizeof(struct tpacket_stats))
1994 len = sizeof(struct tpacket_stats);
1995 spin_lock_bh(&sk->sk_receive_queue.lock);
1996 st = po->stats;
1997 memset(&po->stats, 0, sizeof(st));
1998 spin_unlock_bh(&sk->sk_receive_queue.lock);
1999 st.tp_packets += st.tp_drops;
2000
Herbert Xu8dc41942007-02-04 23:31:32 -08002001 data = &st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002 break;
Herbert Xu8dc41942007-02-04 23:31:32 -08002003 case PACKET_AUXDATA:
2004 if (len > sizeof(int))
2005 len = sizeof(int);
2006 val = po->auxdata;
2007
2008 data = &val;
2009 break;
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -07002010 case PACKET_ORIGDEV:
2011 if (len > sizeof(int))
2012 len = sizeof(int);
2013 val = po->origdev;
2014
2015 data = &val;
2016 break;
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08002017 case PACKET_VNET_HDR:
2018 if (len > sizeof(int))
2019 len = sizeof(int);
2020 val = po->has_vnet_hdr;
2021
2022 data = &val;
2023 break;
Patrick McHardybbd6ef82008-07-14 22:50:15 -07002024 case PACKET_VERSION:
2025 if (len > sizeof(int))
2026 len = sizeof(int);
2027 val = po->tp_version;
2028 data = &val;
2029 break;
2030 case PACKET_HDRLEN:
2031 if (len > sizeof(int))
2032 len = sizeof(int);
2033 if (copy_from_user(&val, optval, len))
2034 return -EFAULT;
2035 switch (val) {
2036 case TPACKET_V1:
2037 val = sizeof(struct tpacket_hdr);
2038 break;
2039 case TPACKET_V2:
2040 val = sizeof(struct tpacket2_hdr);
2041 break;
2042 default:
2043 return -EINVAL;
2044 }
2045 data = &val;
2046 break;
Patrick McHardy8913336a2008-07-18 18:05:19 -07002047 case PACKET_RESERVE:
2048 if (len > sizeof(unsigned int))
2049 len = sizeof(unsigned int);
2050 val = po->tp_reserve;
2051 data = &val;
2052 break;
Johann Baudy69e3c752009-05-18 22:11:22 -07002053 case PACKET_LOSS:
2054 if (len > sizeof(unsigned int))
2055 len = sizeof(unsigned int);
2056 val = po->tp_loss;
2057 data = &val;
2058 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059 default:
2060 return -ENOPROTOOPT;
2061 }
2062
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08002063 if (put_user(len, optlen))
2064 return -EFAULT;
Herbert Xu8dc41942007-02-04 23:31:32 -08002065 if (copy_to_user(optval, data, len))
2066 return -EFAULT;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08002067 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068}
2069
2070
/*
 * netdevice notifier: keep every packet socket in this netns consistent
 * with device state changes.
 *
 * NETDEV_UNREGISTER: drop the socket's memberships on the device, then
 * fall through to the DOWN handling and additionally forget the binding
 * (ifindex = -1, prot_hook.dev = NULL).
 * NETDEV_DOWN: if bound to this device, unhook from the rx path, drop the
 * hook's socket reference, and report ENETDOWN to the owner.
 * NETDEV_UP: re-hook sockets that are bound (po->num set) but not running.
 *
 * The sklist read lock pins the socket list; per-socket hook state is
 * protected by po->bind_lock.
 */
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	read_lock(&net->packet.sklist_lock);
	sk_for_each(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					/* release the ref the hook held */
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			spin_lock(&po->bind_lock);
			if (dev->ifindex == po->ifindex && po->num &&
			    !po->running) {
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
			break;
		}
	}
	read_unlock(&net->packet.sklist_lock);
	return NOTIFY_DONE;
}
2121
2122
2123static int packet_ioctl(struct socket *sock, unsigned int cmd,
2124 unsigned long arg)
2125{
2126 struct sock *sk = sock->sk;
2127
Johann Baudy69e3c752009-05-18 22:11:22 -07002128 switch (cmd) {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002129 case SIOCOUTQ:
2130 {
2131 int amount = sk_wmem_alloc_get(sk);
Eric Dumazet31e6d362009-06-17 19:05:41 -07002132
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002133 return put_user(amount, (int __user *)arg);
2134 }
2135 case SIOCINQ:
2136 {
2137 struct sk_buff *skb;
2138 int amount = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002140 spin_lock_bh(&sk->sk_receive_queue.lock);
2141 skb = skb_peek(&sk->sk_receive_queue);
2142 if (skb)
2143 amount = skb->len;
2144 spin_unlock_bh(&sk->sk_receive_queue.lock);
2145 return put_user(amount, (int __user *)arg);
2146 }
2147 case SIOCGSTAMP:
2148 return sock_get_timestamp(sk, (struct timeval __user *)arg);
2149 case SIOCGSTAMPNS:
2150 return sock_get_timestampns(sk, (struct timespec __user *)arg);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002151
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152#ifdef CONFIG_INET
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002153 case SIOCADDRT:
2154 case SIOCDELRT:
2155 case SIOCDARP:
2156 case SIOCGARP:
2157 case SIOCSARP:
2158 case SIOCGIFADDR:
2159 case SIOCSIFADDR:
2160 case SIOCGIFBRDADDR:
2161 case SIOCSIFBRDADDR:
2162 case SIOCGIFNETMASK:
2163 case SIOCSIFNETMASK:
2164 case SIOCGIFDSTADDR:
2165 case SIOCSIFDSTADDR:
2166 case SIOCSIFFLAGS:
2167 if (!net_eq(sock_net(sk), &init_net))
2168 return -ENOIOCTLCMD;
2169 return inet_dgram_ops.ioctl(sock, cmd, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002170#endif
2171
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002172 default:
2173 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002174 }
2175 return 0;
2176}
2177
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002178static unsigned int packet_poll(struct file *file, struct socket *sock,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179 poll_table *wait)
2180{
2181 struct sock *sk = sock->sk;
2182 struct packet_sock *po = pkt_sk(sk);
2183 unsigned int mask = datagram_poll(file, sock, wait);
2184
2185 spin_lock_bh(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -07002186 if (po->rx_ring.pg_vec) {
2187 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188 mask |= POLLIN | POLLRDNORM;
2189 }
2190 spin_unlock_bh(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -07002191 spin_lock_bh(&sk->sk_write_queue.lock);
2192 if (po->tx_ring.pg_vec) {
2193 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2194 mask |= POLLOUT | POLLWRNORM;
2195 }
2196 spin_unlock_bh(&sk->sk_write_queue.lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 return mask;
2198}
2199
2200
2201/* Dirty? Well, I still did not learn better way to account
2202 * for user mmaps.
2203 */
2204
2205static void packet_mm_open(struct vm_area_struct *vma)
2206{
2207 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002208 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002210
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211 if (sk)
2212 atomic_inc(&pkt_sk(sk)->mapped);
2213}
2214
2215static void packet_mm_close(struct vm_area_struct *vma)
2216{
2217 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002218 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002220
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221 if (sk)
2222 atomic_dec(&pkt_sk(sk)->mapped);
2223}
2224
/* VMA callbacks for mmap'ed rings: keep po->mapped equal to the number
 * of live userspace mappings (checked before rings are torn down). */
static const struct vm_operations_struct packet_mmap_ops = {
	.open	=	packet_mm_open,
	.close	=	packet_mm_close,
};
2229
/* Release a ring's page vector: free each allocated block of 2^order
 * pages (NULL slots are skipped), then free the pointer array itself. */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	unsigned int idx;

	for (idx = 0; idx < len; idx++)
		if (likely(pg_vec[idx]))
			free_pages((unsigned long) pg_vec[idx], order);

	kfree(pg_vec);
}
2240
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002241static inline char *alloc_one_pg_vec_page(unsigned long order)
2242{
Eric Dumazet719bfea2009-04-15 03:39:52 -07002243 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
2244
2245 return (char *) __get_free_pages(gfp_flags, order);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002246}
2247
2248static char **alloc_pg_vec(struct tpacket_req *req, int order)
2249{
2250 unsigned int block_nr = req->tp_block_nr;
2251 char **pg_vec;
2252 int i;
2253
2254 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
2255 if (unlikely(!pg_vec))
2256 goto out;
2257
2258 for (i = 0; i < block_nr; i++) {
2259 pg_vec[i] = alloc_one_pg_vec_page(order);
2260 if (unlikely(!pg_vec[i]))
2261 goto out_free_pgvec;
2262 }
2263
2264out:
2265 return pg_vec;
2266
2267out_free_pgvec:
2268 free_pg_vec(pg_vec, order, block_nr);
2269 pg_vec = NULL;
2270 goto out;
2271}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272
/*
 * Configure (or, with tp_block_nr == 0, tear down) the rx or tx packet
 * ring.  Sequence: validate the request and pre-allocate pages, detach
 * the socket's protocol hook, quiesce (synchronize_net), swap the old
 * and new ring state under pg_vec_lock + the queue lock, then re-attach
 * the hook and free whichever page vector was swapped out.
 */
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring)
{
	char **pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	/* Refuse reconfiguration while mapped or with frames in flight
	 * (teardown on close is allowed regardless). */
	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		/* Blocks must be positive, page-aligned; frames must hold
		 * the header + reserve and be TPACKET_ALIGNMENT-aligned;
		 * frame/block counts must be consistent. */
		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
	}
	/* Done */
	else {
		/* Teardown request: frame count must also be zero. */
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	/* Make sure no receive path still runs with the old ring. */
	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
/* XC: exchange — store b into a, yield a's old value. */
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
		spin_lock_bh(&rb_queue->lock);
		pg_vec = XC(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		order = XC(rb->pg_vec_order, order);
		req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		/* Pick the rx handler matching the (new) rx ring state. */
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
#undef XC
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	/* Re-attach the hook if we detached it above. */
	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	/* After the swap this is the OLD vector (or the new one on error). */
	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
2399
/*
 * mmap() handler: map the rx ring followed by the tx ring (whichever are
 * configured) contiguously into the VMA.  The VMA must start at offset 0
 * and be exactly the combined ring size; pages are inserted one by one
 * with vm_insert_page().  pg_vec_lock serializes against ring changes.
 */
static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	/* Total bytes of all configured rings (rx then tx). */
	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	/* The mapping must cover the rings exactly. */
	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page = virt_to_page(rb->pg_vec[i]);
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages;
					pg_num++, page++) {
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
			}
		}
	}

	/* Mapping established: track it so the ring can't be torn down. */
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459
/*
 * proto_ops for the legacy SOCK_PACKET socket type: bind/getname/sendmsg
 * use the *_spkt variants, most other operations are sock_no_* stubs,
 * and there is no setsockopt/getsockopt or mmap'ed ring support.
 */
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480
/*
 * proto_ops for SOCK_RAW/SOCK_DGRAM AF_PACKET sockets: full option
 * handling, ring-aware poll, and mmap support for the packet rings.
 */
static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};
2501
/* PF_PACKET address-family registration: socket(2) creation entry point. */
static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};
2507
/* Netdevice event subscription; see packet_notifier for the handling. */
static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};
2511
2512#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07002513
2514static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet40ccbf52008-01-07 22:39:57 -08002515 __acquires(seq_file_net(seq)->packet.sklist_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002516{
Denis V. Luneve372c412007-11-19 22:31:54 -08002517 struct net *net = seq_file_net(seq);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002518 read_lock(&net->packet.sklist_lock);
Li Zefanb7ceabd2010-02-08 23:19:29 +00002519 return seq_hlist_start_head(&net->packet.sklist, *pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520}
2521
2522static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2523{
Herbert Xu1bf40952007-12-16 14:04:02 -08002524 struct net *net = seq_file_net(seq);
Li Zefanb7ceabd2010-02-08 23:19:29 +00002525 return seq_hlist_next(v, &net->packet.sklist, pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526}
2527
2528static void packet_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet40ccbf52008-01-07 22:39:57 -08002529 __releases(seq_file_net(seq)->packet.sklist_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002530{
Herbert Xu1bf40952007-12-16 14:04:02 -08002531 struct net *net = seq_file_net(seq);
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002532 read_unlock(&net->packet.sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533}
2534
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002535static int packet_seq_show(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536{
2537 if (v == SEQ_START_TOKEN)
2538 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2539 else {
Li Zefanb7ceabd2010-02-08 23:19:29 +00002540 struct sock *s = sk_entry(v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002541 const struct packet_sock *po = pkt_sk(s);
2542
2543 seq_printf(seq,
2544 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2545 s,
2546 atomic_read(&s->sk_refcnt),
2547 s->sk_type,
2548 ntohs(po->num),
2549 po->ifindex,
2550 po->running,
2551 atomic_read(&s->sk_rmem_alloc),
2552 sock_i_uid(s),
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002553 sock_i_ino(s));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554 }
2555
2556 return 0;
2557}
2558
/* Iterator operations for /proc/net/packet. */
static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};
2565
2566static int packet_seq_open(struct inode *inode, struct file *file)
2567{
Denis V. Luneve372c412007-11-19 22:31:54 -08002568 return seq_open_net(inode, file, &packet_seq_ops,
2569 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570}
2571
/* File operations for /proc/net/packet. */
static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,	/* frees the per-netns seq private data */
};
2579
2580#endif
2581
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002582static int __net_init packet_net_init(struct net *net)
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002583{
Denis V. Lunev2aaef4e2007-12-11 04:19:54 -08002584 rwlock_init(&net->packet.sklist_lock);
2585 INIT_HLIST_HEAD(&net->packet.sklist);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002586
2587 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2588 return -ENOMEM;
2589
2590 return 0;
2591}
2592
/* Per-network-namespace teardown: remove this namespace's /proc/net/packet. */
static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}
2597
/* Per-netns init/exit hooks, registered in packet_init(). */
static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};
2602
2603
/*
 * Module unload: tear everything down in the reverse order of
 * packet_init() so no new sockets or events can arrive while
 * earlier-registered facilities are still being used.
 */
static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}
2611
2612static int __init packet_init(void)
2613{
2614 int rc = proto_register(&packet_proto, 0);
2615
2616 if (rc != 0)
2617 goto out;
2618
2619 sock_register(&packet_family_ops);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002620 register_pernet_subsys(&packet_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621 register_netdevice_notifier(&packet_netdev_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002622out:
2623 return rc;
2624}
2625
/* Module boilerplate; the NETPROTO alias autoloads this module when
 * userspace first calls socket(PF_PACKET, ...). */
module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);