blob: 27d756f8f870e7d9e45b1027c25250daedb774ad [file] [log] [blame]
Pravin B Shelarc5441932013-03-25 14:49:35 +00001/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
43
44#include <net/sock.h>
45#include <net/ip.h>
46#include <net/icmp.h>
47#include <net/protocol.h>
48#include <net/ip_tunnels.h>
49#include <net/arp.h>
50#include <net/checksum.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/xfrm.h>
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
56#include <net/rtnetlink.h>
57
58#if IS_ENABLED(CONFIG_IPV6)
59#include <net/ipv6.h>
60#include <net/ip6_fib.h>
61#include <net/ip6_route.h>
62#endif
63
64static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65 __be32 key, __be32 remote)
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
Tom Herbert7d442fa2014-01-02 11:48:26 -080071static inline void __tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
72{
73 struct dst_entry *old_dst;
74
75 if (dst && (dst->flags & DST_NOCACHE))
76 dst = NULL;
77
78 spin_lock_bh(&t->dst_lock);
79 old_dst = rcu_dereference_raw(t->dst_cache);
80 rcu_assign_pointer(t->dst_cache, dst);
81 dst_release(old_dst);
82 spin_unlock_bh(&t->dst_lock);
83}
84
/* Cache @dst on tunnel @t; thin wrapper around __tunnel_dst_set(). */
static inline void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
{
	__tunnel_dst_set(t, dst);
}
89
90static inline void tunnel_dst_reset(struct ip_tunnel *t)
91{
92 tunnel_dst_set(t, NULL);
93}
94
95static inline struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
96{
97 struct dst_entry *dst;
98
99 rcu_read_lock();
100 dst = rcu_dereference(t->dst_cache);
101 if (dst)
102 dst_hold(dst);
103 rcu_read_unlock();
104 return dst;
105}
106
107struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
108{
109 struct dst_entry *dst = tunnel_dst_get(t);
110
111 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
112 tunnel_dst_reset(t);
113 return NULL;
114 }
115
116 return dst;
117}
118
Pravin B Shelarc5441932013-03-25 14:49:35 +0000119/* Often modified stats are per cpu, other are shared (netdev->stats) */
120struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
121 struct rtnl_link_stats64 *tot)
122{
123 int i;
124
125 for_each_possible_cpu(i) {
126 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
127 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
128 unsigned int start;
129
130 do {
131 start = u64_stats_fetch_begin_bh(&tstats->syncp);
132 rx_packets = tstats->rx_packets;
133 tx_packets = tstats->tx_packets;
134 rx_bytes = tstats->rx_bytes;
135 tx_bytes = tstats->tx_bytes;
136 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
137
138 tot->rx_packets += rx_packets;
139 tot->tx_packets += tx_packets;
140 tot->rx_bytes += rx_bytes;
141 tot->tx_bytes += tx_bytes;
142 }
143
144 tot->multicast = dev->stats.multicast;
145
146 tot->rx_crc_errors = dev->stats.rx_crc_errors;
147 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
148 tot->rx_length_errors = dev->stats.rx_length_errors;
149 tot->rx_frame_errors = dev->stats.rx_frame_errors;
150 tot->rx_errors = dev->stats.rx_errors;
151
152 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
153 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
154 tot->tx_dropped = dev->stats.tx_dropped;
155 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
156 tot->tx_errors = dev->stats.tx_errors;
157
158 tot->collisions = dev->stats.collisions;
159
160 return tot;
161}
162EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
163
164static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
165 __be16 flags, __be32 key)
166{
167 if (p->i_flags & TUNNEL_KEY) {
168 if (flags & TUNNEL_KEY)
169 return key == p->i_key;
170 else
171 /* key expected, none present */
172 return false;
173 } else
174 return !(flags & TUNNEL_KEY);
175}
176
177/* Fallback tunnel: no source, no destination, no key, no options
178
179 Tunnel hash table:
180 We require exact key match i.e. if a key is present in packet
181 it will match only tunnel with the same key; if it is not present,
182 it will match only keyless tunnel.
183
184 All keysless packets, if not matched configured keyless tunnels
185 will match fallback tunnel.
186 Given src, dst and key, find appropriate for input tunnel.
187*/
188struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
189 int link, __be16 flags,
190 __be32 remote, __be32 local,
191 __be32 key)
192{
193 unsigned int hash;
194 struct ip_tunnel *t, *cand = NULL;
195 struct hlist_head *head;
196
197 hash = ip_tunnel_hash(itn, key, remote);
198 head = &itn->tunnels[hash];
199
200 hlist_for_each_entry_rcu(t, head, hash_node) {
201 if (local != t->parms.iph.saddr ||
202 remote != t->parms.iph.daddr ||
203 !(t->dev->flags & IFF_UP))
204 continue;
205
206 if (!ip_tunnel_key_match(&t->parms, flags, key))
207 continue;
208
209 if (t->parms.link == link)
210 return t;
211 else
212 cand = t;
213 }
214
215 hlist_for_each_entry_rcu(t, head, hash_node) {
216 if (remote != t->parms.iph.daddr ||
217 !(t->dev->flags & IFF_UP))
218 continue;
219
220 if (!ip_tunnel_key_match(&t->parms, flags, key))
221 continue;
222
223 if (t->parms.link == link)
224 return t;
225 else if (!cand)
226 cand = t;
227 }
228
229 hash = ip_tunnel_hash(itn, key, 0);
230 head = &itn->tunnels[hash];
231
232 hlist_for_each_entry_rcu(t, head, hash_node) {
233 if ((local != t->parms.iph.saddr &&
234 (local != t->parms.iph.daddr ||
235 !ipv4_is_multicast(local))) ||
236 !(t->dev->flags & IFF_UP))
237 continue;
238
239 if (!ip_tunnel_key_match(&t->parms, flags, key))
240 continue;
241
242 if (t->parms.link == link)
243 return t;
244 else if (!cand)
245 cand = t;
246 }
247
248 if (flags & TUNNEL_NO_KEY)
249 goto skip_key_lookup;
250
251 hlist_for_each_entry_rcu(t, head, hash_node) {
252 if (t->parms.i_key != key ||
253 !(t->dev->flags & IFF_UP))
254 continue;
255
256 if (t->parms.link == link)
257 return t;
258 else if (!cand)
259 cand = t;
260 }
261
262skip_key_lookup:
263 if (cand)
264 return cand;
265
266 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
267 return netdev_priv(itn->fb_tunnel_dev);
268
269
270 return NULL;
271}
272EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
273
274static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
275 struct ip_tunnel_parm *parms)
276{
277 unsigned int h;
278 __be32 remote;
279
280 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
281 remote = parms->iph.daddr;
282 else
283 remote = 0;
284
285 h = ip_tunnel_hash(itn, parms->i_key, remote);
286 return &itn->tunnels[h];
287}
288
289static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
290{
291 struct hlist_head *head = ip_bucket(itn, &t->parms);
292
293 hlist_add_head_rcu(&t->hash_node, head);
294}
295
296static void ip_tunnel_del(struct ip_tunnel *t)
297{
298 hlist_del_init_rcu(&t->hash_node);
299}
300
301static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
302 struct ip_tunnel_parm *parms,
303 int type)
304{
305 __be32 remote = parms->iph.daddr;
306 __be32 local = parms->iph.saddr;
307 __be32 key = parms->i_key;
308 int link = parms->link;
309 struct ip_tunnel *t = NULL;
310 struct hlist_head *head = ip_bucket(itn, parms);
311
312 hlist_for_each_entry_rcu(t, head, hash_node) {
313 if (local == t->parms.iph.saddr &&
314 remote == t->parms.iph.daddr &&
315 key == t->parms.i_key &&
316 link == t->parms.link &&
317 type == t->dev->type)
318 break;
319 }
320 return t;
321}
322
323static struct net_device *__ip_tunnel_create(struct net *net,
324 const struct rtnl_link_ops *ops,
325 struct ip_tunnel_parm *parms)
326{
327 int err;
328 struct ip_tunnel *tunnel;
329 struct net_device *dev;
330 char name[IFNAMSIZ];
331
332 if (parms->name[0])
333 strlcpy(name, parms->name, IFNAMSIZ);
334 else {
Pravin B Shelar54a5d382013-03-28 08:21:46 +0000335 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000336 err = -E2BIG;
337 goto failed;
338 }
339 strlcpy(name, ops->kind, IFNAMSIZ);
340 strncat(name, "%d", 2);
341 }
342
343 ASSERT_RTNL();
344 dev = alloc_netdev(ops->priv_size, name, ops->setup);
345 if (!dev) {
346 err = -ENOMEM;
347 goto failed;
348 }
349 dev_net_set(dev, net);
350
351 dev->rtnl_link_ops = ops;
352
353 tunnel = netdev_priv(dev);
354 tunnel->parms = *parms;
Nicolas Dichtel5e6700b2013-06-26 16:11:28 +0200355 tunnel->net = net;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000356
357 err = register_netdevice(dev);
358 if (err)
359 goto failed_free;
360
361 return dev;
362
363failed_free:
364 free_netdev(dev);
365failed:
366 return ERR_PTR(err);
367}
368
Tom Herbert7d442fa2014-01-02 11:48:26 -0800369static inline void init_tunnel_flow(struct flowi4 *fl4,
370 int proto,
371 __be32 daddr, __be32 saddr,
372 __be32 key, __u8 tos, int oif)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000373{
374 memset(fl4, 0, sizeof(*fl4));
375 fl4->flowi4_oif = oif;
376 fl4->daddr = daddr;
377 fl4->saddr = saddr;
378 fl4->flowi4_tos = tos;
379 fl4->flowi4_proto = proto;
380 fl4->fl4_gre_key = key;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000381}
382
383static int ip_tunnel_bind_dev(struct net_device *dev)
384{
385 struct net_device *tdev = NULL;
386 struct ip_tunnel *tunnel = netdev_priv(dev);
387 const struct iphdr *iph;
388 int hlen = LL_MAX_HEADER;
389 int mtu = ETH_DATA_LEN;
390 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
391
392 iph = &tunnel->parms.iph;
393
394 /* Guess output device to choose reasonable mtu and needed_headroom */
395 if (iph->daddr) {
396 struct flowi4 fl4;
397 struct rtable *rt;
398
Tom Herbert7d442fa2014-01-02 11:48:26 -0800399 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
400 iph->saddr, tunnel->parms.o_key,
401 RT_TOS(iph->tos), tunnel->parms.link);
402 rt = ip_route_output_key(tunnel->net, &fl4);
403
Pravin B Shelarc5441932013-03-25 14:49:35 +0000404 if (!IS_ERR(rt)) {
405 tdev = rt->dst.dev;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800406 tunnel_dst_set(tunnel, dst_clone(&rt->dst));
Pravin B Shelarc5441932013-03-25 14:49:35 +0000407 ip_rt_put(rt);
408 }
409 if (dev->type != ARPHRD_ETHER)
410 dev->flags |= IFF_POINTOPOINT;
411 }
412
413 if (!tdev && tunnel->parms.link)
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200414 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000415
416 if (tdev) {
417 hlen = tdev->hard_header_len + tdev->needed_headroom;
418 mtu = tdev->mtu;
419 }
420 dev->iflink = tunnel->parms.link;
421
422 dev->needed_headroom = t_hlen + hlen;
423 mtu -= (dev->hard_header_len + t_hlen);
424
425 if (mtu < 68)
426 mtu = 68;
427
428 return mtu;
429}
430
431static struct ip_tunnel *ip_tunnel_create(struct net *net,
432 struct ip_tunnel_net *itn,
433 struct ip_tunnel_parm *parms)
434{
435 struct ip_tunnel *nt, *fbt;
436 struct net_device *dev;
437
438 BUG_ON(!itn->fb_tunnel_dev);
439 fbt = netdev_priv(itn->fb_tunnel_dev);
440 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
441 if (IS_ERR(dev))
442 return NULL;
443
444 dev->mtu = ip_tunnel_bind_dev(dev);
445
446 nt = netdev_priv(dev);
447 ip_tunnel_add(itn, nt);
448 return nt;
449}
450
451int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
452 const struct tnl_ptk_info *tpi, bool log_ecn_error)
453{
454 struct pcpu_tstats *tstats;
455 const struct iphdr *iph = ip_hdr(skb);
456 int err;
457
Pravin B Shelarc5441932013-03-25 14:49:35 +0000458#ifdef CONFIG_NET_IPGRE_BROADCAST
459 if (ipv4_is_multicast(iph->daddr)) {
460 /* Looped back packet, drop it! */
461 if (rt_is_output_route(skb_rtable(skb)))
462 goto drop;
463 tunnel->dev->stats.multicast++;
464 skb->pkt_type = PACKET_BROADCAST;
465 }
466#endif
467
468 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
469 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
470 tunnel->dev->stats.rx_crc_errors++;
471 tunnel->dev->stats.rx_errors++;
472 goto drop;
473 }
474
475 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
476 if (!(tpi->flags&TUNNEL_SEQ) ||
477 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
478 tunnel->dev->stats.rx_fifo_errors++;
479 tunnel->dev->stats.rx_errors++;
480 goto drop;
481 }
482 tunnel->i_seqno = ntohl(tpi->seq) + 1;
483 }
484
Pravin B Shelarc5441932013-03-25 14:49:35 +0000485 err = IP_ECN_decapsulate(iph, skb);
486 if (unlikely(err)) {
487 if (log_ecn_error)
488 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
489 &iph->saddr, iph->tos);
490 if (err > 1) {
491 ++tunnel->dev->stats.rx_frame_errors;
492 ++tunnel->dev->stats.rx_errors;
493 goto drop;
494 }
495 }
496
497 tstats = this_cpu_ptr(tunnel->dev->tstats);
498 u64_stats_update_begin(&tstats->syncp);
499 tstats->rx_packets++;
500 tstats->rx_bytes += skb->len;
501 u64_stats_update_end(&tstats->syncp);
502
Alexei Starovoitov81b9eab2013-11-12 14:39:13 -0800503 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
504
Pravin B Shelar3d7b46c2013-06-17 17:50:02 -0700505 if (tunnel->dev->type == ARPHRD_ETHER) {
506 skb->protocol = eth_type_trans(skb, tunnel->dev);
507 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
508 } else {
509 skb->dev = tunnel->dev;
510 }
Nicolas Dichtel64261f22013-08-13 17:51:09 +0200511
Pravin B Shelarc5441932013-03-25 14:49:35 +0000512 gro_cells_receive(&tunnel->gro_cells, skb);
513 return 0;
514
515drop:
516 kfree_skb(skb);
517 return 0;
518}
519EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
520
Pravin B Shelar23a36472013-07-02 10:57:33 -0700521static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
522 struct rtable *rt, __be16 df)
523{
524 struct ip_tunnel *tunnel = netdev_priv(dev);
Alexander Duyck8c91e162013-07-11 13:12:22 -0700525 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
Pravin B Shelar23a36472013-07-02 10:57:33 -0700526 int mtu;
527
528 if (df)
529 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
530 - sizeof(struct iphdr) - tunnel->hlen;
531 else
532 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
533
534 if (skb_dst(skb))
535 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
536
537 if (skb->protocol == htons(ETH_P_IP)) {
538 if (!skb_is_gso(skb) &&
539 (df & htons(IP_DF)) && mtu < pkt_size) {
540 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
541 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
542 return -E2BIG;
543 }
544 }
545#if IS_ENABLED(CONFIG_IPV6)
546 else if (skb->protocol == htons(ETH_P_IPV6)) {
547 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
548
549 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
550 mtu >= IPV6_MIN_MTU) {
551 if ((tunnel->parms.iph.daddr &&
552 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
553 rt6->rt6i_dst.plen == 128) {
554 rt6->rt6i_flags |= RTF_MODIFIED;
555 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
556 }
557 }
558
559 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
560 mtu < pkt_size) {
561 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
562 return -E2BIG;
563 }
564 }
565#endif
566 return 0;
567}
568
Pravin B Shelarc5441932013-03-25 14:49:35 +0000569void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
Nicolas Dichtelbf3d6a82013-05-27 23:48:15 +0000570 const struct iphdr *tnl_params, const u8 protocol)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000571{
572 struct ip_tunnel *tunnel = netdev_priv(dev);
573 const struct iphdr *inner_iph;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000574 struct flowi4 fl4;
575 u8 tos, ttl;
576 __be16 df;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800577 struct rtable *rt = NULL; /* Route to the other host */
Pravin B Shelarc5441932013-03-25 14:49:35 +0000578 unsigned int max_headroom; /* The extra header space needed */
579 __be32 dst;
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700580 int err;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800581 bool connected = true;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000582
583 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
584
585 dst = tnl_params->daddr;
586 if (dst == 0) {
587 /* NBMA tunnel */
588
589 if (skb_dst(skb) == NULL) {
590 dev->stats.tx_fifo_errors++;
591 goto tx_error;
592 }
593
594 if (skb->protocol == htons(ETH_P_IP)) {
595 rt = skb_rtable(skb);
596 dst = rt_nexthop(rt, inner_iph->daddr);
597 }
598#if IS_ENABLED(CONFIG_IPV6)
599 else if (skb->protocol == htons(ETH_P_IPV6)) {
600 const struct in6_addr *addr6;
601 struct neighbour *neigh;
602 bool do_tx_error_icmp;
603 int addr_type;
604
605 neigh = dst_neigh_lookup(skb_dst(skb),
606 &ipv6_hdr(skb)->daddr);
607 if (neigh == NULL)
608 goto tx_error;
609
610 addr6 = (const struct in6_addr *)&neigh->primary_key;
611 addr_type = ipv6_addr_type(addr6);
612
613 if (addr_type == IPV6_ADDR_ANY) {
614 addr6 = &ipv6_hdr(skb)->daddr;
615 addr_type = ipv6_addr_type(addr6);
616 }
617
618 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
619 do_tx_error_icmp = true;
620 else {
621 do_tx_error_icmp = false;
622 dst = addr6->s6_addr32[3];
623 }
624 neigh_release(neigh);
625 if (do_tx_error_icmp)
626 goto tx_error_icmp;
627 }
628#endif
629 else
630 goto tx_error;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800631
632 connected = false;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000633 }
634
635 tos = tnl_params->tos;
636 if (tos & 0x1) {
637 tos &= ~0x1;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800638 if (skb->protocol == htons(ETH_P_IP)) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000639 tos = inner_iph->tos;
Tom Herbert7d442fa2014-01-02 11:48:26 -0800640 connected = false;
641 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000642 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
Tom Herbert7d442fa2014-01-02 11:48:26 -0800643 connected = false;
644 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000645 }
646
Tom Herbert7d442fa2014-01-02 11:48:26 -0800647 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
648 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
649
650 if (connected)
651 rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
652
653 if (!rt) {
654 rt = ip_route_output_key(tunnel->net, &fl4);
655
656 if (IS_ERR(rt)) {
657 dev->stats.tx_carrier_errors++;
658 goto tx_error;
659 }
660 if (connected)
661 tunnel_dst_set(tunnel, dst_clone(&rt->dst));
Pravin B Shelarc5441932013-03-25 14:49:35 +0000662 }
Tom Herbert7d442fa2014-01-02 11:48:26 -0800663
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700664 if (rt->dst.dev == dev) {
Pravin B Shelarc5441932013-03-25 14:49:35 +0000665 ip_rt_put(rt);
666 dev->stats.collisions++;
667 goto tx_error;
668 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000669
Pravin B Shelar23a36472013-07-02 10:57:33 -0700670 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
671 ip_rt_put(rt);
672 goto tx_error;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000673 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000674
675 if (tunnel->err_count > 0) {
676 if (time_before(jiffies,
677 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
678 tunnel->err_count--;
679
680 dst_link_failure(skb);
681 } else
682 tunnel->err_count = 0;
683 }
684
Pravin B Shelard4a71b12013-09-25 09:57:47 -0700685 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000686 ttl = tnl_params->ttl;
687 if (ttl == 0) {
688 if (skb->protocol == htons(ETH_P_IP))
689 ttl = inner_iph->ttl;
690#if IS_ENABLED(CONFIG_IPV6)
691 else if (skb->protocol == htons(ETH_P_IPV6))
692 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
693#endif
694 else
695 ttl = ip4_dst_hoplimit(&rt->dst);
696 }
697
Pravin B Shelar23a36472013-07-02 10:57:33 -0700698 df = tnl_params->frag_off;
699 if (skb->protocol == htons(ETH_P_IP))
700 df |= (inner_iph->frag_off&htons(IP_DF));
701
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700702 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
703 + rt->dst.header_len;
Steffen Klassert3e08f4a2013-10-01 11:33:59 +0200704 if (max_headroom > dev->needed_headroom)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000705 dev->needed_headroom = max_headroom;
Steffen Klassert3e08f4a2013-10-01 11:33:59 +0200706
707 if (skb_cow_head(skb, dev->needed_headroom)) {
708 dev->stats.tx_dropped++;
709 dev_kfree_skb(skb);
710 return;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000711 }
712
Nicolas Dichtel8b7ed2d2013-09-02 15:34:54 +0200713 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
Pravin B Shelard4a71b12013-09-25 09:57:47 -0700714 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
Pravin B Shelar0e6fbc52013-06-17 17:49:56 -0700715 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000716
Pravin B Shelarc5441932013-03-25 14:49:35 +0000717 return;
718
719#if IS_ENABLED(CONFIG_IPV6)
720tx_error_icmp:
721 dst_link_failure(skb);
722#endif
723tx_error:
724 dev->stats.tx_errors++;
725 dev_kfree_skb(skb);
726}
727EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
728
729static void ip_tunnel_update(struct ip_tunnel_net *itn,
730 struct ip_tunnel *t,
731 struct net_device *dev,
732 struct ip_tunnel_parm *p,
733 bool set_mtu)
734{
735 ip_tunnel_del(t);
736 t->parms.iph.saddr = p->iph.saddr;
737 t->parms.iph.daddr = p->iph.daddr;
738 t->parms.i_key = p->i_key;
739 t->parms.o_key = p->o_key;
740 if (dev->type != ARPHRD_ETHER) {
741 memcpy(dev->dev_addr, &p->iph.saddr, 4);
742 memcpy(dev->broadcast, &p->iph.daddr, 4);
743 }
744 ip_tunnel_add(itn, t);
745
746 t->parms.iph.ttl = p->iph.ttl;
747 t->parms.iph.tos = p->iph.tos;
748 t->parms.iph.frag_off = p->iph.frag_off;
749
750 if (t->parms.link != p->link) {
751 int mtu;
752
753 t->parms.link = p->link;
754 mtu = ip_tunnel_bind_dev(dev);
755 if (set_mtu)
756 dev->mtu = mtu;
757 }
Tom Herbert7d442fa2014-01-02 11:48:26 -0800758 tunnel_dst_reset(t);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000759 netdev_state_change(dev);
760}
761
762int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
763{
764 int err = 0;
765 struct ip_tunnel *t;
766 struct net *net = dev_net(dev);
767 struct ip_tunnel *tunnel = netdev_priv(dev);
768 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
769
770 BUG_ON(!itn->fb_tunnel_dev);
771 switch (cmd) {
772 case SIOCGETTUNNEL:
773 t = NULL;
774 if (dev == itn->fb_tunnel_dev)
775 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
776 if (t == NULL)
777 t = netdev_priv(dev);
778 memcpy(p, &t->parms, sizeof(*p));
779 break;
780
781 case SIOCADDTUNNEL:
782 case SIOCCHGTUNNEL:
783 err = -EPERM;
784 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
785 goto done;
786 if (p->iph.ttl)
787 p->iph.frag_off |= htons(IP_DF);
788 if (!(p->i_flags&TUNNEL_KEY))
789 p->i_key = 0;
790 if (!(p->o_flags&TUNNEL_KEY))
791 p->o_key = 0;
792
793 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
794
795 if (!t && (cmd == SIOCADDTUNNEL))
796 t = ip_tunnel_create(net, itn, p);
797
798 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
799 if (t != NULL) {
800 if (t->dev != dev) {
801 err = -EEXIST;
802 break;
803 }
804 } else {
805 unsigned int nflags = 0;
806
807 if (ipv4_is_multicast(p->iph.daddr))
808 nflags = IFF_BROADCAST;
809 else if (p->iph.daddr)
810 nflags = IFF_POINTOPOINT;
811
812 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
813 err = -EINVAL;
814 break;
815 }
816
817 t = netdev_priv(dev);
818 }
819 }
820
821 if (t) {
822 err = 0;
823 ip_tunnel_update(itn, t, dev, p, true);
824 } else
825 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
826 break;
827
828 case SIOCDELTUNNEL:
829 err = -EPERM;
830 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
831 goto done;
832
833 if (dev == itn->fb_tunnel_dev) {
834 err = -ENOENT;
835 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
836 if (t == NULL)
837 goto done;
838 err = -EPERM;
839 if (t == netdev_priv(itn->fb_tunnel_dev))
840 goto done;
841 dev = t->dev;
842 }
843 unregister_netdevice(dev);
844 err = 0;
845 break;
846
847 default:
848 err = -EINVAL;
849 }
850
851done:
852 return err;
853}
854EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
855
856int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
857{
858 struct ip_tunnel *tunnel = netdev_priv(dev);
859 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
860
861 if (new_mtu < 68 ||
862 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
863 return -EINVAL;
864 dev->mtu = new_mtu;
865 return 0;
866}
867EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
868
869static void ip_tunnel_dev_free(struct net_device *dev)
870{
871 struct ip_tunnel *tunnel = netdev_priv(dev);
872
873 gro_cells_destroy(&tunnel->gro_cells);
874 free_percpu(dev->tstats);
875 free_netdev(dev);
876}
877
878void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
879{
Pravin B Shelarc5441932013-03-25 14:49:35 +0000880 struct ip_tunnel *tunnel = netdev_priv(dev);
881 struct ip_tunnel_net *itn;
882
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200883 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000884
885 if (itn->fb_tunnel_dev != dev) {
886 ip_tunnel_del(netdev_priv(dev));
887 unregister_netdevice_queue(dev, head);
888 }
889}
890EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
891
Eric Dumazetd3b6f612013-06-07 13:26:05 -0700892int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
Pravin B Shelarc5441932013-03-25 14:49:35 +0000893 struct rtnl_link_ops *ops, char *devname)
894{
895 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
896 struct ip_tunnel_parm parms;
stephen hemminger6261d982013-08-05 22:51:37 -0700897 unsigned int i;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000898
stephen hemminger6261d982013-08-05 22:51:37 -0700899 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
900 INIT_HLIST_HEAD(&itn->tunnels[i]);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000901
902 if (!ops) {
903 itn->fb_tunnel_dev = NULL;
904 return 0;
905 }
stephen hemminger6261d982013-08-05 22:51:37 -0700906
Pravin B Shelarc5441932013-03-25 14:49:35 +0000907 memset(&parms, 0, sizeof(parms));
908 if (devname)
909 strlcpy(parms.name, devname, IFNAMSIZ);
910
911 rtnl_lock();
912 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
Dan Carpenterea857f22013-08-19 10:05:10 +0300913 /* FB netdevice is special: we have one, and only one per netns.
914 * Allowing to move it to another netns is clearly unsafe.
915 */
Steffen Klassert67013282013-10-01 11:34:48 +0200916 if (!IS_ERR(itn->fb_tunnel_dev)) {
Dan Carpenterb4de77a2013-08-23 11:15:37 +0300917 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
Steffen Klassert67013282013-10-01 11:34:48 +0200918 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
919 }
Dan Carpenterb4de77a2013-08-23 11:15:37 +0300920 rtnl_unlock();
Pravin B Shelarc5441932013-03-25 14:49:35 +0000921
Dan Carpenterb4de77a2013-08-23 11:15:37 +0300922 return PTR_RET(itn->fb_tunnel_dev);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000923}
924EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
925
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200926static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
927 struct rtnl_link_ops *ops)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000928{
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200929 struct net *net = dev_net(itn->fb_tunnel_dev);
930 struct net_device *dev, *aux;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000931 int h;
932
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200933 for_each_netdev_safe(net, dev, aux)
934 if (dev->rtnl_link_ops == ops)
935 unregister_netdevice_queue(dev, head);
936
Pravin B Shelarc5441932013-03-25 14:49:35 +0000937 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
938 struct ip_tunnel *t;
939 struct hlist_node *n;
940 struct hlist_head *thead = &itn->tunnels[h];
941
942 hlist_for_each_entry_safe(t, n, thead, hash_node)
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200943 /* If dev is in the same netns, it has already
944 * been added to the list by the previous loop.
945 */
946 if (!net_eq(dev_net(t->dev), net))
947 unregister_netdevice_queue(t->dev, head);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000948 }
Pravin B Shelarc5441932013-03-25 14:49:35 +0000949}
950
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200951void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
Pravin B Shelarc5441932013-03-25 14:49:35 +0000952{
953 LIST_HEAD(list);
954
955 rtnl_lock();
Nicolas Dichtel6c742e72013-08-13 17:51:11 +0200956 ip_tunnel_destroy(itn, &list, ops);
Pravin B Shelarc5441932013-03-25 14:49:35 +0000957 unregister_netdevice_many(&list);
958 rtnl_unlock();
Pravin B Shelarc5441932013-03-25 14:49:35 +0000959}
960EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
961
962int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
963 struct ip_tunnel_parm *p)
964{
965 struct ip_tunnel *nt;
966 struct net *net = dev_net(dev);
967 struct ip_tunnel_net *itn;
968 int mtu;
969 int err;
970
971 nt = netdev_priv(dev);
972 itn = net_generic(net, nt->ip_tnl_net_id);
973
974 if (ip_tunnel_find(itn, p, dev->type))
975 return -EEXIST;
976
Nicolas Dichtel5e6700b2013-06-26 16:11:28 +0200977 nt->net = net;
Pravin B Shelarc5441932013-03-25 14:49:35 +0000978 nt->parms = *p;
979 err = register_netdevice(dev);
980 if (err)
981 goto out;
982
983 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
984 eth_hw_addr_random(dev);
985
986 mtu = ip_tunnel_bind_dev(dev);
987 if (!tb[IFLA_MTU])
988 dev->mtu = mtu;
989
990 ip_tunnel_add(itn, nt);
991
992out:
993 return err;
994}
995EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
996
997int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
998 struct ip_tunnel_parm *p)
999{
Nicolas Dichtel6c742e72013-08-13 17:51:11 +02001000 struct ip_tunnel *t;
Pravin B Shelarc5441932013-03-25 14:49:35 +00001001 struct ip_tunnel *tunnel = netdev_priv(dev);
Nicolas Dichtel6c742e72013-08-13 17:51:11 +02001002 struct net *net = tunnel->net;
Pravin B Shelarc5441932013-03-25 14:49:35 +00001003 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1004
1005 if (dev == itn->fb_tunnel_dev)
1006 return -EINVAL;
1007
Pravin B Shelarc5441932013-03-25 14:49:35 +00001008 t = ip_tunnel_find(itn, p, dev->type);
1009
1010 if (t) {
1011 if (t->dev != dev)
1012 return -EEXIST;
1013 } else {
Nicolas Dichtel6c742e72013-08-13 17:51:11 +02001014 t = tunnel;
Pravin B Shelarc5441932013-03-25 14:49:35 +00001015
1016 if (dev->type != ARPHRD_ETHER) {
1017 unsigned int nflags = 0;
1018
1019 if (ipv4_is_multicast(p->iph.daddr))
1020 nflags = IFF_BROADCAST;
1021 else if (p->iph.daddr)
1022 nflags = IFF_POINTOPOINT;
1023
1024 if ((dev->flags ^ nflags) &
1025 (IFF_POINTOPOINT | IFF_BROADCAST))
1026 return -EINVAL;
1027 }
1028 }
1029
1030 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1031 return 0;
1032}
1033EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1034
1035int ip_tunnel_init(struct net_device *dev)
1036{
1037 struct ip_tunnel *tunnel = netdev_priv(dev);
1038 struct iphdr *iph = &tunnel->parms.iph;
John Stultz827da442013-10-07 15:51:58 -07001039 int i, err;
Pravin B Shelarc5441932013-03-25 14:49:35 +00001040
1041 dev->destructor = ip_tunnel_dev_free;
1042 dev->tstats = alloc_percpu(struct pcpu_tstats);
1043 if (!dev->tstats)
1044 return -ENOMEM;
1045
John Stultz827da442013-10-07 15:51:58 -07001046 for_each_possible_cpu(i) {
1047 struct pcpu_tstats *ipt_stats;
1048 ipt_stats = per_cpu_ptr(dev->tstats, i);
1049 u64_stats_init(&ipt_stats->syncp);
1050 }
1051
Pravin B Shelarc5441932013-03-25 14:49:35 +00001052 err = gro_cells_init(&tunnel->gro_cells, dev);
1053 if (err) {
1054 free_percpu(dev->tstats);
1055 return err;
1056 }
1057
1058 tunnel->dev = dev;
Nicolas Dichtel6c742e72013-08-13 17:51:11 +02001059 tunnel->net = dev_net(dev);
Pravin B Shelarc5441932013-03-25 14:49:35 +00001060 strcpy(tunnel->parms.name, dev->name);
1061 iph->version = 4;
1062 iph->ihl = 5;
1063
Tom Herbert7d442fa2014-01-02 11:48:26 -08001064 tunnel->dst_cache = NULL;
1065 spin_lock_init(&tunnel->dst_lock);
1066
Pravin B Shelarc5441932013-03-25 14:49:35 +00001067 return 0;
1068}
1069EXPORT_SYMBOL_GPL(ip_tunnel_init);
1070
1071void ip_tunnel_uninit(struct net_device *dev)
1072{
Pravin B Shelarc5441932013-03-25 14:49:35 +00001073 struct ip_tunnel *tunnel = netdev_priv(dev);
Nicolas Dichtel6c742e72013-08-13 17:51:11 +02001074 struct net *net = tunnel->net;
Pravin B Shelarc5441932013-03-25 14:49:35 +00001075 struct ip_tunnel_net *itn;
1076
1077 itn = net_generic(net, tunnel->ip_tnl_net_id);
1078 /* fb_tunnel_dev will be unregisted in net-exit call. */
1079 if (itn->fb_tunnel_dev != dev)
1080 ip_tunnel_del(netdev_priv(dev));
Tom Herbert7d442fa2014-01-02 11:48:26 -08001081
1082 tunnel_dst_reset(tunnel);
Pravin B Shelarc5441932013-03-25 14:49:35 +00001083}
1084EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1085
1086/* Do least required initialization, rest of init is done in tunnel_init call */
1087void ip_tunnel_setup(struct net_device *dev, int net_id)
1088{
1089 struct ip_tunnel *tunnel = netdev_priv(dev);
1090 tunnel->ip_tnl_net_id = net_id;
1091}
1092EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1093
1094MODULE_LICENSE("GPL");