blob: 37000ae24c555b29de767e653b6189cfb7618f2b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Joe Perchesafd465032012-03-12 07:03:32 +000013#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
Randy Dunlap4fc268d2006-01-11 12:17:47 -080015#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/module.h>
17#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/kernel.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090019#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <asm/uaccess.h>
21#include <linux/skbuff.h>
22#include <linux/netdevice.h>
23#include <linux/in.h>
24#include <linux/tcp.h>
25#include <linux/udp.h>
26#include <linux/if_arp.h>
27#include <linux/mroute.h>
28#include <linux/init.h>
29#include <linux/in6.h>
30#include <linux/inetdevice.h>
31#include <linux/igmp.h>
32#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070033#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080034#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035
36#include <net/sock.h>
37#include <net/ip.h>
38#include <net/icmp.h>
39#include <net/protocol.h>
40#include <net/ipip.h>
41#include <net/arp.h>
42#include <net/checksum.h>
43#include <net/dsfield.h>
44#include <net/inet_ecn.h>
45#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070046#include <net/net_namespace.h>
47#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070048#include <net/rtnetlink.h>
Dmitry Kozlov00959ad2010-08-21 23:05:39 -070049#include <net/gre.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070050
Eric Dumazetdfd56b82011-12-10 09:48:31 +000051#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <net/ipv6.h>
53#include <net/ip6_fib.h>
54#include <net/ip6_route.h>
55#endif
56
/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (sort of local ttl),
   and silently drop the packet when it expires. It is a good
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. IP hop limit plays the role of "t->recursion" in this
   case, if we copy it from the packet being encapsulated to the upper header.
   It is a very good solution, but it introduces two problems:

   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in my neighbourhood)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially,
   taking into account fragmentation. To be short, ttl is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulating packets have DF set.
   But it is not our problem! Nobody could accuse us, we did
   all that we could. Even if it is your gated who injected
   a fatal route into the network, even if it were you who configured
   a fatal static route: you are innocent. :-)



   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, ipip and gre are naturally modular.
   We could extract common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

			Alexey Kuznetsov.
 */
122
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000123static bool log_ecn_error = true;
124module_param(log_ecn_error, bool, 0644);
125MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126
Herbert Xuc19e6542008-10-09 11:59:55 -0700127static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128static int ipgre_tunnel_init(struct net_device *dev);
129static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700130static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131
132/* Fallback tunnel: no source, no destination, no key, no options */
133
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700134#define HASH_SIZE 16
135
Eric Dumazetf99189b2009-11-17 10:42:49 +0000136static int ipgre_net_id __read_mostly;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700137struct ipgre_net {
Eric Dumazet15078502010-09-15 11:07:53 +0000138 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700139
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700140 struct net_device *fb_tunnel_dev;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -0700141};
142
/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched by configured keyless tunnels,
   will match the fallback tunnel.
 */
160
/*
 * Fold a 32-bit address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is parenthesised so that an expression such as HASH(a ^ b)
 * hashes the whole expression, and the mask is derived from HASH_SIZE so the
 * index can never exceed the table.  HASH_SIZE must be a power of two.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))

/* Shorthand names for the four tables inside struct ipgre_net. */
#define tunnels_r_l	tunnels[3]	/* (remote, local) */
#define tunnels_r	tunnels[2]	/* (remote, *)     */
#define tunnels_l	tunnels[1]	/* (*, local)      */
#define tunnels_wc	tunnels[0]	/* (*, *)          */
/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/*
 * Walk one hash chain using rcu_dereference() for every link.
 *
 * The cursor is not a macro parameter: the expansion assigns to a variable
 * literally named 't', which the enclosing function must declare as
 * struct ip_tunnel *.
 */
#define for_each_ip_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173
stephen hemminger87b6d212012-04-12 06:31:16 +0000174static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev,
175 struct rtnl_link_stats64 *tot)
Eric Dumazete985aad2010-09-27 03:57:11 +0000176{
Eric Dumazete985aad2010-09-27 03:57:11 +0000177 int i;
178
179 for_each_possible_cpu(i) {
180 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
stephen hemminger87b6d212012-04-12 06:31:16 +0000181 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
182 unsigned int start;
Eric Dumazete985aad2010-09-27 03:57:11 +0000183
stephen hemminger87b6d212012-04-12 06:31:16 +0000184 do {
185 start = u64_stats_fetch_begin_bh(&tstats->syncp);
186 rx_packets = tstats->rx_packets;
187 tx_packets = tstats->tx_packets;
188 rx_bytes = tstats->rx_bytes;
189 tx_bytes = tstats->tx_bytes;
190 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
191
192 tot->rx_packets += rx_packets;
193 tot->tx_packets += tx_packets;
194 tot->rx_bytes += rx_bytes;
195 tot->tx_bytes += tx_bytes;
Eric Dumazete985aad2010-09-27 03:57:11 +0000196 }
stephen hemminger87b6d212012-04-12 06:31:16 +0000197
198 tot->multicast = dev->stats.multicast;
199 tot->rx_crc_errors = dev->stats.rx_crc_errors;
200 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
201 tot->rx_length_errors = dev->stats.rx_length_errors;
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000202 tot->rx_frame_errors = dev->stats.rx_frame_errors;
stephen hemminger87b6d212012-04-12 06:31:16 +0000203 tot->rx_errors = dev->stats.rx_errors;
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000204
stephen hemminger87b6d212012-04-12 06:31:16 +0000205 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
206 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
207 tot->tx_dropped = dev->stats.tx_dropped;
208 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
209 tot->tx_errors = dev->stats.tx_errors;
210
211 return tot;
Eric Dumazete985aad2010-09-27 03:57:11 +0000212}
213
stephen hemmingerd2083282012-09-24 18:12:23 +0000214/* Does key in tunnel parameters match packet */
215static bool ipgre_key_match(const struct ip_tunnel_parm *p,
stephen hemminger9fbef052012-10-01 05:21:14 +0000216 __be16 flags, __be32 key)
stephen hemmingerd2083282012-09-24 18:12:23 +0000217{
218 if (p->i_flags & GRE_KEY) {
219 if (flags & GRE_KEY)
220 return key == p->i_key;
221 else
222 return false; /* key expected, none present */
223 } else
224 return !(flags & GRE_KEY);
225}
226
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227/* Given src, dst and key, find appropriate for input tunnel. */
228
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000229static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev,
230 __be32 remote, __be32 local,
stephen hemminger9fbef052012-10-01 05:21:14 +0000231 __be16 flags, __be32 key,
stephen hemmingerd2083282012-09-24 18:12:23 +0000232 __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233{
Timo Teras749c10f2009-01-19 17:22:12 -0800234 struct net *net = dev_net(dev);
235 int link = dev->ifindex;
Eric Dumazet15078502010-09-15 11:07:53 +0000236 unsigned int h0 = HASH(remote);
237 unsigned int h1 = HASH(key);
Timo Terasafcf1242009-01-26 20:56:10 -0800238 struct ip_tunnel *t, *cand = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700239 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700240 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
241 ARPHRD_ETHER : ARPHRD_IPGRE;
Timo Terasafcf1242009-01-26 20:56:10 -0800242 int score, cand_score = 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000244 for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800245 if (local != t->parms.iph.saddr ||
246 remote != t->parms.iph.daddr ||
Timo Teras749c10f2009-01-19 17:22:12 -0800247 !(t->dev->flags & IFF_UP))
248 continue;
249
stephen hemmingerd2083282012-09-24 18:12:23 +0000250 if (!ipgre_key_match(&t->parms, flags, key))
251 continue;
252
Timo Teras749c10f2009-01-19 17:22:12 -0800253 if (t->dev->type != ARPHRD_IPGRE &&
254 t->dev->type != dev_type)
255 continue;
256
Timo Terasafcf1242009-01-26 20:56:10 -0800257 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800258 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800259 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800260 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800261 score |= 2;
262 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800263 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800264
265 if (score < cand_score) {
266 cand = t;
267 cand_score = score;
268 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 }
Herbert Xue1a80002008-10-09 12:00:17 -0700270
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000271 for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800272 if (remote != t->parms.iph.daddr ||
Timo Teras749c10f2009-01-19 17:22:12 -0800273 !(t->dev->flags & IFF_UP))
274 continue;
275
stephen hemmingerd2083282012-09-24 18:12:23 +0000276 if (!ipgre_key_match(&t->parms, flags, key))
277 continue;
278
Timo Teras749c10f2009-01-19 17:22:12 -0800279 if (t->dev->type != ARPHRD_IPGRE &&
280 t->dev->type != dev_type)
281 continue;
282
Timo Terasafcf1242009-01-26 20:56:10 -0800283 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800284 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800285 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800286 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800287 score |= 2;
288 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800289 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800290
291 if (score < cand_score) {
292 cand = t;
293 cand_score = score;
294 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 }
Herbert Xue1a80002008-10-09 12:00:17 -0700296
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000297 for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800298 if ((local != t->parms.iph.saddr &&
299 (local != t->parms.iph.daddr ||
300 !ipv4_is_multicast(local))) ||
Timo Teras749c10f2009-01-19 17:22:12 -0800301 !(t->dev->flags & IFF_UP))
302 continue;
303
stephen hemmingerd2083282012-09-24 18:12:23 +0000304 if (!ipgre_key_match(&t->parms, flags, key))
305 continue;
306
Timo Teras749c10f2009-01-19 17:22:12 -0800307 if (t->dev->type != ARPHRD_IPGRE &&
308 t->dev->type != dev_type)
309 continue;
310
Timo Terasafcf1242009-01-26 20:56:10 -0800311 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800312 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800313 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800314 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800315 score |= 2;
316 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800317 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800318
319 if (score < cand_score) {
320 cand = t;
321 cand_score = score;
322 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 }
Herbert Xue1a80002008-10-09 12:00:17 -0700324
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000325 for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) {
Timo Teras749c10f2009-01-19 17:22:12 -0800326 if (t->parms.i_key != key ||
327 !(t->dev->flags & IFF_UP))
328 continue;
329
330 if (t->dev->type != ARPHRD_IPGRE &&
331 t->dev->type != dev_type)
332 continue;
333
Timo Terasafcf1242009-01-26 20:56:10 -0800334 score = 0;
Timo Teras749c10f2009-01-19 17:22:12 -0800335 if (t->parms.link != link)
Timo Terasafcf1242009-01-26 20:56:10 -0800336 score |= 1;
Timo Teras749c10f2009-01-19 17:22:12 -0800337 if (t->dev->type != dev_type)
Timo Terasafcf1242009-01-26 20:56:10 -0800338 score |= 2;
339 if (score == 0)
Timo Teras749c10f2009-01-19 17:22:12 -0800340 return t;
Timo Terasafcf1242009-01-26 20:56:10 -0800341
342 if (score < cand_score) {
343 cand = t;
344 cand_score = score;
345 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346 }
347
Timo Terasafcf1242009-01-26 20:56:10 -0800348 if (cand != NULL)
349 return cand;
Herbert Xue1a80002008-10-09 12:00:17 -0700350
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000351 dev = ign->fb_tunnel_dev;
352 if (dev->flags & IFF_UP)
353 return netdev_priv(dev);
Timo Teras749c10f2009-01-19 17:22:12 -0800354
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355 return NULL;
356}
357
Eric Dumazet15078502010-09-15 11:07:53 +0000358static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700359 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900361 __be32 remote = parms->iph.daddr;
362 __be32 local = parms->iph.saddr;
363 __be32 key = parms->i_key;
Eric Dumazet15078502010-09-15 11:07:53 +0000364 unsigned int h = HASH(key);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 int prio = 0;
366
367 if (local)
368 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800369 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 prio |= 2;
371 h ^= HASH(remote);
372 }
373
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700374 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375}
376
Eric Dumazet15078502010-09-15 11:07:53 +0000377static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700378 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900379{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700380 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900381}
382
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700383static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384{
Eric Dumazet15078502010-09-15 11:07:53 +0000385 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386
Eric Dumazet15078502010-09-15 11:07:53 +0000387 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
Eric Dumazet8d5b2c02009-10-23 06:14:38 +0000388 rcu_assign_pointer(*tp, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389}
390
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700391static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392{
Eric Dumazet15078502010-09-15 11:07:53 +0000393 struct ip_tunnel __rcu **tp;
394 struct ip_tunnel *iter;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395
Eric Dumazet15078502010-09-15 11:07:53 +0000396 for (tp = ipgre_bucket(ign, t);
397 (iter = rtnl_dereference(*tp)) != NULL;
398 tp = &iter->next) {
399 if (t == iter) {
400 rcu_assign_pointer(*tp, t->next);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 break;
402 }
403 }
404}
405
Herbert Xue1a80002008-10-09 12:00:17 -0700406static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
407 struct ip_tunnel_parm *parms,
408 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409{
Al Virod5a0a1e2006-11-08 00:23:14 -0800410 __be32 remote = parms->iph.daddr;
411 __be32 local = parms->iph.saddr;
412 __be32 key = parms->i_key;
Timo Teras749c10f2009-01-19 17:22:12 -0800413 int link = parms->link;
Eric Dumazet15078502010-09-15 11:07:53 +0000414 struct ip_tunnel *t;
415 struct ip_tunnel __rcu **tp;
Herbert Xue1a80002008-10-09 12:00:17 -0700416 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
417
Eric Dumazet15078502010-09-15 11:07:53 +0000418 for (tp = __ipgre_bucket(ign, parms);
419 (t = rtnl_dereference(*tp)) != NULL;
420 tp = &t->next)
Herbert Xue1a80002008-10-09 12:00:17 -0700421 if (local == t->parms.iph.saddr &&
422 remote == t->parms.iph.daddr &&
423 key == t->parms.i_key &&
Timo Teras749c10f2009-01-19 17:22:12 -0800424 link == t->parms.link &&
Herbert Xue1a80002008-10-09 12:00:17 -0700425 type == t->dev->type)
426 break;
427
428 return t;
429}
430
Eric Dumazet15078502010-09-15 11:07:53 +0000431static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
Herbert Xue1a80002008-10-09 12:00:17 -0700432 struct ip_tunnel_parm *parms, int create)
433{
434 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700437 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
Herbert Xue1a80002008-10-09 12:00:17 -0700439 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
440 if (t || !create)
441 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442
443 if (parms->name[0])
444 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800445 else
stephen hemminger407d6fc2010-11-29 09:47:47 +0000446 strcpy(name, "gre%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447
448 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
449 if (!dev)
stephen hemminger407d6fc2010-11-29 09:47:47 +0000450 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700452 dev_net_set(dev, net);
453
Patrick McHardy2941a482006-01-08 22:05:26 -0800454 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700456 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457
Herbert Xu42aa9162008-10-09 11:59:32 -0700458 dev->mtu = ipgre_tunnel_bind_dev(dev);
459
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800460 if (register_netdevice(dev) < 0)
461 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462
Willem de Bruijnf2b3ee92012-01-26 10:34:35 +0000463 /* Can use a lockless transmit, unless we generate output sequences */
464 if (!(nt->parms.o_flags & GRE_SEQ))
465 dev->features |= NETIF_F_LLTX;
466
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700468 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469 return nt;
470
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800471failed_free:
472 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 return NULL;
474}
475
476static void ipgre_tunnel_uninit(struct net_device *dev)
477{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700478 struct net *net = dev_net(dev);
479 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
480
481 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 dev_put(dev);
483}
484
485
486static void ipgre_err(struct sk_buff *skb, u32 info)
487{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488
Rami Rosen071f92d2008-05-21 17:47:54 -0700489/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 8 bytes of packet payload. It means, that precise relaying of
491 ICMP in the real Internet is absolutely infeasible.
492
493 Moreover, Cisco "wise men" put GRE key to the third word
494 in GRE header. It makes impossible maintaining even soft state for keyed
495 GRE tunnels with enabled checksum. Tell them "thank you".
496
497 Well, I wonder, rfc1812 was written by Cisco employee,
stephen hemmingerbff52852012-02-24 08:08:20 +0000498 what the hell these idiots break standards established
499 by themselves???
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 */
501
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000502 const struct iphdr *iph = (const struct iphdr *)skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000503 __be16 *p = (__be16 *)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300505 const int type = icmp_hdr(skb)->type;
506 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800508 __be16 flags;
stephen hemmingerd2083282012-09-24 18:12:23 +0000509 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510
511 flags = p[0];
512 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
513 if (flags&(GRE_VERSION|GRE_ROUTING))
514 return;
515 if (flags&GRE_KEY) {
516 grehlen += 4;
517 if (flags&GRE_CSUM)
518 grehlen += 4;
519 }
520 }
521
522 /* If only 8 bytes returned, keyed message will be dropped here */
523 if (skb_headlen(skb) < grehlen)
524 return;
525
stephen hemmingerd2083282012-09-24 18:12:23 +0000526 if (flags & GRE_KEY)
527 key = *(((__be32 *)p) + (grehlen / 4) - 1);
528
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529 switch (type) {
530 default:
531 case ICMP_PARAMETERPROB:
532 return;
533
534 case ICMP_DEST_UNREACH:
535 switch (code) {
536 case ICMP_SR_FAILED:
537 case ICMP_PORT_UNREACH:
538 /* Impossible event. */
539 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 default:
541 /* All others are translated to HOST_UNREACH.
542 rfc2003 contains "deep thoughts" about NET_UNREACH,
543 I believe they are just ether pollution. --ANK
544 */
545 break;
546 }
547 break;
548 case ICMP_TIME_EXCEEDED:
549 if (code != ICMP_EXC_TTL)
550 return;
551 break;
David S. Miller55be7a92012-07-11 21:27:49 -0700552
553 case ICMP_REDIRECT:
554 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 }
556
Timo Teras749c10f2009-01-19 17:22:12 -0800557 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
stephen hemmingerd2083282012-09-24 18:12:23 +0000558 flags, key, p[1]);
559
David S. Miller36393392012-06-14 22:21:46 -0700560 if (t == NULL)
stephen hemminger0c5794a2012-09-24 18:12:24 +0000561 return;
David S. Miller36393392012-06-14 22:21:46 -0700562
563 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
564 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
565 t->parms.link, 0, IPPROTO_GRE, 0);
stephen hemminger0c5794a2012-09-24 18:12:24 +0000566 return;
David S. Miller36393392012-06-14 22:21:46 -0700567 }
David S. Miller55be7a92012-07-11 21:27:49 -0700568 if (type == ICMP_REDIRECT) {
569 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
570 IPPROTO_GRE, 0);
stephen hemminger0c5794a2012-09-24 18:12:24 +0000571 return;
David S. Miller55be7a92012-07-11 21:27:49 -0700572 }
David S. Miller36393392012-06-14 22:21:46 -0700573 if (t->parms.iph.daddr == 0 ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800574 ipv4_is_multicast(t->parms.iph.daddr))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000575 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576
577 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
stephen hemminger0c5794a2012-09-24 18:12:24 +0000578 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579
Wei Yongjunda6185d82009-02-24 23:34:48 -0800580 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 t->err_count++;
582 else
583 t->err_count = 1;
584 t->err_time = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585}
586
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587static inline u8
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000588ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589{
590 u8 inner = 0;
591 if (skb->protocol == htons(ETH_P_IP))
592 inner = old_iph->tos;
593 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000594 inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 return INET_ECN_encapsulate(tos, inner);
596}
597
598static int ipgre_rcv(struct sk_buff *skb)
599{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000600 const struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800602 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800603 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800604 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605 u32 seqno = 0;
606 struct ip_tunnel *tunnel;
607 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700608 __be16 gre_proto;
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000609 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610
611 if (!pskb_may_pull(skb, 16))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000612 goto drop;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700614 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 h = skb->data;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000616 flags = *(__be16 *)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617
618 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
619 /* - Version must be 0.
620 - We do not support routing headers.
621 */
622 if (flags&(GRE_VERSION|GRE_ROUTING))
stephen hemminger0c5794a2012-09-24 18:12:24 +0000623 goto drop;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624
625 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800626 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700627 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800628 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800629 if (!csum)
630 break;
631 /* fall through */
632 case CHECKSUM_NONE:
633 skb->csum = 0;
634 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700635 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 }
637 offset += 4;
638 }
639 if (flags&GRE_KEY) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000640 key = *(__be32 *)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 offset += 4;
642 }
643 if (flags&GRE_SEQ) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000644 seqno = ntohl(*(__be32 *)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 offset += 4;
646 }
647 }
648
Herbert Xue1a80002008-10-09 12:00:17 -0700649 gre_proto = *(__be16 *)(h + 2);
650
stephen hemmingerd2083282012-09-24 18:12:23 +0000651 tunnel = ipgre_tunnel_lookup(skb->dev,
652 iph->saddr, iph->daddr, flags, key,
653 gre_proto);
654 if (tunnel) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000655 struct pcpu_tstats *tstats;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700656
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 secpath_reset(skb);
658
Herbert Xue1a80002008-10-09 12:00:17 -0700659 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 /* WCCP version 1 and 2 protocol decoding.
661 * - Change protocol to IP
662 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
663 */
Herbert Xue1a80002008-10-09 12:00:17 -0700664 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700665 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900666 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 offset += 4;
668 }
669
Timo Teras1d069162007-12-20 00:10:33 -0800670 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300671 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700672 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 skb->pkt_type = PACKET_HOST;
674#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800675 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676 /* Looped back packet, drop it! */
David S. Millerc7537962010-11-11 17:07:48 -0800677 if (rt_is_output_route(skb_rtable(skb)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678 goto drop;
Eric Dumazete985aad2010-09-27 03:57:11 +0000679 tunnel->dev->stats.multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 skb->pkt_type = PACKET_BROADCAST;
681 }
682#endif
683
684 if (((flags&GRE_CSUM) && csum) ||
685 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000686 tunnel->dev->stats.rx_crc_errors++;
687 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688 goto drop;
689 }
690 if (tunnel->parms.i_flags&GRE_SEQ) {
691 if (!(flags&GRE_SEQ) ||
692 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000693 tunnel->dev->stats.rx_fifo_errors++;
694 tunnel->dev->stats.rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 goto drop;
696 }
697 tunnel->i_seqno = seqno + 1;
698 }
Herbert Xue1a80002008-10-09 12:00:17 -0700699
700 /* Warning: All skb pointers will be invalidated! */
701 if (tunnel->dev->type == ARPHRD_ETHER) {
702 if (!pskb_may_pull(skb, ETH_HLEN)) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000703 tunnel->dev->stats.rx_length_errors++;
704 tunnel->dev->stats.rx_errors++;
Herbert Xue1a80002008-10-09 12:00:17 -0700705 goto drop;
706 }
707
708 iph = ip_hdr(skb);
709 skb->protocol = eth_type_trans(skb, tunnel->dev);
710 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
711 }
712
stephen hemmingereccc1bb2012-09-25 11:02:48 +0000713 __skb_tunnel_rx(skb, tunnel->dev);
714
715 skb_reset_network_header(skb);
716 err = IP_ECN_decapsulate(iph, skb);
717 if (unlikely(err)) {
718 if (log_ecn_error)
719 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
720 &iph->saddr, iph->tos);
721 if (err > 1) {
722 ++tunnel->dev->stats.rx_frame_errors;
723 ++tunnel->dev->stats.rx_errors;
724 goto drop;
725 }
726 }
727
Eric Dumazete985aad2010-09-27 03:57:11 +0000728 tstats = this_cpu_ptr(tunnel->dev->tstats);
stephen hemminger87b6d212012-04-12 06:31:16 +0000729 u64_stats_update_begin(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000730 tstats->rx_packets++;
731 tstats->rx_bytes += skb->len;
stephen hemminger87b6d212012-04-12 06:31:16 +0000732 u64_stats_update_end(&tstats->syncp);
Eric Dumazete985aad2010-09-27 03:57:11 +0000733
Eric Dumazet60769a52012-09-27 02:48:50 +0000734 gro_cells_receive(&tunnel->gro_cells, skb);
Eric Dumazet8990f462010-09-20 00:12:11 +0000735 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700737 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738
739drop:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 kfree_skb(skb);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000741 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742}
743
Stephen Hemminger6fef4c02009-08-31 19:50:41 +0000744static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745{
Patrick McHardy2941a482006-01-08 22:05:26 -0800746 struct ip_tunnel *tunnel = netdev_priv(dev);
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000747 const struct iphdr *old_iph = ip_hdr(skb);
748 const struct iphdr *tiph;
David S. Millercbb1e852011-05-04 12:33:34 -0700749 struct flowi4 fl4;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800751 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 struct rtable *rt; /* Route to the other host */
Eric Dumazet15078502010-09-15 11:07:53 +0000753 struct net_device *tdev; /* Device to other host */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700755 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800757 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 int mtu;
759
Eric Dumazet6b78f162012-09-13 21:25:33 +0000760 if (skb->ip_summed == CHECKSUM_PARTIAL &&
761 skb_checksum_help(skb))
762 goto tx_error;
763
Herbert Xue1a80002008-10-09 12:00:17 -0700764 if (dev->type == ARPHRD_ETHER)
765 IPCB(skb)->flags = 0;
766
767 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 gre_hlen = 0;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000769 tiph = (const struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 } else {
771 gre_hlen = tunnel->hlen;
772 tiph = &tunnel->parms.iph;
773 }
774
775 if ((dst = tiph->daddr) == 0) {
776 /* NBMA tunnel */
777
Eric Dumazetadf30902009-06-02 05:19:30 +0000778 if (skb_dst(skb) == NULL) {
Eric Dumazete985aad2010-09-27 03:57:11 +0000779 dev->stats.tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 goto tx_error;
781 }
782
David S. Miller61d57f82012-01-24 18:23:30 -0500783 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazet511c3f92009-06-02 05:14:27 +0000784 rt = skb_rtable(skb);
David S. Millerf8126f12012-07-13 05:03:45 -0700785 dst = rt_nexthop(rt, old_iph->daddr);
David S. Miller61d57f82012-01-24 18:23:30 -0500786 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000787#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000789 const struct in6_addr *addr6;
David S. Miller0ec88662012-01-27 15:01:08 -0800790 struct neighbour *neigh;
791 bool do_tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 int addr_type;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793
David S. Miller0ec88662012-01-27 15:01:08 -0800794 neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795 if (neigh == NULL)
796 goto tx_error;
797
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000798 addr6 = (const struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 addr_type = ipv6_addr_type(addr6);
800
801 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700802 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803 addr_type = ipv6_addr_type(addr6);
804 }
805
806 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
David S. Miller0ec88662012-01-27 15:01:08 -0800807 do_tx_error_icmp = true;
808 else {
809 do_tx_error_icmp = false;
810 dst = addr6->s6_addr32[3];
811 }
812 neigh_release(neigh);
813 if (do_tx_error_icmp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700814 goto tx_error_icmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 }
816#endif
817 else
818 goto tx_error;
819 }
820
821 tos = tiph->tos;
Andreas Jaggiee686ca2009-07-14 09:35:59 -0700822 if (tos == 1) {
823 tos = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 if (skb->protocol == htons(ETH_P_IP))
825 tos = old_iph->tos;
Stephen Hemmingerdd4ba832010-07-08 21:35:58 -0700826 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000827 tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828 }
829
David S. Millercbb1e852011-05-04 12:33:34 -0700830 rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr,
David S. Miller78fbfd82011-03-12 00:00:52 -0500831 tunnel->parms.o_key, RT_TOS(tos),
832 tunnel->parms.link);
833 if (IS_ERR(rt)) {
834 dev->stats.tx_carrier_errors++;
835 goto tx_error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700837 tdev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700838
839 if (tdev == dev) {
840 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000841 dev->stats.collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700842 goto tx_error;
843 }
844
845 df = tiph->frag_off;
846 if (df)
Changli Gaod8d1f302010-06-10 23:31:35 -0700847 mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 else
Eric Dumazetadf30902009-06-02 05:19:30 +0000849 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850
Eric Dumazetadf30902009-06-02 05:19:30 +0000851 if (skb_dst(skb))
David S. Miller6700c272012-07-17 03:29:28 -0700852 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853
854 if (skb->protocol == htons(ETH_P_IP)) {
855 df |= (old_iph->frag_off&htons(IP_DF));
856
857 if ((old_iph->frag_off&htons(IP_DF)) &&
858 mtu < ntohs(old_iph->tot_len)) {
859 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
860 ip_rt_put(rt);
861 goto tx_error;
862 }
863 }
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000864#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 else if (skb->protocol == htons(ETH_P_IPV6)) {
Eric Dumazetadf30902009-06-02 05:19:30 +0000866 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867
Eric Dumazetadf30902009-06-02 05:19:30 +0000868 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800869 if ((tunnel->parms.iph.daddr &&
870 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871 rt6->rt6i_dst.plen == 128) {
872 rt6->rt6i_flags |= RTF_MODIFIED;
David S. Millerdefb3512010-12-08 21:16:57 -0800873 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874 }
875 }
876
877 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000878 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700879 ip_rt_put(rt);
880 goto tx_error;
881 }
882 }
883#endif
884
885 if (tunnel->err_count > 0) {
Wei Yongjunda6185d82009-02-24 23:34:48 -0800886 if (time_before(jiffies,
887 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888 tunnel->err_count--;
889
890 dst_link_failure(skb);
891 } else
892 tunnel->err_count = 0;
893 }
894
Changli Gaod8d1f302010-06-10 23:31:35 -0700895 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896
Patrick McHardycfbba492007-07-09 15:33:40 -0700897 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
898 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
Herbert Xu805dc1d2011-11-18 02:20:06 +0000900 if (max_headroom > dev->needed_headroom)
901 dev->needed_headroom = max_headroom;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700902 if (!new_skb) {
903 ip_rt_put(rt);
Eric Dumazete985aad2010-09-27 03:57:11 +0000904 dev->stats.tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000906 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907 }
908 if (skb->sk)
909 skb_set_owner_w(new_skb, skb->sk);
910 dev_kfree_skb(skb);
911 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700912 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 }
914
Herbert Xu64194c32008-10-09 12:03:17 -0700915 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700916 skb_push(skb, gre_hlen);
917 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800919 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
920 IPSKB_REROUTED);
Eric Dumazetadf30902009-06-02 05:19:30 +0000921 skb_dst_drop(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -0700922 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923
924 /*
925 * Push down and install the IPIP header.
926 */
927
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700928 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 iph->version = 4;
930 iph->ihl = sizeof(struct iphdr) >> 2;
931 iph->frag_off = df;
932 iph->protocol = IPPROTO_GRE;
933 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
David S. Millercbb1e852011-05-04 12:33:34 -0700934 iph->daddr = fl4.daddr;
935 iph->saddr = fl4.saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936
937 if ((iph->ttl = tiph->ttl) == 0) {
938 if (skb->protocol == htons(ETH_P_IP))
939 iph->ttl = old_iph->ttl;
Eric Dumazetdfd56b82011-12-10 09:48:31 +0000940#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941 else if (skb->protocol == htons(ETH_P_IPV6))
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000942 iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943#endif
944 else
David S. Miller323e1262010-12-12 21:55:08 -0800945 iph->ttl = ip4_dst_hoplimit(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 }
947
Herbert Xue1a80002008-10-09 12:00:17 -0700948 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
949 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
950 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951
952 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000953 __be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954
955 if (tunnel->parms.o_flags&GRE_SEQ) {
956 ++tunnel->o_seqno;
957 *ptr = htonl(tunnel->o_seqno);
958 ptr--;
959 }
960 if (tunnel->parms.o_flags&GRE_KEY) {
961 *ptr = tunnel->parms.o_key;
962 ptr--;
963 }
964 if (tunnel->parms.o_flags&GRE_CSUM) {
965 *ptr = 0;
Daniel Baluta5e73ea12012-04-15 01:34:41 +0000966 *(__sum16 *)ptr = ip_compute_csum((void *)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 }
968 }
969
Amerigo Wangaa0010f2012-11-11 21:52:33 +0000970 iptunnel_xmit(skb, dev);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000971 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972
David S. Miller496053f2012-01-11 16:46:32 -0800973#if IS_ENABLED(CONFIG_IPV6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974tx_error_icmp:
975 dst_link_failure(skb);
David S. Miller496053f2012-01-11 16:46:32 -0800976#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977tx_error:
Eric Dumazete985aad2010-09-27 03:57:11 +0000978 dev->stats.tx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 dev_kfree_skb(skb);
Patrick McHardy6ed10652009-06-23 06:03:08 +0000980 return NETDEV_TX_OK;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981}
982
Herbert Xu42aa9162008-10-09 11:59:32 -0700983static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800984{
985 struct net_device *tdev = NULL;
986 struct ip_tunnel *tunnel;
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000987 const struct iphdr *iph;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800988 int hlen = LL_MAX_HEADER;
989 int mtu = ETH_DATA_LEN;
990 int addend = sizeof(struct iphdr) + 4;
991
992 tunnel = netdev_priv(dev);
993 iph = &tunnel->parms.iph;
994
Herbert Xuc95b8192008-10-09 11:58:54 -0700995 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800996
997 if (iph->daddr) {
David S. Millercbb1e852011-05-04 12:33:34 -0700998 struct flowi4 fl4;
999 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001000
David S. Millercbb1e852011-05-04 12:33:34 -07001001 rt = ip_route_output_gre(dev_net(dev), &fl4,
1002 iph->daddr, iph->saddr,
1003 tunnel->parms.o_key,
1004 RT_TOS(iph->tos),
1005 tunnel->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001006 if (!IS_ERR(rt)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001007 tdev = rt->dst.dev;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001008 ip_rt_put(rt);
1009 }
Herbert Xue1a80002008-10-09 12:00:17 -07001010
1011 if (dev->type != ARPHRD_ETHER)
1012 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001013 }
1014
1015 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -07001016 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001017
1018 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -07001019 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001020 mtu = tdev->mtu;
1021 }
1022 dev->iflink = tunnel->parms.link;
1023
1024 /* Precalculate GRE options length */
1025 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
1026 if (tunnel->parms.o_flags&GRE_CSUM)
1027 addend += 4;
1028 if (tunnel->parms.o_flags&GRE_KEY)
1029 addend += 4;
1030 if (tunnel->parms.o_flags&GRE_SEQ)
1031 addend += 4;
1032 }
Herbert Xuc95b8192008-10-09 11:58:54 -07001033 dev->needed_headroom = addend + hlen;
Tom Goff8cdb0452009-08-14 16:33:56 -07001034 mtu -= dev->hard_header_len + addend;
Herbert Xu42aa9162008-10-09 11:59:32 -07001035
1036 if (mtu < 68)
1037 mtu = 68;
1038
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001039 tunnel->hlen = addend;
1040
Herbert Xu42aa9162008-10-09 11:59:32 -07001041 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001042}
1043
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044static int
1045ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1046{
1047 int err = 0;
1048 struct ip_tunnel_parm p;
1049 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001050 struct net *net = dev_net(dev);
1051 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052
1053 switch (cmd) {
1054 case SIOCGETTUNNEL:
1055 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001056 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
1058 err = -EFAULT;
1059 break;
1060 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001061 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 }
1063 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -08001064 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 memcpy(&p, &t->parms, sizeof(p));
1066 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
1067 err = -EFAULT;
1068 break;
1069
1070 case SIOCADDTUNNEL:
1071 case SIOCCHGTUNNEL:
1072 err = -EPERM;
1073 if (!capable(CAP_NET_ADMIN))
1074 goto done;
1075
1076 err = -EFAULT;
1077 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1078 goto done;
1079
1080 err = -EINVAL;
1081 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
1082 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
1083 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
1084 goto done;
1085 if (p.iph.ttl)
1086 p.iph.frag_off |= htons(IP_DF);
1087
1088 if (!(p.i_flags&GRE_KEY))
1089 p.i_key = 0;
1090 if (!(p.o_flags&GRE_KEY))
1091 p.o_key = 0;
1092
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001093 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001095 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001096 if (t != NULL) {
1097 if (t->dev != dev) {
1098 err = -EEXIST;
1099 break;
1100 }
1101 } else {
Eric Dumazet15078502010-09-15 11:07:53 +00001102 unsigned int nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103
Patrick McHardy2941a482006-01-08 22:05:26 -08001104 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105
Joe Perchesf97c1e02007-12-16 13:45:43 -08001106 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107 nflags = IFF_BROADCAST;
1108 else if (p.iph.daddr)
1109 nflags = IFF_POINTOPOINT;
1110
1111 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1112 err = -EINVAL;
1113 break;
1114 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001115 ipgre_tunnel_unlink(ign, t);
Pavel Emelyanov74b0b852010-10-27 05:43:53 +00001116 synchronize_net();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 t->parms.iph.saddr = p.iph.saddr;
1118 t->parms.iph.daddr = p.iph.daddr;
1119 t->parms.i_key = p.i_key;
1120 t->parms.o_key = p.o_key;
1121 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1122 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001123 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 netdev_state_change(dev);
1125 }
1126 }
1127
1128 if (t) {
1129 err = 0;
1130 if (cmd == SIOCCHGTUNNEL) {
1131 t->parms.iph.ttl = p.iph.ttl;
1132 t->parms.iph.tos = p.iph.tos;
1133 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001134 if (t->parms.link != p.link) {
1135 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -07001136 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001137 netdev_state_change(dev);
1138 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 }
1140 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1141 err = -EFAULT;
1142 } else
1143 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1144 break;
1145
1146 case SIOCDELTUNNEL:
1147 err = -EPERM;
1148 if (!capable(CAP_NET_ADMIN))
1149 goto done;
1150
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001151 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152 err = -EFAULT;
1153 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1154 goto done;
1155 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001156 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157 goto done;
1158 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001159 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160 goto done;
1161 dev = t->dev;
1162 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001163 unregister_netdevice(dev);
1164 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165 break;
1166
1167 default:
1168 err = -EINVAL;
1169 }
1170
1171done:
1172 return err;
1173}
1174
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1176{
Patrick McHardy2941a482006-01-08 22:05:26 -08001177 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001178 if (new_mtu < 68 ||
1179 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180 return -EINVAL;
1181 dev->mtu = new_mtu;
1182 return 0;
1183}
1184
/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1213
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001214static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1215 unsigned short type,
Eric Dumazet15078502010-09-15 11:07:53 +00001216 const void *daddr, const void *saddr, unsigned int len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217{
Patrick McHardy2941a482006-01-08 22:05:26 -08001218 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001220 __be16 *p = (__be16 *)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001221
1222 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1223 p[0] = t->parms.o_flags;
1224 p[1] = htons(type);
1225
1226 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001227 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001229
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230 if (saddr)
1231 memcpy(&iph->saddr, saddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001232 if (daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233 memcpy(&iph->daddr, daddr, 4);
Timo Teräs6d55cb92010-03-03 04:01:13 +00001234 if (iph->daddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001236
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 return -t->hlen;
1238}
1239
Timo Teras6a5f44d2007-10-23 20:31:53 -07001240static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1241{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001242 const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001243 memcpy(haddr, &iph->saddr, 4);
1244 return 4;
1245}
1246
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001247static const struct header_ops ipgre_header_ops = {
1248 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001249 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001250};
1251
Timo Teras6a5f44d2007-10-23 20:31:53 -07001252#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253static int ipgre_open(struct net_device *dev)
1254{
Patrick McHardy2941a482006-01-08 22:05:26 -08001255 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256
Joe Perchesf97c1e02007-12-16 13:45:43 -08001257 if (ipv4_is_multicast(t->parms.iph.daddr)) {
David S. Millercbb1e852011-05-04 12:33:34 -07001258 struct flowi4 fl4;
1259 struct rtable *rt;
Eric Dumazete985aad2010-09-27 03:57:11 +00001260
David S. Millercbb1e852011-05-04 12:33:34 -07001261 rt = ip_route_output_gre(dev_net(dev), &fl4,
1262 t->parms.iph.daddr,
1263 t->parms.iph.saddr,
1264 t->parms.o_key,
1265 RT_TOS(t->parms.iph.tos),
1266 t->parms.link);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001267 if (IS_ERR(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268 return -EADDRNOTAVAIL;
Changli Gaod8d1f302010-06-10 23:31:35 -07001269 dev = rt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001271 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 return -EADDRNOTAVAIL;
1273 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001274 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 }
1276 return 0;
1277}
1278
1279static int ipgre_close(struct net_device *dev)
1280{
Patrick McHardy2941a482006-01-08 22:05:26 -08001281 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001282
Joe Perchesf97c1e02007-12-16 13:45:43 -08001283 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001284 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001285 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Eric Dumazet8723e1b2010-10-19 00:39:26 +00001286 if (in_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288 }
1289 return 0;
1290}
1291
1292#endif
1293
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001294static const struct net_device_ops ipgre_netdev_ops = {
1295 .ndo_init = ipgre_tunnel_init,
1296 .ndo_uninit = ipgre_tunnel_uninit,
1297#ifdef CONFIG_NET_IPGRE_BROADCAST
1298 .ndo_open = ipgre_open,
1299 .ndo_stop = ipgre_close,
1300#endif
1301 .ndo_start_xmit = ipgre_tunnel_xmit,
1302 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1303 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001304 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001305};
1306
Eric Dumazete985aad2010-09-27 03:57:11 +00001307static void ipgre_dev_free(struct net_device *dev)
1308{
Eric Dumazet60769a52012-09-27 02:48:50 +00001309 struct ip_tunnel *tunnel = netdev_priv(dev);
1310
1311 gro_cells_destroy(&tunnel->gro_cells);
Eric Dumazete985aad2010-09-27 03:57:11 +00001312 free_percpu(dev->tstats);
1313 free_netdev(dev);
1314}
1315
/* Offload feature bits that GRE tunnel devices advertise. */
#define GRE_FEATURES \
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM)
1320
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321static void ipgre_tunnel_setup(struct net_device *dev)
1322{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001323 dev->netdev_ops = &ipgre_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001324 dev->destructor = ipgre_dev_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325
1326 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001327 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001328 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329 dev->flags = IFF_NOARP;
1330 dev->iflink = 0;
1331 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001332 dev->features |= NETIF_F_NETNS_LOCAL;
Eric Dumazet108bfa82009-05-28 22:35:10 +00001333 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
Eric Dumazet6b78f162012-09-13 21:25:33 +00001334
1335 dev->features |= GRE_FEATURES;
1336 dev->hw_features |= GRE_FEATURES;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337}
1338
1339static int ipgre_tunnel_init(struct net_device *dev)
1340{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 struct ip_tunnel *tunnel;
1342 struct iphdr *iph;
Eric Dumazet60769a52012-09-27 02:48:50 +00001343 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344
Patrick McHardy2941a482006-01-08 22:05:26 -08001345 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346 iph = &tunnel->parms.iph;
1347
1348 tunnel->dev = dev;
1349 strcpy(tunnel->parms.name, dev->name);
1350
1351 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1352 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1353
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001356 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 if (!iph->saddr)
1358 return -EINVAL;
1359 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001360 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361 }
1362#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001363 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001364 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365
Eric Dumazete985aad2010-09-27 03:57:11 +00001366 dev->tstats = alloc_percpu(struct pcpu_tstats);
1367 if (!dev->tstats)
1368 return -ENOMEM;
1369
Eric Dumazet60769a52012-09-27 02:48:50 +00001370 err = gro_cells_init(&tunnel->gro_cells, dev);
1371 if (err) {
1372 free_percpu(dev->tstats);
1373 return err;
1374 }
1375
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 return 0;
1377}
1378
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001379static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380{
Patrick McHardy2941a482006-01-08 22:05:26 -08001381 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 struct iphdr *iph = &tunnel->parms.iph;
1383
1384 tunnel->dev = dev;
1385 strcpy(tunnel->parms.name, dev->name);
1386
1387 iph->version = 4;
1388 iph->protocol = IPPROTO_GRE;
1389 iph->ihl = 5;
1390 tunnel->hlen = sizeof(struct iphdr) + 4;
1391
1392 dev_hold(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393}
1394
1395
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001396static const struct gre_protocol ipgre_protocol = {
1397 .handler = ipgre_rcv,
1398 .err_handler = ipgre_err,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399};
1400
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001401static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001402{
1403 int prio;
1404
1405 for (prio = 0; prio < 4; prio++) {
1406 int h;
1407 for (h = 0; h < HASH_SIZE; h++) {
Eric Dumazet15078502010-09-15 11:07:53 +00001408 struct ip_tunnel *t;
1409
1410 t = rtnl_dereference(ign->tunnels[prio][h]);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001411
1412 while (t != NULL) {
1413 unregister_netdevice_queue(t->dev, head);
Eric Dumazet15078502010-09-15 11:07:53 +00001414 t = rtnl_dereference(t->next);
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001415 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001416 }
1417 }
1418}
1419
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001420static int __net_init ipgre_init_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001421{
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001422 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001423 int err;
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001424
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001425 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1426 ipgre_tunnel_setup);
1427 if (!ign->fb_tunnel_dev) {
1428 err = -ENOMEM;
1429 goto err_alloc_dev;
1430 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001431 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001432
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001433 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001434 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001435
1436 if ((err = register_netdev(ign->fb_tunnel_dev)))
1437 goto err_reg_dev;
1438
Eric Dumazet3285ee32010-10-30 16:21:28 -07001439 rcu_assign_pointer(ign->tunnels_wc[0],
1440 netdev_priv(ign->fb_tunnel_dev));
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001441 return 0;
1442
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001443err_reg_dev:
Eric Dumazet3285ee32010-10-30 16:21:28 -07001444 ipgre_dev_free(ign->fb_tunnel_dev);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001445err_alloc_dev:
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001446 return err;
1447}
1448
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00001449static void __net_exit ipgre_exit_net(struct net *net)
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001450{
1451 struct ipgre_net *ign;
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001452 LIST_HEAD(list);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001453
1454 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001455 rtnl_lock();
Eric Dumazeteef6dd62009-10-27 07:07:16 +00001456 ipgre_destroy_tunnels(ign, &list);
1457 unregister_netdevice_many(&list);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001458 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001459}
1460
1461static struct pernet_operations ipgre_net_ops = {
1462 .init = ipgre_init_net,
1463 .exit = ipgre_exit_net,
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001464 .id = &ipgre_net_id,
1465 .size = sizeof(struct ipgre_net),
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001466};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001467
Herbert Xuc19e6542008-10-09 11:59:55 -07001468static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1469{
1470 __be16 flags;
1471
1472 if (!data)
1473 return 0;
1474
1475 flags = 0;
1476 if (data[IFLA_GRE_IFLAGS])
1477 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1478 if (data[IFLA_GRE_OFLAGS])
1479 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1480 if (flags & (GRE_VERSION|GRE_ROUTING))
1481 return -EINVAL;
1482
1483 return 0;
1484}
1485
Herbert Xue1a80002008-10-09 12:00:17 -07001486static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1487{
1488 __be32 daddr;
1489
1490 if (tb[IFLA_ADDRESS]) {
1491 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1492 return -EINVAL;
1493 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1494 return -EADDRNOTAVAIL;
1495 }
1496
1497 if (!data)
1498 goto out;
1499
1500 if (data[IFLA_GRE_REMOTE]) {
1501 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1502 if (!daddr)
1503 return -EINVAL;
1504 }
1505
1506out:
1507 return ipgre_tunnel_validate(tb, data);
1508}
1509
Herbert Xuc19e6542008-10-09 11:59:55 -07001510static void ipgre_netlink_parms(struct nlattr *data[],
1511 struct ip_tunnel_parm *parms)
1512{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001513 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001514
1515 parms->iph.protocol = IPPROTO_GRE;
1516
1517 if (!data)
1518 return;
1519
1520 if (data[IFLA_GRE_LINK])
1521 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1522
1523 if (data[IFLA_GRE_IFLAGS])
1524 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1525
1526 if (data[IFLA_GRE_OFLAGS])
1527 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1528
1529 if (data[IFLA_GRE_IKEY])
1530 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1531
1532 if (data[IFLA_GRE_OKEY])
1533 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1534
1535 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001536 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001537
1538 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001539 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001540
1541 if (data[IFLA_GRE_TTL])
1542 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1543
1544 if (data[IFLA_GRE_TOS])
1545 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1546
1547 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1548 parms->iph.frag_off = htons(IP_DF);
1549}
1550
Herbert Xue1a80002008-10-09 12:00:17 -07001551static int ipgre_tap_init(struct net_device *dev)
1552{
1553 struct ip_tunnel *tunnel;
1554
1555 tunnel = netdev_priv(dev);
1556
1557 tunnel->dev = dev;
1558 strcpy(tunnel->parms.name, dev->name);
1559
1560 ipgre_tunnel_bind_dev(dev);
1561
Eric Dumazete985aad2010-09-27 03:57:11 +00001562 dev->tstats = alloc_percpu(struct pcpu_tstats);
1563 if (!dev->tstats)
1564 return -ENOMEM;
1565
Herbert Xue1a80002008-10-09 12:00:17 -07001566 return 0;
1567}
1568
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001569static const struct net_device_ops ipgre_tap_netdev_ops = {
1570 .ndo_init = ipgre_tap_init,
1571 .ndo_uninit = ipgre_tunnel_uninit,
1572 .ndo_start_xmit = ipgre_tunnel_xmit,
1573 .ndo_set_mac_address = eth_mac_addr,
1574 .ndo_validate_addr = eth_validate_addr,
1575 .ndo_change_mtu = ipgre_tunnel_change_mtu,
stephen hemminger87b6d212012-04-12 06:31:16 +00001576 .ndo_get_stats64 = ipgre_get_stats64,
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001577};
1578
Herbert Xue1a80002008-10-09 12:00:17 -07001579static void ipgre_tap_setup(struct net_device *dev)
1580{
1581
1582 ether_setup(dev);
1583
Herbert Xu2e9526b2009-10-30 05:51:48 +00001584 dev->netdev_ops = &ipgre_tap_netdev_ops;
Eric Dumazete985aad2010-09-27 03:57:11 +00001585 dev->destructor = ipgre_dev_free;
Herbert Xue1a80002008-10-09 12:00:17 -07001586
1587 dev->iflink = 0;
1588 dev->features |= NETIF_F_NETNS_LOCAL;
1589}
1590
Eric W. Biederman81adee42009-11-08 00:53:51 -08001591static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[],
Herbert Xuc19e6542008-10-09 11:59:55 -07001592 struct nlattr *data[])
1593{
1594 struct ip_tunnel *nt;
1595 struct net *net = dev_net(dev);
1596 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1597 int mtu;
1598 int err;
1599
1600 nt = netdev_priv(dev);
1601 ipgre_netlink_parms(data, &nt->parms);
1602
Herbert Xue1a80002008-10-09 12:00:17 -07001603 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001604 return -EEXIST;
1605
Herbert Xue1a80002008-10-09 12:00:17 -07001606 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
Danny Kukawkaf2cedb62012-02-15 06:45:39 +00001607 eth_hw_addr_random(dev);
Herbert Xue1a80002008-10-09 12:00:17 -07001608
Herbert Xuc19e6542008-10-09 11:59:55 -07001609 mtu = ipgre_tunnel_bind_dev(dev);
1610 if (!tb[IFLA_MTU])
1611 dev->mtu = mtu;
1612
Eric Dumazetb790e012010-09-27 23:05:47 +00001613 /* Can use a lockless transmit, unless we generate output sequences */
1614 if (!(nt->parms.o_flags & GRE_SEQ))
1615 dev->features |= NETIF_F_LLTX;
1616
Herbert Xuc19e6542008-10-09 11:59:55 -07001617 err = register_netdevice(dev);
1618 if (err)
1619 goto out;
1620
1621 dev_hold(dev);
1622 ipgre_tunnel_link(ign, nt);
1623
1624out:
1625 return err;
1626}
1627
1628static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1629 struct nlattr *data[])
1630{
1631 struct ip_tunnel *t, *nt;
1632 struct net *net = dev_net(dev);
1633 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1634 struct ip_tunnel_parm p;
1635 int mtu;
1636
1637 if (dev == ign->fb_tunnel_dev)
1638 return -EINVAL;
1639
1640 nt = netdev_priv(dev);
1641 ipgre_netlink_parms(data, &p);
1642
1643 t = ipgre_tunnel_locate(net, &p, 0);
1644
1645 if (t) {
1646 if (t->dev != dev)
1647 return -EEXIST;
1648 } else {
Herbert Xuc19e6542008-10-09 11:59:55 -07001649 t = nt;
1650
Herbert Xu2e9526b2009-10-30 05:51:48 +00001651 if (dev->type != ARPHRD_ETHER) {
Eric Dumazet15078502010-09-15 11:07:53 +00001652 unsigned int nflags = 0;
Herbert Xuc19e6542008-10-09 11:59:55 -07001653
Herbert Xu2e9526b2009-10-30 05:51:48 +00001654 if (ipv4_is_multicast(p.iph.daddr))
1655 nflags = IFF_BROADCAST;
1656 else if (p.iph.daddr)
1657 nflags = IFF_POINTOPOINT;
1658
1659 if ((dev->flags ^ nflags) &
1660 (IFF_POINTOPOINT | IFF_BROADCAST))
1661 return -EINVAL;
1662 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001663
1664 ipgre_tunnel_unlink(ign, t);
1665 t->parms.iph.saddr = p.iph.saddr;
1666 t->parms.iph.daddr = p.iph.daddr;
1667 t->parms.i_key = p.i_key;
Herbert Xu2e9526b2009-10-30 05:51:48 +00001668 if (dev->type != ARPHRD_ETHER) {
1669 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1670 memcpy(dev->broadcast, &p.iph.daddr, 4);
1671 }
Herbert Xuc19e6542008-10-09 11:59:55 -07001672 ipgre_tunnel_link(ign, t);
1673 netdev_state_change(dev);
1674 }
1675
1676 t->parms.o_key = p.o_key;
1677 t->parms.iph.ttl = p.iph.ttl;
1678 t->parms.iph.tos = p.iph.tos;
1679 t->parms.iph.frag_off = p.iph.frag_off;
1680
1681 if (t->parms.link != p.link) {
1682 t->parms.link = p.link;
1683 mtu = ipgre_tunnel_bind_dev(dev);
1684 if (!tb[IFLA_MTU])
1685 dev->mtu = mtu;
1686 netdev_state_change(dev);
1687 }
1688
1689 return 0;
1690}
1691
1692static size_t ipgre_get_size(const struct net_device *dev)
1693{
1694 return
1695 /* IFLA_GRE_LINK */
1696 nla_total_size(4) +
1697 /* IFLA_GRE_IFLAGS */
1698 nla_total_size(2) +
1699 /* IFLA_GRE_OFLAGS */
1700 nla_total_size(2) +
1701 /* IFLA_GRE_IKEY */
1702 nla_total_size(4) +
1703 /* IFLA_GRE_OKEY */
1704 nla_total_size(4) +
1705 /* IFLA_GRE_LOCAL */
1706 nla_total_size(4) +
1707 /* IFLA_GRE_REMOTE */
1708 nla_total_size(4) +
1709 /* IFLA_GRE_TTL */
1710 nla_total_size(1) +
1711 /* IFLA_GRE_TOS */
1712 nla_total_size(1) +
1713 /* IFLA_GRE_PMTUDISC */
1714 nla_total_size(1) +
1715 0;
1716}
1717
1718static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1719{
1720 struct ip_tunnel *t = netdev_priv(dev);
1721 struct ip_tunnel_parm *p = &t->parms;
1722
David S. Millerf3756b72012-04-01 20:39:02 -04001723 if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1724 nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) ||
1725 nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) ||
1726 nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1727 nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1728 nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1729 nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1730 nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1731 nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1732 nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1733 !!(p->iph.frag_off & htons(IP_DF))))
1734 goto nla_put_failure;
Herbert Xuc19e6542008-10-09 11:59:55 -07001735 return 0;
1736
1737nla_put_failure:
1738 return -EMSGSIZE;
1739}
1740
1741static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1742 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1743 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1744 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1745 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1746 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001747 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1748 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001749 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1750 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1751 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1752};
1753
1754static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1755 .kind = "gre",
1756 .maxtype = IFLA_GRE_MAX,
1757 .policy = ipgre_policy,
1758 .priv_size = sizeof(struct ip_tunnel),
1759 .setup = ipgre_tunnel_setup,
1760 .validate = ipgre_tunnel_validate,
1761 .newlink = ipgre_newlink,
1762 .changelink = ipgre_changelink,
1763 .get_size = ipgre_get_size,
1764 .fill_info = ipgre_fill_info,
1765};
1766
Herbert Xue1a80002008-10-09 12:00:17 -07001767static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1768 .kind = "gretap",
1769 .maxtype = IFLA_GRE_MAX,
1770 .policy = ipgre_policy,
1771 .priv_size = sizeof(struct ip_tunnel),
1772 .setup = ipgre_tap_setup,
1773 .validate = ipgre_tap_validate,
1774 .newlink = ipgre_newlink,
1775 .changelink = ipgre_changelink,
1776 .get_size = ipgre_get_size,
1777 .fill_info = ipgre_fill_info,
1778};
1779
/*
 *	And now the module code and kernel interface.
 */
1783
1784static int __init ipgre_init(void)
1785{
1786 int err;
1787
Joe Perches058bd4d2012-03-11 18:36:11 +00001788 pr_info("GRE over IPv4 tunneling driver\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789
Eric W. Biedermancfb8fbf2009-11-29 15:46:13 +00001790 err = register_pernet_device(&ipgre_net_ops);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001791 if (err < 0)
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001792 return err;
1793
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001794 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001795 if (err < 0) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001796 pr_info("%s: can't add protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001797 goto add_proto_failed;
1798 }
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001799
Herbert Xuc19e6542008-10-09 11:59:55 -07001800 err = rtnl_link_register(&ipgre_link_ops);
1801 if (err < 0)
1802 goto rtnl_link_failed;
1803
Herbert Xue1a80002008-10-09 12:00:17 -07001804 err = rtnl_link_register(&ipgre_tap_ops);
1805 if (err < 0)
1806 goto tap_ops_failed;
1807
Herbert Xuc19e6542008-10-09 11:59:55 -07001808out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001810
Herbert Xue1a80002008-10-09 12:00:17 -07001811tap_ops_failed:
1812 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001813rtnl_link_failed:
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001814 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001815add_proto_failed:
1816 unregister_pernet_device(&ipgre_net_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001817 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818}
1819
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001820static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821{
Herbert Xue1a80002008-10-09 12:00:17 -07001822 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001823 rtnl_link_unregister(&ipgre_link_ops);
Dmitry Kozlov00959ad2010-08-21 23:05:39 -07001824 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
Joe Perches058bd4d2012-03-11 18:36:11 +00001825 pr_info("%s: can't remove protocol\n", __func__);
Alexey Dobriyanc2892f02010-02-16 07:57:44 +00001826 unregister_pernet_device(&ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827}
1828
1829module_init(ipgre_init);
1830module_exit(ipgre_fini);
1831MODULE_LICENSE("GPL");
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001832MODULE_ALIAS_RTNL_LINK("gre");
1833MODULE_ALIAS_RTNL_LINK("gretap");
Vasiliy Kulikov8909c9a2011-03-02 00:33:13 +03001834MODULE_ALIAS_NETDEV("gre0");