blob: 0101521f366b74546cbb90f60e7e6faac597b430 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070030#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080031#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070043#include <net/net_namespace.h>
44#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070045#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining a new variable in ALL
67 skb, even if no tunneling is used.
68
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090069 Current solution: t->recursion lock breaks dead loops. It looks
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 like dev->tbusy flag, but I preferred new variable, because
71 the semantics is different. One day, when hard_start_xmit
72 will be multithreaded we will have to use skb->encapsulation.
73
74
75
76 2. Networking dead loops would not kill routers, but would really
77 kill network. IP hop limit plays role of "t->recursion" in this case,
78 if we copy it from packet being encapsulated to upper header.
79 It is very good solution, but it introduces two problems:
80
81 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from tunnel,
84 so that this problem would be solved and traceroute output
85 would be even more informative. This idea appeared to be wrong:
86 only Linux complies to rfc1812 now (yes, guys, Linux is the only
87 true router now :-)), all routers (at least, in neighbourhood of mine)
88 return only 8 bytes of payload. It is the end.
89
90 Hence, if we want that OSPF worked or traceroute said something reasonable,
91 we should search for another solution.
92
93 One of them is to parse packet trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially,
95 taking into account fragmentation. To be short, it is not a solution at all.
96
97 Current solution: The solution was UNEXPECTEDLY SIMPLE.
98 We force DF flag on tunnels with preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches, that exceed pmtu are pruned) and tunnel mtu
102 fastly degrades to value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop,
104 which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made
106 all that we could make. Even if it is your gated who injected
107 fatal route to network, even if it were you who configured
108 fatal static route: you are innocent. :-)
109
110
111
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident, how to make them modular.
115 sit is integral part of IPv6, ipip and gre are naturally modular.
116 We could extract common parts (hash table, ioctl etc)
117 to a separate module (ip_tunnel.c).
118
119 Alexey Kuznetsov.
120 */
121
Herbert Xuc19e6542008-10-09 11:59:55 -0700122static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123static int ipgre_tunnel_init(struct net_device *dev);
124static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700125static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126
127/* Fallback tunnel: no source, no destination, no key, no options */
128
/* Number of buckets in each tunnel hash table. */
#define HASH_SIZE  16

/* Identifier passed to net_generic() to fetch the per-namespace GRE state. */
static int ipgre_net_id;

/* GRE state kept for each network namespace. */
struct ipgre_net {
	/* Tunnel chains, indexed by table number and then by hash bucket. */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Device whose tunnel is returned when no configured tunnel matches. */
	struct net_device *fb_tunnel_dev;
};
137
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138/* Tunnel hash table */
139
140/*
141 4 hash tables:
142
143 3: (remote,local)
144 2: (remote,*)
145 1: (*,local)
146 0: (*,*)
147
148 We require exact key match i.e. if a key is present in packet
149 it will match only tunnel with the same key; if it is not present,
150 it will match only keyless tunnel.
151
152 All keyless packets, if not matched by configured keyless tunnels,
153 will match the fallback tunnel.
154 */
155
/*
 * Fold a 32-bit value (address or key) into a bucket index in the range
 * [0, HASH_SIZE).  HASH_SIZE must be a power of two for the mask to work.
 *
 * The argument is parenthesized so that an expression such as HASH(a ^ b)
 * hashes the whole expression rather than being torn apart by operator
 * precedence.  Note that @addr is still evaluated twice, so it must not
 * have side effects.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700157
/* Short names for the four hash tables held in struct ipgre_net. */
#define tunnels_wc	tunnels[0]	/* (*,*)          */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */

/*
 * Protects the tunnel hash chains: taken for reading around lookups and
 * for writing when a tunnel is linked into or unlinked from a chain.
 */
static DEFINE_RWLOCK(ipgre_lock);
164
165/* Given src, dst and key, find appropriate for input tunnel. */
166
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700167static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
Herbert Xue1a80002008-10-09 12:00:17 -0700168 __be32 remote, __be32 local,
169 __be32 key, __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170{
171 unsigned h0 = HASH(remote);
172 unsigned h1 = HASH(key);
173 struct ip_tunnel *t;
Herbert Xue1a80002008-10-09 12:00:17 -0700174 struct ip_tunnel *t2 = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700175 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700176 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
177 ARPHRD_ETHER : ARPHRD_IPGRE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700179 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
Herbert Xue1a80002008-10-09 12:00:17 -0700181 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
182 if (t->dev->type == dev_type)
183 return t;
184 if (t->dev->type == ARPHRD_IPGRE && !t2)
185 t2 = t;
186 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 }
188 }
Herbert Xue1a80002008-10-09 12:00:17 -0700189
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700190 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 if (remote == t->parms.iph.daddr) {
Herbert Xue1a80002008-10-09 12:00:17 -0700192 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
193 if (t->dev->type == dev_type)
194 return t;
195 if (t->dev->type == ARPHRD_IPGRE && !t2)
196 t2 = t;
197 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198 }
199 }
Herbert Xue1a80002008-10-09 12:00:17 -0700200
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700201 for (t = ign->tunnels_l[h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202 if (local == t->parms.iph.saddr ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800203 (local == t->parms.iph.daddr &&
204 ipv4_is_multicast(local))) {
Herbert Xue1a80002008-10-09 12:00:17 -0700205 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
206 if (t->dev->type == dev_type)
207 return t;
208 if (t->dev->type == ARPHRD_IPGRE && !t2)
209 t2 = t;
210 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 }
212 }
Herbert Xue1a80002008-10-09 12:00:17 -0700213
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700214 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
Herbert Xue1a80002008-10-09 12:00:17 -0700215 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
216 if (t->dev->type == dev_type)
217 return t;
218 if (t->dev->type == ARPHRD_IPGRE && !t2)
219 t2 = t;
220 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221 }
222
Herbert Xue1a80002008-10-09 12:00:17 -0700223 if (t2)
224 return t2;
225
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700226 if (ign->fb_tunnel_dev->flags&IFF_UP)
227 return netdev_priv(ign->fb_tunnel_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228 return NULL;
229}
230
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700231static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
232 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900234 __be32 remote = parms->iph.daddr;
235 __be32 local = parms->iph.saddr;
236 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237 unsigned h = HASH(key);
238 int prio = 0;
239
240 if (local)
241 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800242 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 prio |= 2;
244 h ^= HASH(remote);
245 }
246
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700247 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248}
249
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700250static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
251 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900252{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700253 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900254}
255
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700256static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700258 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259
260 t->next = *tp;
261 write_lock_bh(&ipgre_lock);
262 *tp = t;
263 write_unlock_bh(&ipgre_lock);
264}
265
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700266static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267{
268 struct ip_tunnel **tp;
269
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700270 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 if (t == *tp) {
272 write_lock_bh(&ipgre_lock);
273 *tp = t->next;
274 write_unlock_bh(&ipgre_lock);
275 break;
276 }
277 }
278}
279
Herbert Xue1a80002008-10-09 12:00:17 -0700280static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
281 struct ip_tunnel_parm *parms,
282 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283{
Al Virod5a0a1e2006-11-08 00:23:14 -0800284 __be32 remote = parms->iph.daddr;
285 __be32 local = parms->iph.saddr;
286 __be32 key = parms->i_key;
Herbert Xue1a80002008-10-09 12:00:17 -0700287 struct ip_tunnel *t, **tp;
288 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
289
290 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
291 if (local == t->parms.iph.saddr &&
292 remote == t->parms.iph.daddr &&
293 key == t->parms.i_key &&
294 type == t->dev->type)
295 break;
296
297 return t;
298}
299
300static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
301 struct ip_tunnel_parm *parms, int create)
302{
303 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700306 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307
Herbert Xue1a80002008-10-09 12:00:17 -0700308 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
309 if (t || !create)
310 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311
312 if (parms->name[0])
313 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800314 else
315 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316
317 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
318 if (!dev)
319 return NULL;
320
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700321 dev_net_set(dev, net);
322
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800323 if (strchr(name, '%')) {
324 if (dev_alloc_name(dev, name) < 0)
325 goto failed_free;
326 }
327
Patrick McHardy2941a482006-01-08 22:05:26 -0800328 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700330 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331
Herbert Xu42aa9162008-10-09 11:59:32 -0700332 dev->mtu = ipgre_tunnel_bind_dev(dev);
333
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800334 if (register_netdevice(dev) < 0)
335 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700338 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 return nt;
340
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800341failed_free:
342 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343 return NULL;
344}
345
346static void ipgre_tunnel_uninit(struct net_device *dev)
347{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700348 struct net *net = dev_net(dev);
349 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
350
351 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352 dev_put(dev);
353}
354
355
356static void ipgre_err(struct sk_buff *skb, u32 info)
357{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
Rami Rosen071f92d2008-05-21 17:47:54 -0700359/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360 8 bytes of packet payload. It means, that precise relaying of
361 ICMP in the real Internet is absolutely infeasible.
362
363 Moreover, Cisco "wise men" put GRE key to the third word
364 in GRE header. It makes impossible maintaining even soft state for keyed
365 GRE tunnels with enabled checksum. Tell them "thank you".
366
367 Well, I wonder, rfc1812 was written by Cisco employee,
368 what the hell these idiots break standrads established
369 by themself???
370 */
371
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800372 struct iphdr *iph = (struct iphdr *)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800373 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300375 const int type = icmp_hdr(skb)->type;
376 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800378 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379
380 flags = p[0];
381 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
382 if (flags&(GRE_VERSION|GRE_ROUTING))
383 return;
384 if (flags&GRE_KEY) {
385 grehlen += 4;
386 if (flags&GRE_CSUM)
387 grehlen += 4;
388 }
389 }
390
391 /* If only 8 bytes returned, keyed message will be dropped here */
392 if (skb_headlen(skb) < grehlen)
393 return;
394
395 switch (type) {
396 default:
397 case ICMP_PARAMETERPROB:
398 return;
399
400 case ICMP_DEST_UNREACH:
401 switch (code) {
402 case ICMP_SR_FAILED:
403 case ICMP_PORT_UNREACH:
404 /* Impossible event. */
405 return;
406 case ICMP_FRAG_NEEDED:
407 /* Soft state for pmtu is maintained by IP core. */
408 return;
409 default:
410 /* All others are translated to HOST_UNREACH.
411 rfc2003 contains "deep thoughts" about NET_UNREACH,
412 I believe they are just ether pollution. --ANK
413 */
414 break;
415 }
416 break;
417 case ICMP_TIME_EXCEEDED:
418 if (code != ICMP_EXC_TTL)
419 return;
420 break;
421 }
422
423 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700424 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
Herbert Xue1a80002008-10-09 12:00:17 -0700425 flags & GRE_KEY ?
426 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
427 p[1]);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800428 if (t == NULL || t->parms.iph.daddr == 0 ||
429 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 goto out;
431
432 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
433 goto out;
434
435 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
436 t->err_count++;
437 else
438 t->err_count = 1;
439 t->err_time = jiffies;
440out:
441 read_unlock(&ipgre_lock);
442 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443}
444
445static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
446{
447 if (INET_ECN_is_ce(iph->tos)) {
448 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700449 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700451 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 }
453 }
454}
455
456static inline u8
457ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
458{
459 u8 inner = 0;
460 if (skb->protocol == htons(ETH_P_IP))
461 inner = old_iph->tos;
462 else if (skb->protocol == htons(ETH_P_IPV6))
463 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
464 return INET_ECN_encapsulate(tos, inner);
465}
466
467static int ipgre_rcv(struct sk_buff *skb)
468{
469 struct iphdr *iph;
470 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800471 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800472 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800473 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474 u32 seqno = 0;
475 struct ip_tunnel *tunnel;
476 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700477 __be16 gre_proto;
Herbert Xu64194c32008-10-09 12:03:17 -0700478 unsigned int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479
480 if (!pskb_may_pull(skb, 16))
481 goto drop_nolock;
482
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700483 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800485 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486
487 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
488 /* - Version must be 0.
489 - We do not support routing headers.
490 */
491 if (flags&(GRE_VERSION|GRE_ROUTING))
492 goto drop_nolock;
493
494 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800495 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700496 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800497 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800498 if (!csum)
499 break;
500 /* fall through */
501 case CHECKSUM_NONE:
502 skb->csum = 0;
503 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700504 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505 }
506 offset += 4;
507 }
508 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800509 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 offset += 4;
511 }
512 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800513 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 offset += 4;
515 }
516 }
517
Herbert Xue1a80002008-10-09 12:00:17 -0700518 gre_proto = *(__be16 *)(h + 2);
519
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700521 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
Herbert Xue1a80002008-10-09 12:00:17 -0700522 iph->saddr, iph->daddr, key,
523 gre_proto))) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700524 struct net_device_stats *stats = &tunnel->dev->stats;
525
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526 secpath_reset(skb);
527
Herbert Xue1a80002008-10-09 12:00:17 -0700528 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529 /* WCCP version 1 and 2 protocol decoding.
530 * - Change protocol to IP
531 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
532 */
Herbert Xue1a80002008-10-09 12:00:17 -0700533 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700534 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900535 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536 offset += 4;
537 }
538
Timo Teras1d069162007-12-20 00:10:33 -0800539 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300540 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700541 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542 skb->pkt_type = PACKET_HOST;
543#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800544 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545 /* Looped back packet, drop it! */
Eric Dumazetee6b9672008-03-05 18:30:47 -0800546 if (skb->rtable->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547 goto drop;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700548 stats->multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 skb->pkt_type = PACKET_BROADCAST;
550 }
551#endif
552
553 if (((flags&GRE_CSUM) && csum) ||
554 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700555 stats->rx_crc_errors++;
556 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557 goto drop;
558 }
559 if (tunnel->parms.i_flags&GRE_SEQ) {
560 if (!(flags&GRE_SEQ) ||
561 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700562 stats->rx_fifo_errors++;
563 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564 goto drop;
565 }
566 tunnel->i_seqno = seqno + 1;
567 }
Herbert Xue1a80002008-10-09 12:00:17 -0700568
Herbert Xu64194c32008-10-09 12:03:17 -0700569 len = skb->len;
570
Herbert Xue1a80002008-10-09 12:00:17 -0700571 /* Warning: All skb pointers will be invalidated! */
572 if (tunnel->dev->type == ARPHRD_ETHER) {
573 if (!pskb_may_pull(skb, ETH_HLEN)) {
574 stats->rx_length_errors++;
575 stats->rx_errors++;
576 goto drop;
577 }
578
579 iph = ip_hdr(skb);
580 skb->protocol = eth_type_trans(skb, tunnel->dev);
581 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
582 }
583
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700584 stats->rx_packets++;
Herbert Xu64194c32008-10-09 12:03:17 -0700585 stats->rx_bytes += len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 skb->dev = tunnel->dev;
587 dst_release(skb->dst);
588 skb->dst = NULL;
589 nf_reset(skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700590
591 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592 ipgre_ecn_decapsulate(iph, skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700593
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 netif_rx(skb);
595 read_unlock(&ipgre_lock);
596 return(0);
597 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700598 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599
600drop:
601 read_unlock(&ipgre_lock);
602drop_nolock:
603 kfree_skb(skb);
604 return(0);
605}
606
607static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
608{
Patrick McHardy2941a482006-01-08 22:05:26 -0800609 struct ip_tunnel *tunnel = netdev_priv(dev);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700610 struct net_device_stats *stats = &tunnel->dev->stats;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700611 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 struct iphdr *tiph;
613 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800614 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 struct rtable *rt; /* Route to the other host */
616 struct net_device *tdev; /* Device to other host */
617 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700618 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800620 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 int mtu;
622
623 if (tunnel->recursion++) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700624 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625 goto tx_error;
626 }
627
Herbert Xue1a80002008-10-09 12:00:17 -0700628 if (dev->type == ARPHRD_ETHER)
629 IPCB(skb)->flags = 0;
630
631 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 gre_hlen = 0;
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800633 tiph = (struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 } else {
635 gre_hlen = tunnel->hlen;
636 tiph = &tunnel->parms.iph;
637 }
638
639 if ((dst = tiph->daddr) == 0) {
640 /* NBMA tunnel */
641
642 if (skb->dst == NULL) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700643 stats->tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 goto tx_error;
645 }
646
647 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazetee6b9672008-03-05 18:30:47 -0800648 rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649 if ((dst = rt->rt_gateway) == 0)
650 goto tx_error_icmp;
651 }
652#ifdef CONFIG_IPV6
653 else if (skb->protocol == htons(ETH_P_IPV6)) {
654 struct in6_addr *addr6;
655 int addr_type;
656 struct neighbour *neigh = skb->dst->neighbour;
657
658 if (neigh == NULL)
659 goto tx_error;
660
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800661 addr6 = (struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662 addr_type = ipv6_addr_type(addr6);
663
664 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700665 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 addr_type = ipv6_addr_type(addr6);
667 }
668
669 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
670 goto tx_error_icmp;
671
672 dst = addr6->s6_addr32[3];
673 }
674#endif
675 else
676 goto tx_error;
677 }
678
679 tos = tiph->tos;
680 if (tos&1) {
681 if (skb->protocol == htons(ETH_P_IP))
682 tos = old_iph->tos;
683 tos &= ~1;
684 }
685
686 {
687 struct flowi fl = { .oif = tunnel->parms.link,
688 .nl_u = { .ip4_u =
689 { .daddr = dst,
690 .saddr = tiph->saddr,
691 .tos = RT_TOS(tos) } },
692 .proto = IPPROTO_GRE };
Pavel Emelyanov96635522008-04-16 01:10:44 -0700693 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700694 stats->tx_carrier_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 goto tx_error;
696 }
697 }
698 tdev = rt->u.dst.dev;
699
700 if (tdev == dev) {
701 ip_rt_put(rt);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700702 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703 goto tx_error;
704 }
705
706 df = tiph->frag_off;
707 if (df)
Herbert Xuc95b8192008-10-09 11:58:54 -0700708 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709 else
710 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
711
712 if (skb->dst)
713 skb->dst->ops->update_pmtu(skb->dst, mtu);
714
715 if (skb->protocol == htons(ETH_P_IP)) {
716 df |= (old_iph->frag_off&htons(IP_DF));
717
718 if ((old_iph->frag_off&htons(IP_DF)) &&
719 mtu < ntohs(old_iph->tot_len)) {
720 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
721 ip_rt_put(rt);
722 goto tx_error;
723 }
724 }
725#ifdef CONFIG_IPV6
726 else if (skb->protocol == htons(ETH_P_IPV6)) {
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800727 struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728
729 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800730 if ((tunnel->parms.iph.daddr &&
731 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 rt6->rt6i_dst.plen == 128) {
733 rt6->rt6i_flags |= RTF_MODIFIED;
734 skb->dst->metrics[RTAX_MTU-1] = mtu;
735 }
736 }
737
738 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
739 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
740 ip_rt_put(rt);
741 goto tx_error;
742 }
743 }
744#endif
745
746 if (tunnel->err_count > 0) {
747 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
748 tunnel->err_count--;
749
750 dst_link_failure(skb);
751 } else
752 tunnel->err_count = 0;
753 }
754
755 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
756
Patrick McHardycfbba492007-07-09 15:33:40 -0700757 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
758 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
760 if (!new_skb) {
761 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900762 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 dev_kfree_skb(skb);
764 tunnel->recursion--;
765 return 0;
766 }
767 if (skb->sk)
768 skb_set_owner_w(new_skb, skb->sk);
769 dev_kfree_skb(skb);
770 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700771 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 }
773
Herbert Xu64194c32008-10-09 12:03:17 -0700774 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700775 skb_push(skb, gre_hlen);
776 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800778 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
779 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780 dst_release(skb->dst);
781 skb->dst = &rt->u.dst;
782
783 /*
784 * Push down and install the IPIP header.
785 */
786
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700787 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 iph->version = 4;
789 iph->ihl = sizeof(struct iphdr) >> 2;
790 iph->frag_off = df;
791 iph->protocol = IPPROTO_GRE;
792 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
793 iph->daddr = rt->rt_dst;
794 iph->saddr = rt->rt_src;
795
796 if ((iph->ttl = tiph->ttl) == 0) {
797 if (skb->protocol == htons(ETH_P_IP))
798 iph->ttl = old_iph->ttl;
799#ifdef CONFIG_IPV6
800 else if (skb->protocol == htons(ETH_P_IPV6))
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800801 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802#endif
803 else
804 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
805 }
806
Herbert Xue1a80002008-10-09 12:00:17 -0700807 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
808 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
809 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810
811 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800812 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813
814 if (tunnel->parms.o_flags&GRE_SEQ) {
815 ++tunnel->o_seqno;
816 *ptr = htonl(tunnel->o_seqno);
817 ptr--;
818 }
819 if (tunnel->parms.o_flags&GRE_KEY) {
820 *ptr = tunnel->parms.o_key;
821 ptr--;
822 }
823 if (tunnel->parms.o_flags&GRE_CSUM) {
824 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800825 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 }
827 }
828
829 nf_reset(skb);
830
831 IPTUNNEL_XMIT();
832 tunnel->recursion--;
833 return 0;
834
835tx_error_icmp:
836 dst_link_failure(skb);
837
838tx_error:
839 stats->tx_errors++;
840 dev_kfree_skb(skb);
841 tunnel->recursion--;
842 return 0;
843}
844
Herbert Xu42aa9162008-10-09 11:59:32 -0700845static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800846{
847 struct net_device *tdev = NULL;
848 struct ip_tunnel *tunnel;
849 struct iphdr *iph;
850 int hlen = LL_MAX_HEADER;
851 int mtu = ETH_DATA_LEN;
852 int addend = sizeof(struct iphdr) + 4;
853
854 tunnel = netdev_priv(dev);
855 iph = &tunnel->parms.iph;
856
Herbert Xuc95b8192008-10-09 11:58:54 -0700857 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800858
859 if (iph->daddr) {
860 struct flowi fl = { .oif = tunnel->parms.link,
861 .nl_u = { .ip4_u =
862 { .daddr = iph->daddr,
863 .saddr = iph->saddr,
864 .tos = RT_TOS(iph->tos) } },
865 .proto = IPPROTO_GRE };
866 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -0700867 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800868 tdev = rt->u.dst.dev;
869 ip_rt_put(rt);
870 }
Herbert Xue1a80002008-10-09 12:00:17 -0700871
872 if (dev->type != ARPHRD_ETHER)
873 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800874 }
875
876 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -0700877 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800878
879 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -0700880 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800881 mtu = tdev->mtu;
882 }
883 dev->iflink = tunnel->parms.link;
884
885 /* Precalculate GRE options length */
886 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
887 if (tunnel->parms.o_flags&GRE_CSUM)
888 addend += 4;
889 if (tunnel->parms.o_flags&GRE_KEY)
890 addend += 4;
891 if (tunnel->parms.o_flags&GRE_SEQ)
892 addend += 4;
893 }
Herbert Xuc95b8192008-10-09 11:58:54 -0700894 dev->needed_headroom = addend + hlen;
Herbert Xu42aa9162008-10-09 11:59:32 -0700895 mtu -= dev->hard_header_len - addend;
896
897 if (mtu < 68)
898 mtu = 68;
899
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800900 tunnel->hlen = addend;
901
Herbert Xu42aa9162008-10-09 11:59:32 -0700902 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800903}
904
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905static int
906ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
907{
908 int err = 0;
909 struct ip_tunnel_parm p;
910 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700911 struct net *net = dev_net(dev);
912 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913
914 switch (cmd) {
915 case SIOCGETTUNNEL:
916 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700917 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
919 err = -EFAULT;
920 break;
921 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700922 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 }
924 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800925 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 memcpy(&p, &t->parms, sizeof(p));
927 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
928 err = -EFAULT;
929 break;
930
931 case SIOCADDTUNNEL:
932 case SIOCCHGTUNNEL:
933 err = -EPERM;
934 if (!capable(CAP_NET_ADMIN))
935 goto done;
936
937 err = -EFAULT;
938 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
939 goto done;
940
941 err = -EINVAL;
942 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
943 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
944 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
945 goto done;
946 if (p.iph.ttl)
947 p.iph.frag_off |= htons(IP_DF);
948
949 if (!(p.i_flags&GRE_KEY))
950 p.i_key = 0;
951 if (!(p.o_flags&GRE_KEY))
952 p.o_key = 0;
953
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700954 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700956 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 if (t != NULL) {
958 if (t->dev != dev) {
959 err = -EEXIST;
960 break;
961 }
962 } else {
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800963 unsigned nflags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964
Patrick McHardy2941a482006-01-08 22:05:26 -0800965 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966
Joe Perchesf97c1e02007-12-16 13:45:43 -0800967 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 nflags = IFF_BROADCAST;
969 else if (p.iph.daddr)
970 nflags = IFF_POINTOPOINT;
971
972 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
973 err = -EINVAL;
974 break;
975 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700976 ipgre_tunnel_unlink(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977 t->parms.iph.saddr = p.iph.saddr;
978 t->parms.iph.daddr = p.iph.daddr;
979 t->parms.i_key = p.i_key;
980 t->parms.o_key = p.o_key;
981 memcpy(dev->dev_addr, &p.iph.saddr, 4);
982 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700983 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984 netdev_state_change(dev);
985 }
986 }
987
988 if (t) {
989 err = 0;
990 if (cmd == SIOCCHGTUNNEL) {
991 t->parms.iph.ttl = p.iph.ttl;
992 t->parms.iph.tos = p.iph.tos;
993 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800994 if (t->parms.link != p.link) {
995 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -0700996 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800997 netdev_state_change(dev);
998 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 }
1000 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1001 err = -EFAULT;
1002 } else
1003 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1004 break;
1005
1006 case SIOCDELTUNNEL:
1007 err = -EPERM;
1008 if (!capable(CAP_NET_ADMIN))
1009 goto done;
1010
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001011 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 err = -EFAULT;
1013 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1014 goto done;
1015 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -07001016 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 goto done;
1018 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001019 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 goto done;
1021 dev = t->dev;
1022 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08001023 unregister_netdevice(dev);
1024 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025 break;
1026
1027 default:
1028 err = -EINVAL;
1029 }
1030
1031done:
1032 return err;
1033}
1034
Linus Torvalds1da177e2005-04-16 15:20:36 -07001035static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1036{
Patrick McHardy2941a482006-01-08 22:05:26 -08001037 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001038 if (new_mtu < 68 ||
1039 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040 return -EINVAL;
1041 dev->mtu = new_mtu;
1042 return 0;
1043}
1044
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1073
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001074static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1075 unsigned short type,
1076 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077{
Patrick McHardy2941a482006-01-08 22:05:26 -08001078 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001080 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081
1082 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1083 p[0] = t->parms.o_flags;
1084 p[1] = htons(type);
1085
1086 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001087 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001089
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 if (saddr)
1091 memcpy(&iph->saddr, saddr, 4);
1092
1093 if (daddr) {
1094 memcpy(&iph->daddr, daddr, 4);
1095 return t->hlen;
1096 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001097 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001099
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100 return -t->hlen;
1101}
1102
Timo Teras6a5f44d2007-10-23 20:31:53 -07001103static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1104{
Jianjun Kong6ed2533e2008-11-03 00:25:16 -08001105 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001106 memcpy(haddr, &iph->saddr, 4);
1107 return 4;
1108}
1109
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001110static const struct header_ops ipgre_header_ops = {
1111 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001112 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001113};
1114
Timo Teras6a5f44d2007-10-23 20:31:53 -07001115#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116static int ipgre_open(struct net_device *dev)
1117{
Patrick McHardy2941a482006-01-08 22:05:26 -08001118 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119
Joe Perchesf97c1e02007-12-16 13:45:43 -08001120 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121 struct flowi fl = { .oif = t->parms.link,
1122 .nl_u = { .ip4_u =
1123 { .daddr = t->parms.iph.daddr,
1124 .saddr = t->parms.iph.saddr,
1125 .tos = RT_TOS(t->parms.iph.tos) } },
1126 .proto = IPPROTO_GRE };
1127 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -07001128 if (ip_route_output_key(dev_net(dev), &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 return -EADDRNOTAVAIL;
1130 dev = rt->u.dst.dev;
1131 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001132 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 return -EADDRNOTAVAIL;
1134 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001135 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 }
1137 return 0;
1138}
1139
1140static int ipgre_close(struct net_device *dev)
1141{
Patrick McHardy2941a482006-01-08 22:05:26 -08001142 struct ip_tunnel *t = netdev_priv(dev);
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001143
Joe Perchesf97c1e02007-12-16 13:45:43 -08001144 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001145 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001146 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 if (in_dev) {
1148 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1149 in_dev_put(in_dev);
1150 }
1151 }
1152 return 0;
1153}
1154
1155#endif
1156
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001157static const struct net_device_ops ipgre_netdev_ops = {
1158 .ndo_init = ipgre_tunnel_init,
1159 .ndo_uninit = ipgre_tunnel_uninit,
1160#ifdef CONFIG_NET_IPGRE_BROADCAST
1161 .ndo_open = ipgre_open,
1162 .ndo_stop = ipgre_close,
1163#endif
1164 .ndo_start_xmit = ipgre_tunnel_xmit,
1165 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1166 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1167};
1168
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169static void ipgre_tunnel_setup(struct net_device *dev)
1170{
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001171 dev->netdev_ops = &ipgre_netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172 dev->destructor = free_netdev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173
1174 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001175 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001176 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 dev->flags = IFF_NOARP;
1178 dev->iflink = 0;
1179 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001180 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181}
1182
1183static int ipgre_tunnel_init(struct net_device *dev)
1184{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 struct ip_tunnel *tunnel;
1186 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187
Patrick McHardy2941a482006-01-08 22:05:26 -08001188 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 iph = &tunnel->parms.iph;
1190
1191 tunnel->dev = dev;
1192 strcpy(tunnel->parms.name, dev->name);
1193
1194 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1195 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1196
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001199 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200 if (!iph->saddr)
1201 return -EINVAL;
1202 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001203 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 }
1205#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001206 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001207 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 return 0;
1210}
1211
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001212static void ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213{
Patrick McHardy2941a482006-01-08 22:05:26 -08001214 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 struct iphdr *iph = &tunnel->parms.iph;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001216 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217
1218 tunnel->dev = dev;
1219 strcpy(tunnel->parms.name, dev->name);
1220
1221 iph->version = 4;
1222 iph->protocol = IPPROTO_GRE;
1223 iph->ihl = 5;
1224 tunnel->hlen = sizeof(struct iphdr) + 4;
1225
1226 dev_hold(dev);
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001227 ign->tunnels_wc[0] = tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228}
1229
1230
1231static struct net_protocol ipgre_protocol = {
1232 .handler = ipgre_rcv,
1233 .err_handler = ipgre_err,
Pavel Emelyanovf96c1482008-04-16 01:11:36 -07001234 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235};
1236
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001237static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1238{
1239 int prio;
1240
1241 for (prio = 0; prio < 4; prio++) {
1242 int h;
1243 for (h = 0; h < HASH_SIZE; h++) {
1244 struct ip_tunnel *t;
1245 while ((t = ign->tunnels[prio][h]) != NULL)
1246 unregister_netdevice(t->dev);
1247 }
1248 }
1249}
1250
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001251static int ipgre_init_net(struct net *net)
1252{
1253 int err;
1254 struct ipgre_net *ign;
1255
1256 err = -ENOMEM;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001257 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001258 if (ign == NULL)
1259 goto err_alloc;
1260
1261 err = net_assign_generic(net, ipgre_net_id, ign);
1262 if (err < 0)
1263 goto err_assign;
1264
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001265 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1266 ipgre_tunnel_setup);
1267 if (!ign->fb_tunnel_dev) {
1268 err = -ENOMEM;
1269 goto err_alloc_dev;
1270 }
Alexey Dobriyanbe77e592008-11-23 17:26:26 -08001271 dev_net_set(ign->fb_tunnel_dev, net);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001272
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001273 ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
Herbert Xuc19e6542008-10-09 11:59:55 -07001274 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001275
1276 if ((err = register_netdev(ign->fb_tunnel_dev)))
1277 goto err_reg_dev;
1278
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001279 return 0;
1280
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001281err_reg_dev:
1282 free_netdev(ign->fb_tunnel_dev);
1283err_alloc_dev:
1284 /* nothing */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001285err_assign:
1286 kfree(ign);
1287err_alloc:
1288 return err;
1289}
1290
1291static void ipgre_exit_net(struct net *net)
1292{
1293 struct ipgre_net *ign;
1294
1295 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001296 rtnl_lock();
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001297 ipgre_destroy_tunnels(ign);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001298 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001299 kfree(ign);
1300}
1301
1302static struct pernet_operations ipgre_net_ops = {
1303 .init = ipgre_init_net,
1304 .exit = ipgre_exit_net,
1305};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306
Herbert Xuc19e6542008-10-09 11:59:55 -07001307static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1308{
1309 __be16 flags;
1310
1311 if (!data)
1312 return 0;
1313
1314 flags = 0;
1315 if (data[IFLA_GRE_IFLAGS])
1316 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1317 if (data[IFLA_GRE_OFLAGS])
1318 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1319 if (flags & (GRE_VERSION|GRE_ROUTING))
1320 return -EINVAL;
1321
1322 return 0;
1323}
1324
Herbert Xue1a80002008-10-09 12:00:17 -07001325static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1326{
1327 __be32 daddr;
1328
1329 if (tb[IFLA_ADDRESS]) {
1330 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1331 return -EINVAL;
1332 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1333 return -EADDRNOTAVAIL;
1334 }
1335
1336 if (!data)
1337 goto out;
1338
1339 if (data[IFLA_GRE_REMOTE]) {
1340 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1341 if (!daddr)
1342 return -EINVAL;
1343 }
1344
1345out:
1346 return ipgre_tunnel_validate(tb, data);
1347}
1348
Herbert Xuc19e6542008-10-09 11:59:55 -07001349static void ipgre_netlink_parms(struct nlattr *data[],
1350 struct ip_tunnel_parm *parms)
1351{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001352 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001353
1354 parms->iph.protocol = IPPROTO_GRE;
1355
1356 if (!data)
1357 return;
1358
1359 if (data[IFLA_GRE_LINK])
1360 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1361
1362 if (data[IFLA_GRE_IFLAGS])
1363 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1364
1365 if (data[IFLA_GRE_OFLAGS])
1366 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1367
1368 if (data[IFLA_GRE_IKEY])
1369 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1370
1371 if (data[IFLA_GRE_OKEY])
1372 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1373
1374 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001375 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001376
1377 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001378 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001379
1380 if (data[IFLA_GRE_TTL])
1381 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1382
1383 if (data[IFLA_GRE_TOS])
1384 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1385
1386 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1387 parms->iph.frag_off = htons(IP_DF);
1388}
1389
Herbert Xue1a80002008-10-09 12:00:17 -07001390static int ipgre_tap_init(struct net_device *dev)
1391{
1392 struct ip_tunnel *tunnel;
1393
1394 tunnel = netdev_priv(dev);
1395
1396 tunnel->dev = dev;
1397 strcpy(tunnel->parms.name, dev->name);
1398
1399 ipgre_tunnel_bind_dev(dev);
1400
1401 return 0;
1402}
1403
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001404static const struct net_device_ops ipgre_tap_netdev_ops = {
1405 .ndo_init = ipgre_tap_init,
1406 .ndo_uninit = ipgre_tunnel_uninit,
1407 .ndo_start_xmit = ipgre_tunnel_xmit,
1408 .ndo_set_mac_address = eth_mac_addr,
1409 .ndo_validate_addr = eth_validate_addr,
1410 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1411};
1412
Herbert Xue1a80002008-10-09 12:00:17 -07001413static void ipgre_tap_setup(struct net_device *dev)
1414{
1415
1416 ether_setup(dev);
1417
Stephen Hemmingerb8c26a32008-11-20 20:34:29 -08001418 dev->netdev_ops = &ipgre_netdev_ops;
Herbert Xue1a80002008-10-09 12:00:17 -07001419 dev->destructor = free_netdev;
Herbert Xue1a80002008-10-09 12:00:17 -07001420
1421 dev->iflink = 0;
1422 dev->features |= NETIF_F_NETNS_LOCAL;
1423}
1424
Herbert Xuc19e6542008-10-09 11:59:55 -07001425static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1426 struct nlattr *data[])
1427{
1428 struct ip_tunnel *nt;
1429 struct net *net = dev_net(dev);
1430 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1431 int mtu;
1432 int err;
1433
1434 nt = netdev_priv(dev);
1435 ipgre_netlink_parms(data, &nt->parms);
1436
Herbert Xue1a80002008-10-09 12:00:17 -07001437 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001438 return -EEXIST;
1439
Herbert Xue1a80002008-10-09 12:00:17 -07001440 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1441 random_ether_addr(dev->dev_addr);
1442
Herbert Xuc19e6542008-10-09 11:59:55 -07001443 mtu = ipgre_tunnel_bind_dev(dev);
1444 if (!tb[IFLA_MTU])
1445 dev->mtu = mtu;
1446
1447 err = register_netdevice(dev);
1448 if (err)
1449 goto out;
1450
1451 dev_hold(dev);
1452 ipgre_tunnel_link(ign, nt);
1453
1454out:
1455 return err;
1456}
1457
1458static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1459 struct nlattr *data[])
1460{
1461 struct ip_tunnel *t, *nt;
1462 struct net *net = dev_net(dev);
1463 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1464 struct ip_tunnel_parm p;
1465 int mtu;
1466
1467 if (dev == ign->fb_tunnel_dev)
1468 return -EINVAL;
1469
1470 nt = netdev_priv(dev);
1471 ipgre_netlink_parms(data, &p);
1472
1473 t = ipgre_tunnel_locate(net, &p, 0);
1474
1475 if (t) {
1476 if (t->dev != dev)
1477 return -EEXIST;
1478 } else {
1479 unsigned nflags = 0;
1480
1481 t = nt;
1482
1483 if (ipv4_is_multicast(p.iph.daddr))
1484 nflags = IFF_BROADCAST;
1485 else if (p.iph.daddr)
1486 nflags = IFF_POINTOPOINT;
1487
1488 if ((dev->flags ^ nflags) &
1489 (IFF_POINTOPOINT | IFF_BROADCAST))
1490 return -EINVAL;
1491
1492 ipgre_tunnel_unlink(ign, t);
1493 t->parms.iph.saddr = p.iph.saddr;
1494 t->parms.iph.daddr = p.iph.daddr;
1495 t->parms.i_key = p.i_key;
1496 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1497 memcpy(dev->broadcast, &p.iph.daddr, 4);
1498 ipgre_tunnel_link(ign, t);
1499 netdev_state_change(dev);
1500 }
1501
1502 t->parms.o_key = p.o_key;
1503 t->parms.iph.ttl = p.iph.ttl;
1504 t->parms.iph.tos = p.iph.tos;
1505 t->parms.iph.frag_off = p.iph.frag_off;
1506
1507 if (t->parms.link != p.link) {
1508 t->parms.link = p.link;
1509 mtu = ipgre_tunnel_bind_dev(dev);
1510 if (!tb[IFLA_MTU])
1511 dev->mtu = mtu;
1512 netdev_state_change(dev);
1513 }
1514
1515 return 0;
1516}
1517
1518static size_t ipgre_get_size(const struct net_device *dev)
1519{
1520 return
1521 /* IFLA_GRE_LINK */
1522 nla_total_size(4) +
1523 /* IFLA_GRE_IFLAGS */
1524 nla_total_size(2) +
1525 /* IFLA_GRE_OFLAGS */
1526 nla_total_size(2) +
1527 /* IFLA_GRE_IKEY */
1528 nla_total_size(4) +
1529 /* IFLA_GRE_OKEY */
1530 nla_total_size(4) +
1531 /* IFLA_GRE_LOCAL */
1532 nla_total_size(4) +
1533 /* IFLA_GRE_REMOTE */
1534 nla_total_size(4) +
1535 /* IFLA_GRE_TTL */
1536 nla_total_size(1) +
1537 /* IFLA_GRE_TOS */
1538 nla_total_size(1) +
1539 /* IFLA_GRE_PMTUDISC */
1540 nla_total_size(1) +
1541 0;
1542}
1543
1544static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1545{
1546 struct ip_tunnel *t = netdev_priv(dev);
1547 struct ip_tunnel_parm *p = &t->parms;
1548
1549 NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1550 NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1551 NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
Patrick McHardyba9e64b2008-10-10 12:10:30 -07001552 NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1553 NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001554 NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1555 NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
Herbert Xuc19e6542008-10-09 11:59:55 -07001556 NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1557 NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1558 NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1559
1560 return 0;
1561
1562nla_put_failure:
1563 return -EMSGSIZE;
1564}
1565
1566static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1567 [IFLA_GRE_LINK] = { .type = NLA_U32 },
1568 [IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
1569 [IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
1570 [IFLA_GRE_IKEY] = { .type = NLA_U32 },
1571 [IFLA_GRE_OKEY] = { .type = NLA_U32 },
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001572 [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1573 [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
Herbert Xuc19e6542008-10-09 11:59:55 -07001574 [IFLA_GRE_TTL] = { .type = NLA_U8 },
1575 [IFLA_GRE_TOS] = { .type = NLA_U8 },
1576 [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
1577};
1578
1579static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1580 .kind = "gre",
1581 .maxtype = IFLA_GRE_MAX,
1582 .policy = ipgre_policy,
1583 .priv_size = sizeof(struct ip_tunnel),
1584 .setup = ipgre_tunnel_setup,
1585 .validate = ipgre_tunnel_validate,
1586 .newlink = ipgre_newlink,
1587 .changelink = ipgre_changelink,
1588 .get_size = ipgre_get_size,
1589 .fill_info = ipgre_fill_info,
1590};
1591
Herbert Xue1a80002008-10-09 12:00:17 -07001592static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1593 .kind = "gretap",
1594 .maxtype = IFLA_GRE_MAX,
1595 .policy = ipgre_policy,
1596 .priv_size = sizeof(struct ip_tunnel),
1597 .setup = ipgre_tap_setup,
1598 .validate = ipgre_tap_validate,
1599 .newlink = ipgre_newlink,
1600 .changelink = ipgre_changelink,
1601 .get_size = ipgre_get_size,
1602 .fill_info = ipgre_fill_info,
1603};
1604
Linus Torvalds1da177e2005-04-16 15:20:36 -07001605/*
1606 * And now the modules code and kernel interface.
1607 */
1608
1609static int __init ipgre_init(void)
1610{
1611 int err;
1612
1613 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1614
1615 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1616 printk(KERN_INFO "ipgre init: can't add protocol\n");
1617 return -EAGAIN;
1618 }
1619
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001620 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1621 if (err < 0)
Herbert Xuc19e6542008-10-09 11:59:55 -07001622 goto gen_device_failed;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001623
Herbert Xuc19e6542008-10-09 11:59:55 -07001624 err = rtnl_link_register(&ipgre_link_ops);
1625 if (err < 0)
1626 goto rtnl_link_failed;
1627
Herbert Xue1a80002008-10-09 12:00:17 -07001628 err = rtnl_link_register(&ipgre_tap_ops);
1629 if (err < 0)
1630 goto tap_ops_failed;
1631
Herbert Xuc19e6542008-10-09 11:59:55 -07001632out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001634
Herbert Xue1a80002008-10-09 12:00:17 -07001635tap_ops_failed:
1636 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001637rtnl_link_failed:
1638 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1639gen_device_failed:
1640 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1641 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642}
1643
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001644static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645{
Herbert Xue1a80002008-10-09 12:00:17 -07001646 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001647 rtnl_link_unregister(&ipgre_link_ops);
1648 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1650 printk(KERN_INFO "ipgre close: can't remove protocol\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001651}
1652
1653module_init(ipgre_init);
1654module_exit(ipgre_fini);
1655MODULE_LICENSE("GPL");
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001656MODULE_ALIAS_RTNL_LINK("gre");
1657MODULE_ALIAS_RTNL_LINK("gretap");