blob: 191ef7588134b501571d8922d96fded4697170b6 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Herbert Xue1a80002008-10-09 12:00:17 -070030#include <linux/etherdevice.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080031#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
33#include <net/sock.h>
34#include <net/ip.h>
35#include <net/icmp.h>
36#include <net/protocol.h>
37#include <net/ipip.h>
38#include <net/arp.h>
39#include <net/checksum.h>
40#include <net/dsfield.h>
41#include <net/inet_ecn.h>
42#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070043#include <net/net_namespace.h>
44#include <net/netns/generic.h>
Herbert Xuc19e6542008-10-09 11:59:55 -070045#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
47#ifdef CONFIG_IPV6
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#endif
52
53/*
54 Problems & solutions
55 --------------------
56
57 1. The most important issue is detecting local dead loops.
58 They would cause complete host lockup in transmit, which
59 would be "resolved" by stack overflow or, if queueing is enabled,
60 with infinite looping in net_bh.
61
62 We cannot track such dead loops during route installation,
63 it is infeasible task. The most general solutions would be
64 to keep skb->encapsulation counter (sort of local ttl),
65 and silently drop packet when it expires. It is the best
66 solution, but it supposes maintaining new variable in ALL
67 skb, even if no tunneling is used.
68
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090069 Current solution: t->recursion lock breaks dead loops. It looks
Linus Torvalds1da177e2005-04-16 15:20:36 -070070 like dev->tbusy flag, but I preferred new variable, because
71 the semantics is different. One day, when hard_start_xmit
72 will be multithreaded we will have to use skb->encapsulation.
73
74
75
76 2. Networking dead loops would not kill routers, but would really
77 kill network. IP hop limit plays role of "t->recursion" in this case,
78 if we copy it from packet being encapsulated to upper header.
79 It is very good solution, but it introduces two problems:
80
81 - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
82 do not work over tunnels.
83 - traceroute does not work. I planned to relay ICMP from tunnel,
84 so that this problem would be solved and traceroute output
85 would be even more informative. This idea appeared to be wrong:
86 only Linux complies to rfc1812 now (yes, guys, Linux is the only
87 true router now :-)), all routers (at least, in neighbourhood of mine)
88 return only 8 bytes of payload. It is the end.
89
90 Hence, if we want that OSPF worked or traceroute said something reasonable,
91 we should search for another solution.
92
93 One of them is to parse packet trying to detect inner encapsulation
94 made by our node. It is difficult or even impossible, especially,
94 taking into account fragmentation. To be short, it is not a solution at all.
96
97 Current solution: The solution was UNEXPECTEDLY SIMPLE.
98 We force DF flag on tunnels with preconfigured hop limit,
99 that is ALL. :-) Well, it does not remove the problem completely,
100 but exponential growth of network traffic is changed to linear
101 (branches, that exceed pmtu are pruned) and tunnel mtu
102 fastly degrades to value <68, where looping stops.
103 Yes, it is not good if there exists a router in the loop,
104 which does not force DF, even when encapsulating packets have DF set.
105 But it is not our problem! Nobody could accuse us, we made
106 all that we could make. Even if it is your gated who injected
107 fatal route to network, even if it were you who configured
108 fatal static route: you are innocent. :-)
109
110
111
112 3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
113 practically identical code. It would be good to glue them
114 together, but it is not very evident, how to make them modular.
115 sit is integral part of IPv6, ipip and gre are naturally modular.
116 We could extract common parts (hash table, ioctl etc)
117 to a separate module (ip_tunnel.c).
118
119 Alexey Kuznetsov.
120 */
121
Herbert Xuc19e6542008-10-09 11:59:55 -0700122static struct rtnl_link_ops ipgre_link_ops __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700123static int ipgre_tunnel_init(struct net_device *dev);
124static void ipgre_tunnel_setup(struct net_device *dev);
Herbert Xu42aa9162008-10-09 11:59:32 -0700125static int ipgre_tunnel_bind_dev(struct net_device *dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126
127/* Fallback tunnel: no source, no destination, no key, no options */
128
129static int ipgre_fb_tunnel_init(struct net_device *dev);
130
/* Number of buckets in each of the tunnel hash tables. */
#define HASH_SIZE  16

/* Id handed to net_generic() to obtain this module's per-namespace data. */
static int ipgre_net_id;

/* Per-network-namespace GRE state. */
struct ipgre_net {
	/* Hash tables, indexed as tunnels[table][bucket]. */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Device of the fallback tunnel, used when no other tunnel matches. */
	struct net_device *fb_tunnel_dev;
};
139
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140/* Tunnel hash table */
141
142/*
143 4 hash tables:
144
145 3: (remote,local)
146 2: (remote,*)
147 1: (*,local)
148 0: (*,*)
149
150 We require exact key match i.e. if a key is present in packet
151 it will match only tunnel with the same key; if it is not present,
152 it will match only keyless tunnel.
153
154 All keyless packets, if not matched by configured keyless tunnels,
155 will match fallback tunnel.
156 */
157
/*
 * Fold a 32-bit address or key into a 4-bit bucket index (0..15) by
 * XOR-ing the value with itself shifted right by four bits.
 *
 * The argument is parenthesized so that an expression such as
 * HASH(a ^ b) is cast and shifted as a whole.  Note that the argument
 * is still evaluated twice, so it must be free of side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159
/* Shorthand for the four tables of struct ipgre_net, least specific first. */
#define tunnels_wc	tunnels[0]	/* (*,*)          */
#define tunnels_l	tunnels[1]	/* (*,local)      */
#define tunnels_r	tunnels[2]	/* (remote,*)     */
#define tunnels_r_l	tunnels[3]	/* (remote,local) */

/*
 * Guards the hash chains: taken for writing when a tunnel is linked or
 * unlinked, and for reading around lookups on the receive/error paths.
 */
static DEFINE_RWLOCK(ipgre_lock);
166
167/* Given src, dst and key, find appropriate for input tunnel. */
168
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700169static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
Herbert Xue1a80002008-10-09 12:00:17 -0700170 __be32 remote, __be32 local,
171 __be32 key, __be16 gre_proto)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172{
173 unsigned h0 = HASH(remote);
174 unsigned h1 = HASH(key);
175 struct ip_tunnel *t;
Herbert Xue1a80002008-10-09 12:00:17 -0700176 struct ip_tunnel *t2 = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700177 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Herbert Xue1a80002008-10-09 12:00:17 -0700178 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
179 ARPHRD_ETHER : ARPHRD_IPGRE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700181 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
Herbert Xue1a80002008-10-09 12:00:17 -0700183 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
184 if (t->dev->type == dev_type)
185 return t;
186 if (t->dev->type == ARPHRD_IPGRE && !t2)
187 t2 = t;
188 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 }
190 }
Herbert Xue1a80002008-10-09 12:00:17 -0700191
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700192 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700193 if (remote == t->parms.iph.daddr) {
Herbert Xue1a80002008-10-09 12:00:17 -0700194 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
195 if (t->dev->type == dev_type)
196 return t;
197 if (t->dev->type == ARPHRD_IPGRE && !t2)
198 t2 = t;
199 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 }
201 }
Herbert Xue1a80002008-10-09 12:00:17 -0700202
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700203 for (t = ign->tunnels_l[h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204 if (local == t->parms.iph.saddr ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800205 (local == t->parms.iph.daddr &&
206 ipv4_is_multicast(local))) {
Herbert Xue1a80002008-10-09 12:00:17 -0700207 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
208 if (t->dev->type == dev_type)
209 return t;
210 if (t->dev->type == ARPHRD_IPGRE && !t2)
211 t2 = t;
212 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 }
214 }
Herbert Xue1a80002008-10-09 12:00:17 -0700215
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700216 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
Herbert Xue1a80002008-10-09 12:00:17 -0700217 if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
218 if (t->dev->type == dev_type)
219 return t;
220 if (t->dev->type == ARPHRD_IPGRE && !t2)
221 t2 = t;
222 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223 }
224
Herbert Xue1a80002008-10-09 12:00:17 -0700225 if (t2)
226 return t2;
227
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700228 if (ign->fb_tunnel_dev->flags&IFF_UP)
229 return netdev_priv(ign->fb_tunnel_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230 return NULL;
231}
232
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700233static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
234 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900236 __be32 remote = parms->iph.daddr;
237 __be32 local = parms->iph.saddr;
238 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239 unsigned h = HASH(key);
240 int prio = 0;
241
242 if (local)
243 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800244 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 prio |= 2;
246 h ^= HASH(remote);
247 }
248
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700249 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250}
251
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700252static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
253 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900254{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700255 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900256}
257
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700258static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700260 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261
262 t->next = *tp;
263 write_lock_bh(&ipgre_lock);
264 *tp = t;
265 write_unlock_bh(&ipgre_lock);
266}
267
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700268static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269{
270 struct ip_tunnel **tp;
271
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700272 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 if (t == *tp) {
274 write_lock_bh(&ipgre_lock);
275 *tp = t->next;
276 write_unlock_bh(&ipgre_lock);
277 break;
278 }
279 }
280}
281
Herbert Xue1a80002008-10-09 12:00:17 -0700282static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
283 struct ip_tunnel_parm *parms,
284 int type)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285{
Al Virod5a0a1e2006-11-08 00:23:14 -0800286 __be32 remote = parms->iph.daddr;
287 __be32 local = parms->iph.saddr;
288 __be32 key = parms->i_key;
Herbert Xue1a80002008-10-09 12:00:17 -0700289 struct ip_tunnel *t, **tp;
290 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
291
292 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
293 if (local == t->parms.iph.saddr &&
294 remote == t->parms.iph.daddr &&
295 key == t->parms.i_key &&
296 type == t->dev->type)
297 break;
298
299 return t;
300}
301
302static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
303 struct ip_tunnel_parm *parms, int create)
304{
305 struct ip_tunnel *t, *nt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700308 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309
Herbert Xue1a80002008-10-09 12:00:17 -0700310 t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
311 if (t || !create)
312 return t;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
314 if (parms->name[0])
315 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800316 else
317 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318
319 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
320 if (!dev)
321 return NULL;
322
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700323 dev_net_set(dev, net);
324
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800325 if (strchr(name, '%')) {
326 if (dev_alloc_name(dev, name) < 0)
327 goto failed_free;
328 }
329
Patrick McHardy2941a482006-01-08 22:05:26 -0800330 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 nt->parms = *parms;
Herbert Xuc19e6542008-10-09 11:59:55 -0700332 dev->rtnl_link_ops = &ipgre_link_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333
Herbert Xu42aa9162008-10-09 11:59:32 -0700334 dev->mtu = ipgre_tunnel_bind_dev(dev);
335
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800336 if (register_netdevice(dev) < 0)
337 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700340 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 return nt;
342
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800343failed_free:
344 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 return NULL;
346}
347
348static void ipgre_tunnel_uninit(struct net_device *dev)
349{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700350 struct net *net = dev_net(dev);
351 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
352
353 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354 dev_put(dev);
355}
356
357
358static void ipgre_err(struct sk_buff *skb, u32 info)
359{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360
Rami Rosen071f92d2008-05-21 17:47:54 -0700361/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 8 bytes of packet payload. It means, that precise relaying of
363 ICMP in the real Internet is absolutely infeasible.
364
365 Moreover, Cisco "wise men" put GRE key to the third word
366 in GRE header. It makes impossible maintaining even soft state for keyed
367 GRE tunnels with enabled checksum. Tell them "thank you".
368
369 Well, I wonder, rfc1812 was written by Cisco employee,
370 what the hell these idiots break standrads established
371 by themself???
372 */
373
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800374 struct iphdr *iph = (struct iphdr *)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800375 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300377 const int type = icmp_hdr(skb)->type;
378 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800380 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381
382 flags = p[0];
383 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
384 if (flags&(GRE_VERSION|GRE_ROUTING))
385 return;
386 if (flags&GRE_KEY) {
387 grehlen += 4;
388 if (flags&GRE_CSUM)
389 grehlen += 4;
390 }
391 }
392
393 /* If only 8 bytes returned, keyed message will be dropped here */
394 if (skb_headlen(skb) < grehlen)
395 return;
396
397 switch (type) {
398 default:
399 case ICMP_PARAMETERPROB:
400 return;
401
402 case ICMP_DEST_UNREACH:
403 switch (code) {
404 case ICMP_SR_FAILED:
405 case ICMP_PORT_UNREACH:
406 /* Impossible event. */
407 return;
408 case ICMP_FRAG_NEEDED:
409 /* Soft state for pmtu is maintained by IP core. */
410 return;
411 default:
412 /* All others are translated to HOST_UNREACH.
413 rfc2003 contains "deep thoughts" about NET_UNREACH,
414 I believe they are just ether pollution. --ANK
415 */
416 break;
417 }
418 break;
419 case ICMP_TIME_EXCEEDED:
420 if (code != ICMP_EXC_TTL)
421 return;
422 break;
423 }
424
425 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700426 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
Herbert Xue1a80002008-10-09 12:00:17 -0700427 flags & GRE_KEY ?
428 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
429 p[1]);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800430 if (t == NULL || t->parms.iph.daddr == 0 ||
431 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 goto out;
433
434 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
435 goto out;
436
437 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
438 t->err_count++;
439 else
440 t->err_count = 1;
441 t->err_time = jiffies;
442out:
443 read_unlock(&ipgre_lock);
444 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445}
446
447static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
448{
449 if (INET_ECN_is_ce(iph->tos)) {
450 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700451 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700453 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 }
455 }
456}
457
458static inline u8
459ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
460{
461 u8 inner = 0;
462 if (skb->protocol == htons(ETH_P_IP))
463 inner = old_iph->tos;
464 else if (skb->protocol == htons(ETH_P_IPV6))
465 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
466 return INET_ECN_encapsulate(tos, inner);
467}
468
469static int ipgre_rcv(struct sk_buff *skb)
470{
471 struct iphdr *iph;
472 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800473 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800474 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800475 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 u32 seqno = 0;
477 struct ip_tunnel *tunnel;
478 int offset = 4;
Herbert Xue1a80002008-10-09 12:00:17 -0700479 __be16 gre_proto;
Herbert Xu64194c32008-10-09 12:03:17 -0700480 unsigned int len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481
482 if (!pskb_may_pull(skb, 16))
483 goto drop_nolock;
484
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700485 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800487 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488
489 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
490 /* - Version must be 0.
491 - We do not support routing headers.
492 */
493 if (flags&(GRE_VERSION|GRE_ROUTING))
494 goto drop_nolock;
495
496 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800497 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700498 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800499 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800500 if (!csum)
501 break;
502 /* fall through */
503 case CHECKSUM_NONE:
504 skb->csum = 0;
505 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700506 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507 }
508 offset += 4;
509 }
510 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800511 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 offset += 4;
513 }
514 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800515 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 offset += 4;
517 }
518 }
519
Herbert Xue1a80002008-10-09 12:00:17 -0700520 gre_proto = *(__be16 *)(h + 2);
521
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700523 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
Herbert Xue1a80002008-10-09 12:00:17 -0700524 iph->saddr, iph->daddr, key,
525 gre_proto))) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700526 struct net_device_stats *stats = &tunnel->dev->stats;
527
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 secpath_reset(skb);
529
Herbert Xue1a80002008-10-09 12:00:17 -0700530 skb->protocol = gre_proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 /* WCCP version 1 and 2 protocol decoding.
532 * - Change protocol to IP
533 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
534 */
Herbert Xue1a80002008-10-09 12:00:17 -0700535 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700536 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900537 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 offset += 4;
539 }
540
Timo Teras1d069162007-12-20 00:10:33 -0800541 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300542 __pskb_pull(skb, offset);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700543 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 skb->pkt_type = PACKET_HOST;
545#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800546 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547 /* Looped back packet, drop it! */
Eric Dumazetee6b9672008-03-05 18:30:47 -0800548 if (skb->rtable->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 goto drop;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700550 stats->multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 skb->pkt_type = PACKET_BROADCAST;
552 }
553#endif
554
555 if (((flags&GRE_CSUM) && csum) ||
556 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700557 stats->rx_crc_errors++;
558 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559 goto drop;
560 }
561 if (tunnel->parms.i_flags&GRE_SEQ) {
562 if (!(flags&GRE_SEQ) ||
563 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700564 stats->rx_fifo_errors++;
565 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 goto drop;
567 }
568 tunnel->i_seqno = seqno + 1;
569 }
Herbert Xue1a80002008-10-09 12:00:17 -0700570
Herbert Xu64194c32008-10-09 12:03:17 -0700571 len = skb->len;
572
Herbert Xue1a80002008-10-09 12:00:17 -0700573 /* Warning: All skb pointers will be invalidated! */
574 if (tunnel->dev->type == ARPHRD_ETHER) {
575 if (!pskb_may_pull(skb, ETH_HLEN)) {
576 stats->rx_length_errors++;
577 stats->rx_errors++;
578 goto drop;
579 }
580
581 iph = ip_hdr(skb);
582 skb->protocol = eth_type_trans(skb, tunnel->dev);
583 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
584 }
585
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700586 stats->rx_packets++;
Herbert Xu64194c32008-10-09 12:03:17 -0700587 stats->rx_bytes += len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 skb->dev = tunnel->dev;
589 dst_release(skb->dst);
590 skb->dst = NULL;
591 nf_reset(skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700592
593 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 ipgre_ecn_decapsulate(iph, skb);
Herbert Xue1a80002008-10-09 12:00:17 -0700595
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 netif_rx(skb);
597 read_unlock(&ipgre_lock);
598 return(0);
599 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700600 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601
602drop:
603 read_unlock(&ipgre_lock);
604drop_nolock:
605 kfree_skb(skb);
606 return(0);
607}
608
609static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
610{
Patrick McHardy2941a482006-01-08 22:05:26 -0800611 struct ip_tunnel *tunnel = netdev_priv(dev);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700612 struct net_device_stats *stats = &tunnel->dev->stats;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700613 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 struct iphdr *tiph;
615 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800616 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617 struct rtable *rt; /* Route to the other host */
618 struct net_device *tdev; /* Device to other host */
619 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700620 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800622 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 int mtu;
624
625 if (tunnel->recursion++) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700626 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 goto tx_error;
628 }
629
Herbert Xue1a80002008-10-09 12:00:17 -0700630 if (dev->type == ARPHRD_ETHER)
631 IPCB(skb)->flags = 0;
632
633 if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 gre_hlen = 0;
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800635 tiph = (struct iphdr *)skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 } else {
637 gre_hlen = tunnel->hlen;
638 tiph = &tunnel->parms.iph;
639 }
640
641 if ((dst = tiph->daddr) == 0) {
642 /* NBMA tunnel */
643
644 if (skb->dst == NULL) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700645 stats->tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646 goto tx_error;
647 }
648
649 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazetee6b9672008-03-05 18:30:47 -0800650 rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651 if ((dst = rt->rt_gateway) == 0)
652 goto tx_error_icmp;
653 }
654#ifdef CONFIG_IPV6
655 else if (skb->protocol == htons(ETH_P_IPV6)) {
656 struct in6_addr *addr6;
657 int addr_type;
658 struct neighbour *neigh = skb->dst->neighbour;
659
660 if (neigh == NULL)
661 goto tx_error;
662
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800663 addr6 = (struct in6_addr *)&neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664 addr_type = ipv6_addr_type(addr6);
665
666 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700667 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 addr_type = ipv6_addr_type(addr6);
669 }
670
671 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
672 goto tx_error_icmp;
673
674 dst = addr6->s6_addr32[3];
675 }
676#endif
677 else
678 goto tx_error;
679 }
680
681 tos = tiph->tos;
682 if (tos&1) {
683 if (skb->protocol == htons(ETH_P_IP))
684 tos = old_iph->tos;
685 tos &= ~1;
686 }
687
688 {
689 struct flowi fl = { .oif = tunnel->parms.link,
690 .nl_u = { .ip4_u =
691 { .daddr = dst,
692 .saddr = tiph->saddr,
693 .tos = RT_TOS(tos) } },
694 .proto = IPPROTO_GRE };
Pavel Emelyanov96635522008-04-16 01:10:44 -0700695 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700696 stats->tx_carrier_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 goto tx_error;
698 }
699 }
700 tdev = rt->u.dst.dev;
701
702 if (tdev == dev) {
703 ip_rt_put(rt);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700704 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 goto tx_error;
706 }
707
708 df = tiph->frag_off;
709 if (df)
Herbert Xuc95b8192008-10-09 11:58:54 -0700710 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700711 else
712 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
713
714 if (skb->dst)
715 skb->dst->ops->update_pmtu(skb->dst, mtu);
716
717 if (skb->protocol == htons(ETH_P_IP)) {
718 df |= (old_iph->frag_off&htons(IP_DF));
719
720 if ((old_iph->frag_off&htons(IP_DF)) &&
721 mtu < ntohs(old_iph->tot_len)) {
722 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
723 ip_rt_put(rt);
724 goto tx_error;
725 }
726 }
727#ifdef CONFIG_IPV6
728 else if (skb->protocol == htons(ETH_P_IPV6)) {
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800729 struct rt6_info *rt6 = (struct rt6_info *)skb->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730
731 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800732 if ((tunnel->parms.iph.daddr &&
733 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 rt6->rt6i_dst.plen == 128) {
735 rt6->rt6i_flags |= RTF_MODIFIED;
736 skb->dst->metrics[RTAX_MTU-1] = mtu;
737 }
738 }
739
740 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
741 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
742 ip_rt_put(rt);
743 goto tx_error;
744 }
745 }
746#endif
747
748 if (tunnel->err_count > 0) {
749 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
750 tunnel->err_count--;
751
752 dst_link_failure(skb);
753 } else
754 tunnel->err_count = 0;
755 }
756
757 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
758
Patrick McHardycfbba492007-07-09 15:33:40 -0700759 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
760 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
762 if (!new_skb) {
763 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900764 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 dev_kfree_skb(skb);
766 tunnel->recursion--;
767 return 0;
768 }
769 if (skb->sk)
770 skb_set_owner_w(new_skb, skb->sk);
771 dev_kfree_skb(skb);
772 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700773 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 }
775
Herbert Xu64194c32008-10-09 12:03:17 -0700776 skb_reset_transport_header(skb);
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700777 skb_push(skb, gre_hlen);
778 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800780 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
781 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782 dst_release(skb->dst);
783 skb->dst = &rt->u.dst;
784
785 /*
786 * Push down and install the IPIP header.
787 */
788
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700789 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790 iph->version = 4;
791 iph->ihl = sizeof(struct iphdr) >> 2;
792 iph->frag_off = df;
793 iph->protocol = IPPROTO_GRE;
794 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
795 iph->daddr = rt->rt_dst;
796 iph->saddr = rt->rt_src;
797
798 if ((iph->ttl = tiph->ttl) == 0) {
799 if (skb->protocol == htons(ETH_P_IP))
800 iph->ttl = old_iph->ttl;
801#ifdef CONFIG_IPV6
802 else if (skb->protocol == htons(ETH_P_IPV6))
Jianjun Kong6ed2533e2008-11-03 00:25:16 -0800803 iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804#endif
805 else
806 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
807 }
808
Herbert Xue1a80002008-10-09 12:00:17 -0700809 ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
810 ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
811 htons(ETH_P_TEB) : skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700812
813 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800814 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815
816 if (tunnel->parms.o_flags&GRE_SEQ) {
817 ++tunnel->o_seqno;
818 *ptr = htonl(tunnel->o_seqno);
819 ptr--;
820 }
821 if (tunnel->parms.o_flags&GRE_KEY) {
822 *ptr = tunnel->parms.o_key;
823 ptr--;
824 }
825 if (tunnel->parms.o_flags&GRE_CSUM) {
826 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800827 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700828 }
829 }
830
831 nf_reset(skb);
832
833 IPTUNNEL_XMIT();
834 tunnel->recursion--;
835 return 0;
836
837tx_error_icmp:
838 dst_link_failure(skb);
839
840tx_error:
841 stats->tx_errors++;
842 dev_kfree_skb(skb);
843 tunnel->recursion--;
844 return 0;
845}
846
Herbert Xu42aa9162008-10-09 11:59:32 -0700847static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800848{
849 struct net_device *tdev = NULL;
850 struct ip_tunnel *tunnel;
851 struct iphdr *iph;
852 int hlen = LL_MAX_HEADER;
853 int mtu = ETH_DATA_LEN;
854 int addend = sizeof(struct iphdr) + 4;
855
856 tunnel = netdev_priv(dev);
857 iph = &tunnel->parms.iph;
858
Herbert Xuc95b8192008-10-09 11:58:54 -0700859 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800860
861 if (iph->daddr) {
862 struct flowi fl = { .oif = tunnel->parms.link,
863 .nl_u = { .ip4_u =
864 { .daddr = iph->daddr,
865 .saddr = iph->saddr,
866 .tos = RT_TOS(iph->tos) } },
867 .proto = IPPROTO_GRE };
868 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -0700869 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800870 tdev = rt->u.dst.dev;
871 ip_rt_put(rt);
872 }
Herbert Xue1a80002008-10-09 12:00:17 -0700873
874 if (dev->type != ARPHRD_ETHER)
875 dev->flags |= IFF_POINTOPOINT;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800876 }
877
878 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -0700879 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800880
881 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -0700882 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800883 mtu = tdev->mtu;
884 }
885 dev->iflink = tunnel->parms.link;
886
887 /* Precalculate GRE options length */
888 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
889 if (tunnel->parms.o_flags&GRE_CSUM)
890 addend += 4;
891 if (tunnel->parms.o_flags&GRE_KEY)
892 addend += 4;
893 if (tunnel->parms.o_flags&GRE_SEQ)
894 addend += 4;
895 }
Herbert Xuc95b8192008-10-09 11:58:54 -0700896 dev->needed_headroom = addend + hlen;
Herbert Xu42aa9162008-10-09 11:59:32 -0700897 mtu -= dev->hard_header_len - addend;
898
899 if (mtu < 68)
900 mtu = 68;
901
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800902 tunnel->hlen = addend;
903
Herbert Xu42aa9162008-10-09 11:59:32 -0700904 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800905}
906
/*
 * Tunnel configuration ioctl handler (SIOCGETTUNNEL, SIOCADDTUNNEL,
 * SIOCCHGTUNNEL, SIOCDELTUNNEL).
 *
 * The user buffer at ifr->ifr_ifru.ifru_data holds a struct ip_tunnel_parm.
 * When the ioctl is issued on the per-namespace fallback device
 * (ign->fb_tunnel_dev) the parameters select which tunnel to act on;
 * otherwise the request applies to @dev itself.
 *
 * Returns 0 on success or a negative errno: -EPERM, -EFAULT, -EINVAL,
 * -EEXIST, -ENOBUFS, -ENOENT.
 */
static int
ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
	int err = 0;
	struct ip_tunnel_parm p;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ipgre_net *ign = net_generic(net, ipgre_net_id);

	switch (cmd) {
	case SIOCGETTUNNEL:
		t = NULL;
		/* On the fallback device, look up the tunnel the user described */
		if (dev == ign->fb_tunnel_dev) {
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
				err = -EFAULT;
				break;
			}
			t = ipgre_tunnel_locate(net, &p, 0);
		}
		/* No match (or not the fallback device): report dev's own parms */
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(&p, &t->parms, sizeof(p));
		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
			err = -EFAULT;
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		err = -EFAULT;
		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
			goto done;

		/*
		 * Only a plain 20-byte IPv4/GRE outer header is accepted; the
		 * only fragment bit allowed is DF, and the GRE version and
		 * routing flags are rejected.
		 */
		err = -EINVAL;
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
			goto done;
		/* A fixed TTL forces DF on the outer header */
		if (p.iph.ttl)
			p.iph.frag_off |= htons(IP_DF);

		/* Keys are meaningless unless the matching GRE_KEY flag is set */
		if (!(p.i_flags&GRE_KEY))
			p.i_key = 0;
		if (!(p.o_flags&GRE_KEY))
			p.o_key = 0;

		/* Third argument asks locate to create the tunnel for ADD only */
		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* The new parameters already belong to another device */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned nflags = 0;

				t = netdev_priv(dev);

				if (ipv4_is_multicast(p.iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p.iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* The change must not alter the device's p-t-p/broadcast kind */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}
				/* Unlink and relink around the update of addresses and input key */
				ipgre_tunnel_unlink(ign, t);
				t->parms.iph.saddr = p.iph.saddr;
				t->parms.iph.daddr = p.iph.daddr;
				t->parms.i_key = p.i_key;
				t->parms.o_key = p.o_key;
				memcpy(dev->dev_addr, &p.iph.saddr, 4);
				memcpy(dev->broadcast, &p.iph.daddr, 4);
				ipgre_tunnel_link(ign, t);
				netdev_state_change(dev);
			}
		}

		if (t) {
			err = 0;
			if (cmd == SIOCCHGTUNNEL) {
				t->parms.iph.ttl = p.iph.ttl;
				t->parms.iph.tos = p.iph.tos;
				t->parms.iph.frag_off = p.iph.frag_off;
				/* A new underlying link means headroom and MTU must be redone */
				if (t->parms.link != p.link) {
					t->parms.link = p.link;
					dev->mtu = ipgre_tunnel_bind_dev(dev);
					netdev_state_change(dev);
				}
			}
			/* Hand the resulting parameters back to the caller */
			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
				err = -EFAULT;
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!capable(CAP_NET_ADMIN))
			goto done;

		if (dev == ign->fb_tunnel_dev) {
			err = -EFAULT;
			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
				goto done;
			err = -ENOENT;
			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
				goto done;
			/* The fallback tunnel itself cannot be deleted this way */
			err = -EPERM;
			if (t == netdev_priv(ign->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
1036
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1038{
Patrick McHardy2941a482006-01-08 22:05:26 -08001039 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -07001040 if (new_mtu < 68 ||
1041 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042 return -EINVAL;
1043 dev->mtu = new_mtu;
1044 return 0;
1045}
1046
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001056
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1058 with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1059
1060 ping -t 255 224.66.66.66
1061
1062 If nobody answers, mbone does not work.
1063
1064 ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1065 ip addr add 10.66.66.<somewhat>/24 dev Universe
1066 ifconfig Universe up
1067 ifconfig Universe add fe80::<Your_real_addr>/10
1068 ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1069 ftp 10.66.66.66
1070 ...
1071 ftp fec0:6666:6666::193.233.7.65
1072 ...
1073
1074 */
1075
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001076static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1077 unsigned short type,
1078 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079{
Patrick McHardy2941a482006-01-08 22:05:26 -08001080 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001082 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083
1084 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1085 p[0] = t->parms.o_flags;
1086 p[1] = htons(type);
1087
1088 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001089 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001091
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 if (saddr)
1093 memcpy(&iph->saddr, saddr, 4);
1094
1095 if (daddr) {
1096 memcpy(&iph->daddr, daddr, 4);
1097 return t->hlen;
1098 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001099 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001101
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102 return -t->hlen;
1103}
1104
Timo Teras6a5f44d2007-10-23 20:31:53 -07001105static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1106{
Jianjun Kong6ed2533e2008-11-03 00:25:16 -08001107 struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
Timo Teras6a5f44d2007-10-23 20:31:53 -07001108 memcpy(haddr, &iph->saddr, 4);
1109 return 4;
1110}
1111
/*
 * Link-layer header callbacks.  ipgre_tunnel_init() installs these on
 * tunnels without a remote address and, when broadcast support is
 * compiled in, on multicast tunnels.
 */
static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
1116
Timo Teras6a5f44d2007-10-23 20:31:53 -07001117#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118static int ipgre_open(struct net_device *dev)
1119{
Patrick McHardy2941a482006-01-08 22:05:26 -08001120 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121
Joe Perchesf97c1e02007-12-16 13:45:43 -08001122 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 struct flowi fl = { .oif = t->parms.link,
1124 .nl_u = { .ip4_u =
1125 { .daddr = t->parms.iph.daddr,
1126 .saddr = t->parms.iph.saddr,
1127 .tos = RT_TOS(t->parms.iph.tos) } },
1128 .proto = IPPROTO_GRE };
1129 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -07001130 if (ip_route_output_key(dev_net(dev), &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 return -EADDRNOTAVAIL;
1132 dev = rt->u.dst.dev;
1133 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001134 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 return -EADDRNOTAVAIL;
1136 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001137 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 }
1139 return 0;
1140}
1141
1142static int ipgre_close(struct net_device *dev)
1143{
Patrick McHardy2941a482006-01-08 22:05:26 -08001144 struct ip_tunnel *t = netdev_priv(dev);
Joe Perchesf97c1e02007-12-16 13:45:43 -08001145 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001146 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001147 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148 if (in_dev) {
1149 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1150 in_dev_put(in_dev);
1151 }
1152 }
1153 return 0;
1154}
1155
1156#endif
1157
1158static void ipgre_tunnel_setup(struct net_device *dev)
1159{
Herbert Xuc19e6542008-10-09 11:59:55 -07001160 dev->init = ipgre_tunnel_init;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161 dev->uninit = ipgre_tunnel_uninit;
1162 dev->destructor = free_netdev;
1163 dev->hard_start_xmit = ipgre_tunnel_xmit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164 dev->do_ioctl = ipgre_tunnel_ioctl;
1165 dev->change_mtu = ipgre_tunnel_change_mtu;
1166
1167 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001168 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001169 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170 dev->flags = IFF_NOARP;
1171 dev->iflink = 0;
1172 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001173 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174}
1175
1176static int ipgre_tunnel_init(struct net_device *dev)
1177{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178 struct ip_tunnel *tunnel;
1179 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180
Patrick McHardy2941a482006-01-08 22:05:26 -08001181 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001182 iph = &tunnel->parms.iph;
1183
1184 tunnel->dev = dev;
1185 strcpy(tunnel->parms.name, dev->name);
1186
1187 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1188 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1189
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001192 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193 if (!iph->saddr)
1194 return -EINVAL;
1195 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001196 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197 dev->open = ipgre_open;
1198 dev->stop = ipgre_close;
1199 }
1200#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001201 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001202 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 return 0;
1205}
1206
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001207static int ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208{
Patrick McHardy2941a482006-01-08 22:05:26 -08001209 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210 struct iphdr *iph = &tunnel->parms.iph;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001211 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212
1213 tunnel->dev = dev;
1214 strcpy(tunnel->parms.name, dev->name);
1215
1216 iph->version = 4;
1217 iph->protocol = IPPROTO_GRE;
1218 iph->ihl = 5;
1219 tunnel->hlen = sizeof(struct iphdr) + 4;
1220
1221 dev_hold(dev);
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001222 ign->tunnels_wc[0] = tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 return 0;
1224}
1225
1226
/*
 * Protocol handlers registered for IPPROTO_GRE by ipgre_init():
 * ipgre_rcv for received packets, ipgre_err for errors.
 * NOTE(review): .netns_ok presumably marks the handler as safe in
 * non-initial network namespaces -- confirm against net_protocol.
 */
static struct net_protocol ipgre_protocol = {
	.handler	=	ipgre_rcv,
	.err_handler	=	ipgre_err,
	.netns_ok	=	1,
};
1232
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001233static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1234{
1235 int prio;
1236
1237 for (prio = 0; prio < 4; prio++) {
1238 int h;
1239 for (h = 0; h < HASH_SIZE; h++) {
1240 struct ip_tunnel *t;
1241 while ((t = ign->tunnels[prio][h]) != NULL)
1242 unregister_netdevice(t->dev);
1243 }
1244 }
1245}
1246
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001247static int ipgre_init_net(struct net *net)
1248{
1249 int err;
1250 struct ipgre_net *ign;
1251
1252 err = -ENOMEM;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001253 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001254 if (ign == NULL)
1255 goto err_alloc;
1256
1257 err = net_assign_generic(net, ipgre_net_id, ign);
1258 if (err < 0)
1259 goto err_assign;
1260
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001261 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1262 ipgre_tunnel_setup);
1263 if (!ign->fb_tunnel_dev) {
1264 err = -ENOMEM;
1265 goto err_alloc_dev;
1266 }
1267
1268 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1269 dev_net_set(ign->fb_tunnel_dev, net);
Herbert Xuc19e6542008-10-09 11:59:55 -07001270 ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001271
1272 if ((err = register_netdev(ign->fb_tunnel_dev)))
1273 goto err_reg_dev;
1274
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001275 return 0;
1276
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001277err_reg_dev:
1278 free_netdev(ign->fb_tunnel_dev);
1279err_alloc_dev:
1280 /* nothing */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001281err_assign:
1282 kfree(ign);
1283err_alloc:
1284 return err;
1285}
1286
1287static void ipgre_exit_net(struct net *net)
1288{
1289 struct ipgre_net *ign;
1290
1291 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001292 rtnl_lock();
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001293 ipgre_destroy_tunnels(ign);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001294 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001295 kfree(ign);
1296}
1297
/* Per-network-namespace init/exit hooks, registered by ipgre_init() */
static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit = ipgre_exit_net,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302
Herbert Xuc19e6542008-10-09 11:59:55 -07001303static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1304{
1305 __be16 flags;
1306
1307 if (!data)
1308 return 0;
1309
1310 flags = 0;
1311 if (data[IFLA_GRE_IFLAGS])
1312 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1313 if (data[IFLA_GRE_OFLAGS])
1314 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1315 if (flags & (GRE_VERSION|GRE_ROUTING))
1316 return -EINVAL;
1317
1318 return 0;
1319}
1320
Herbert Xue1a80002008-10-09 12:00:17 -07001321static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1322{
1323 __be32 daddr;
1324
1325 if (tb[IFLA_ADDRESS]) {
1326 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1327 return -EINVAL;
1328 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1329 return -EADDRNOTAVAIL;
1330 }
1331
1332 if (!data)
1333 goto out;
1334
1335 if (data[IFLA_GRE_REMOTE]) {
1336 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1337 if (!daddr)
1338 return -EINVAL;
1339 }
1340
1341out:
1342 return ipgre_tunnel_validate(tb, data);
1343}
1344
Herbert Xuc19e6542008-10-09 11:59:55 -07001345static void ipgre_netlink_parms(struct nlattr *data[],
1346 struct ip_tunnel_parm *parms)
1347{
Herbert Xu7bb82d92008-10-11 12:20:15 -07001348 memset(parms, 0, sizeof(*parms));
Herbert Xuc19e6542008-10-09 11:59:55 -07001349
1350 parms->iph.protocol = IPPROTO_GRE;
1351
1352 if (!data)
1353 return;
1354
1355 if (data[IFLA_GRE_LINK])
1356 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1357
1358 if (data[IFLA_GRE_IFLAGS])
1359 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1360
1361 if (data[IFLA_GRE_OFLAGS])
1362 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1363
1364 if (data[IFLA_GRE_IKEY])
1365 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1366
1367 if (data[IFLA_GRE_OKEY])
1368 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1369
1370 if (data[IFLA_GRE_LOCAL])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001371 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001372
1373 if (data[IFLA_GRE_REMOTE])
Patrick McHardy4d74f8b2008-10-10 12:11:06 -07001374 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
Herbert Xuc19e6542008-10-09 11:59:55 -07001375
1376 if (data[IFLA_GRE_TTL])
1377 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1378
1379 if (data[IFLA_GRE_TOS])
1380 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1381
1382 if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1383 parms->iph.frag_off = htons(IP_DF);
1384}
1385
Herbert Xue1a80002008-10-09 12:00:17 -07001386static int ipgre_tap_init(struct net_device *dev)
1387{
1388 struct ip_tunnel *tunnel;
1389
1390 tunnel = netdev_priv(dev);
1391
1392 tunnel->dev = dev;
1393 strcpy(tunnel->parms.name, dev->name);
1394
1395 ipgre_tunnel_bind_dev(dev);
1396
1397 return 0;
1398}
1399
1400static void ipgre_tap_setup(struct net_device *dev)
1401{
1402
1403 ether_setup(dev);
1404
1405 dev->init = ipgre_tap_init;
1406 dev->uninit = ipgre_tunnel_uninit;
1407 dev->destructor = free_netdev;
1408 dev->hard_start_xmit = ipgre_tunnel_xmit;
1409 dev->change_mtu = ipgre_tunnel_change_mtu;
1410
1411 dev->iflink = 0;
1412 dev->features |= NETIF_F_NETNS_LOCAL;
1413}
1414
Herbert Xuc19e6542008-10-09 11:59:55 -07001415static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1416 struct nlattr *data[])
1417{
1418 struct ip_tunnel *nt;
1419 struct net *net = dev_net(dev);
1420 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1421 int mtu;
1422 int err;
1423
1424 nt = netdev_priv(dev);
1425 ipgre_netlink_parms(data, &nt->parms);
1426
Herbert Xue1a80002008-10-09 12:00:17 -07001427 if (ipgre_tunnel_find(net, &nt->parms, dev->type))
Herbert Xuc19e6542008-10-09 11:59:55 -07001428 return -EEXIST;
1429
Herbert Xue1a80002008-10-09 12:00:17 -07001430 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1431 random_ether_addr(dev->dev_addr);
1432
Herbert Xuc19e6542008-10-09 11:59:55 -07001433 mtu = ipgre_tunnel_bind_dev(dev);
1434 if (!tb[IFLA_MTU])
1435 dev->mtu = mtu;
1436
1437 err = register_netdevice(dev);
1438 if (err)
1439 goto out;
1440
1441 dev_hold(dev);
1442 ipgre_tunnel_link(ign, nt);
1443
1444out:
1445 return err;
1446}
1447
1448static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1449 struct nlattr *data[])
1450{
1451 struct ip_tunnel *t, *nt;
1452 struct net *net = dev_net(dev);
1453 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1454 struct ip_tunnel_parm p;
1455 int mtu;
1456
1457 if (dev == ign->fb_tunnel_dev)
1458 return -EINVAL;
1459
1460 nt = netdev_priv(dev);
1461 ipgre_netlink_parms(data, &p);
1462
1463 t = ipgre_tunnel_locate(net, &p, 0);
1464
1465 if (t) {
1466 if (t->dev != dev)
1467 return -EEXIST;
1468 } else {
1469 unsigned nflags = 0;
1470
1471 t = nt;
1472
1473 if (ipv4_is_multicast(p.iph.daddr))
1474 nflags = IFF_BROADCAST;
1475 else if (p.iph.daddr)
1476 nflags = IFF_POINTOPOINT;
1477
1478 if ((dev->flags ^ nflags) &
1479 (IFF_POINTOPOINT | IFF_BROADCAST))
1480 return -EINVAL;
1481
1482 ipgre_tunnel_unlink(ign, t);
1483 t->parms.iph.saddr = p.iph.saddr;
1484 t->parms.iph.daddr = p.iph.daddr;
1485 t->parms.i_key = p.i_key;
1486 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1487 memcpy(dev->broadcast, &p.iph.daddr, 4);
1488 ipgre_tunnel_link(ign, t);
1489 netdev_state_change(dev);
1490 }
1491
1492 t->parms.o_key = p.o_key;
1493 t->parms.iph.ttl = p.iph.ttl;
1494 t->parms.iph.tos = p.iph.tos;
1495 t->parms.iph.frag_off = p.iph.frag_off;
1496
1497 if (t->parms.link != p.link) {
1498 t->parms.link = p.link;
1499 mtu = ipgre_tunnel_bind_dev(dev);
1500 if (!tb[IFLA_MTU])
1501 dev->mtu = mtu;
1502 netdev_state_change(dev);
1503 }
1504
1505 return 0;
1506}
1507
1508static size_t ipgre_get_size(const struct net_device *dev)
1509{
1510 return
1511 /* IFLA_GRE_LINK */
1512 nla_total_size(4) +
1513 /* IFLA_GRE_IFLAGS */
1514 nla_total_size(2) +
1515 /* IFLA_GRE_OFLAGS */
1516 nla_total_size(2) +
1517 /* IFLA_GRE_IKEY */
1518 nla_total_size(4) +
1519 /* IFLA_GRE_OKEY */
1520 nla_total_size(4) +
1521 /* IFLA_GRE_LOCAL */
1522 nla_total_size(4) +
1523 /* IFLA_GRE_REMOTE */
1524 nla_total_size(4) +
1525 /* IFLA_GRE_TTL */
1526 nla_total_size(1) +
1527 /* IFLA_GRE_TOS */
1528 nla_total_size(1) +
1529 /* IFLA_GRE_PMTUDISC */
1530 nla_total_size(1) +
1531 0;
1532}
1533
/*
 * rtnl_link_ops->fill_info: append the tunnel's current parameters to
 * @skb as IFLA_GRE_* attributes.
 *
 * Returns 0 on success, -EMSGSIZE via the nla_put_failure label.
 * NOTE(review): the NLA_PUT_* macros presumably jump to nla_put_failure
 * when an attribute does not fit -- confirm against their definition.
 */
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
	NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
	NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
	NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
	NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
	NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
	NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
	NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
	NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
	/* Reported as a boolean: is DF set in the outer header template? */
	NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
1555
/*
 * Netlink attribute policy for IFLA_GRE_* attributes, shared by the
 * "gre" and "gretap" link kinds.  LOCAL and REMOTE are described by
 * length (the size of an IPv4 address field) rather than by type.
 */
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
};
1568
/* rtnetlink link operations for the "gre" link kind (plain GRE tunnels) */
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.kind		= "gre",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
1581
/*
 * rtnetlink link operations for the "gretap" link kind.  Differs from
 * the "gre" kind only in its setup and validate callbacks.
 */
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.kind		= "gretap",
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
};
1594
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595/*
1596 * And now the modules code and kernel interface.
1597 */
1598
1599static int __init ipgre_init(void)
1600{
1601 int err;
1602
1603 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1604
1605 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1606 printk(KERN_INFO "ipgre init: can't add protocol\n");
1607 return -EAGAIN;
1608 }
1609
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001610 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1611 if (err < 0)
Herbert Xuc19e6542008-10-09 11:59:55 -07001612 goto gen_device_failed;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001613
Herbert Xuc19e6542008-10-09 11:59:55 -07001614 err = rtnl_link_register(&ipgre_link_ops);
1615 if (err < 0)
1616 goto rtnl_link_failed;
1617
Herbert Xue1a80002008-10-09 12:00:17 -07001618 err = rtnl_link_register(&ipgre_tap_ops);
1619 if (err < 0)
1620 goto tap_ops_failed;
1621
Herbert Xuc19e6542008-10-09 11:59:55 -07001622out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623 return err;
Herbert Xuc19e6542008-10-09 11:59:55 -07001624
Herbert Xue1a80002008-10-09 12:00:17 -07001625tap_ops_failed:
1626 rtnl_link_unregister(&ipgre_link_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001627rtnl_link_failed:
1628 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1629gen_device_failed:
1630 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1631 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632}
1633
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001634static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635{
Herbert Xue1a80002008-10-09 12:00:17 -07001636 rtnl_link_unregister(&ipgre_tap_ops);
Herbert Xuc19e6542008-10-09 11:59:55 -07001637 rtnl_link_unregister(&ipgre_link_ops);
1638 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1640 printk(KERN_INFO "ipgre close: can't remove protocol\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641}
1642
/*
 * Module entry/exit points, licence and rtnl link-kind aliases.
 * NOTE(review): the aliases presumably let the module be autoloaded
 * when a "gre" or "gretap" link is requested -- confirm.
 */
module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");