blob: 80622dd6fb3f7cec0b01b95c4fb6f5dfd86a6079 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002 * Linux NET3: GRE over IP protocol decoder.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003 *
4 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Randy Dunlap4fc268d2006-01-11 12:17:47 -080013#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/kernel.h>
17#include <asm/uaccess.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/tcp.h>
22#include <linux/udp.h>
23#include <linux/if_arp.h>
24#include <linux/mroute.h>
25#include <linux/init.h>
26#include <linux/in6.h>
27#include <linux/inetdevice.h>
28#include <linux/igmp.h>
29#include <linux/netfilter_ipv4.h>
Kris Katterjohn46f25df2006-01-05 16:35:42 -080030#include <linux/if_ether.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
32#include <net/sock.h>
33#include <net/ip.h>
34#include <net/icmp.h>
35#include <net/protocol.h>
36#include <net/ipip.h>
37#include <net/arp.h>
38#include <net/checksum.h>
39#include <net/dsfield.h>
40#include <net/inet_ecn.h>
41#include <net/xfrm.h>
Pavel Emelyanov59a4c752008-04-16 01:08:53 -070042#include <net/net_namespace.h>
43#include <net/netns/generic.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45#ifdef CONFIG_IPV6
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#endif
50
/*
   Problems & solutions
   --------------------

   1. The most important issue is detecting local dead loops.
   They would cause complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   with infinite looping in net_bh.

   We cannot track such dead loops during route installation,
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (sort of local ttl),
   and silently drop the packet when it expires. It is the best
   solution, but it supposes maintaining a new variable in ALL
   skbs, even if no tunneling is used.

   Current solution: t->recursion lock breaks dead loops. It looks
   like dev->tbusy flag, but I preferred a new variable, because
   the semantics are different. One day, when hard_start_xmit
   will be multithreaded we will have to use skb->encapsulation.
71
72
73
   2. Networking dead loops would not kill routers, but would really
   kill the network. IP hop limit plays the role of "t->recursion" in this
   case, if we copy it from the packet being encapsulated to the upper header.
   It is a very good solution, but it introduces two problems:

   - Routing protocols using packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output
     would be even more informative. This idea appeared to be wrong:
     only Linux complies with rfc1812 now (yes, guys, Linux is the only
     true router now :-)), all routers (at least, in neighbourhood of mine)
     return only 8 bytes of payload. It is the end.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet trying to detect inner encapsulation
   made by our node. It is difficult or even impossible, especially
   taking into account fragmentation. To be short, it is not a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and tunnel mtu
   quickly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us, we did
   all that we could. Even if it is your gated who injected
   the fatal route to the network, even if it were you who configured
   the fatal static route: you are innocent. :-)
107
108
109
   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
   practically identical code. It would be good to glue them
   together, but it is not very evident how to make them modular.
   sit is an integral part of IPv6, ipip and gre are naturally modular.
   We could extract common parts (hash table, ioctl etc)
   to a separate module (ip_tunnel.c).

   Alexey Kuznetsov.
 */
119
/*
 * Forward declarations for helpers that are referenced before their
 * definitions appear in this file.
 */

/* Installed as dev->init for tunnels created by ipgre_tunnel_locate(). */
static int ipgre_tunnel_init(struct net_device *dev);

/* Fallback tunnel: no source, no destination, no key, no options */
static int ipgre_fb_tunnel_init(struct net_device *dev);

/* Setup callback handed to alloc_netdev() when a tunnel device is created. */
static void ipgre_tunnel_setup(struct net_device *dev);

/* Computes per-tunnel header sizes and returns the MTU to use. */
static int ipgre_tunnel_bind_dev(struct net_device *dev);
127
/* Number of buckets in each of the per-namespace tunnel hash tables. */
#define HASH_SIZE  16

/* Key passed to net_generic() to fetch this module's per-namespace state. */
static int ipgre_net_id;

/* Per network namespace GRE state. */
struct ipgre_net {
	/* Four hash tables, indexed by which endpoints are configured. */
	struct ip_tunnel *tunnels[4][HASH_SIZE];

	/* Device whose tunnel is returned when lookup finds no other match. */
	struct net_device *fb_tunnel_dev;
};
136
/* Tunnel hash table */

/*
   4 hash tables:

   3: (remote,local)
   2: (remote,*)
   1: (*,local)
   0: (*,*)

   We require exact key match i.e. if a key is present in packet
   it will match only tunnel with the same key; if it is not present,
   it will match only keyless tunnel.

   All keyless packets, if not matched by configured keyless tunnels,
   will match the fallback tunnel.
*/

/*
 * Fold a 32-bit address or key into a bucket index.
 *
 * The argument is parenthesized so that expressions can be passed safely,
 * and the mask is derived from HASH_SIZE so that the index can never exceed
 * the table dimension.  HASH_SIZE must remain a power of two.  Note that
 * the argument is evaluated twice; pass only side-effect free expressions.
 */
#define HASH(addr) \
	(((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & (HASH_SIZE - 1))

/* Readable names for the four tables, used as members of struct ipgre_net. */
#define tunnels_r_l	tunnels[3]
#define tunnels_r	tunnels[2]
#define tunnels_l	tunnels[1]
#define tunnels_wc	tunnels[0]

/* Taken for writing when a chain is modified, for reading during lookup. */
static DEFINE_RWLOCK(ipgre_lock);
163
164/* Given src, dst and key, find appropriate for input tunnel. */
165
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700166static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
167 __be32 remote, __be32 local, __be32 key)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168{
169 unsigned h0 = HASH(remote);
170 unsigned h1 = HASH(key);
171 struct ip_tunnel *t;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700172 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700174 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
176 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
177 return t;
178 }
179 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700180 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 if (remote == t->parms.iph.daddr) {
182 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
183 return t;
184 }
185 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700186 for (t = ign->tunnels_l[h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 if (local == t->parms.iph.saddr ||
Joe Perchesf97c1e02007-12-16 13:45:43 -0800188 (local == t->parms.iph.daddr &&
189 ipv4_is_multicast(local))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
191 return t;
192 }
193 }
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700194 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195 if (t->parms.i_key == key && (t->dev->flags&IFF_UP))
196 return t;
197 }
198
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700199 if (ign->fb_tunnel_dev->flags&IFF_UP)
200 return netdev_priv(ign->fb_tunnel_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 return NULL;
202}
203
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700204static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
205 struct ip_tunnel_parm *parms)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206{
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900207 __be32 remote = parms->iph.daddr;
208 __be32 local = parms->iph.saddr;
209 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210 unsigned h = HASH(key);
211 int prio = 0;
212
213 if (local)
214 prio |= 1;
Joe Perchesf97c1e02007-12-16 13:45:43 -0800215 if (remote && !ipv4_is_multicast(remote)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216 prio |= 2;
217 h ^= HASH(remote);
218 }
219
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -0700220 return &ign->tunnels[prio][h];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221}
222
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700223static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
224 struct ip_tunnel *t)
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900225{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700226 return __ipgre_bucket(ign, &t->parms);
YOSHIFUJI Hideaki5056a1e2007-04-24 20:44:48 +0900227}
228
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700229static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700231 struct ip_tunnel **tp = ipgre_bucket(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232
233 t->next = *tp;
234 write_lock_bh(&ipgre_lock);
235 *tp = t;
236 write_unlock_bh(&ipgre_lock);
237}
238
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700239static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240{
241 struct ip_tunnel **tp;
242
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700243 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 if (t == *tp) {
245 write_lock_bh(&ipgre_lock);
246 *tp = t->next;
247 write_unlock_bh(&ipgre_lock);
248 break;
249 }
250 }
251}
252
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700253static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
254 struct ip_tunnel_parm *parms, int create)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255{
Al Virod5a0a1e2006-11-08 00:23:14 -0800256 __be32 remote = parms->iph.daddr;
257 __be32 local = parms->iph.saddr;
258 __be32 key = parms->i_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 struct ip_tunnel *t, **tp, *nt;
260 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 char name[IFNAMSIZ];
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700262 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700264 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
266 if (key == t->parms.i_key)
267 return t;
268 }
269 }
270 if (!create)
271 return NULL;
272
273 if (parms->name[0])
274 strlcpy(name, parms->name, IFNAMSIZ);
Pavel Emelyanov34cc7ba2008-02-23 20:19:20 -0800275 else
276 sprintf(name, "gre%%d");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277
278 dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
279 if (!dev)
280 return NULL;
281
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -0700282 dev_net_set(dev, net);
283
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800284 if (strchr(name, '%')) {
285 if (dev_alloc_name(dev, name) < 0)
286 goto failed_free;
287 }
288
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 dev->init = ipgre_tunnel_init;
Patrick McHardy2941a482006-01-08 22:05:26 -0800290 nt = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 nt->parms = *parms;
292
Herbert Xu42aa9162008-10-09 11:59:32 -0700293 dev->mtu = ipgre_tunnel_bind_dev(dev);
294
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800295 if (register_netdevice(dev) < 0)
296 goto failed_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297
Linus Torvalds1da177e2005-04-16 15:20:36 -0700298 dev_hold(dev);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700299 ipgre_tunnel_link(ign, nt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 return nt;
301
Pavel Emelyanovb37d428b2008-02-26 23:51:04 -0800302failed_free:
303 free_netdev(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304 return NULL;
305}
306
307static void ipgre_tunnel_uninit(struct net_device *dev)
308{
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700309 struct net *net = dev_net(dev);
310 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
311
312 ipgre_tunnel_unlink(ign, netdev_priv(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313 dev_put(dev);
314}
315
316
317static void ipgre_err(struct sk_buff *skb, u32 info)
318{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319
Rami Rosen071f92d2008-05-21 17:47:54 -0700320/* All the routers (except for Linux) return only
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 8 bytes of packet payload. It means, that precise relaying of
322 ICMP in the real Internet is absolutely infeasible.
323
324 Moreover, Cisco "wise men" put GRE key to the third word
325 in GRE header. It makes impossible maintaining even soft state for keyed
326 GRE tunnels with enabled checksum. Tell them "thank you".
327
328 Well, I wonder, rfc1812 was written by Cisco employee,
329 what the hell these idiots break standrads established
330 by themself???
331 */
332
333 struct iphdr *iph = (struct iphdr*)skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800334 __be16 *p = (__be16*)(skb->data+(iph->ihl<<2));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335 int grehlen = (iph->ihl<<2) + 4;
Arnaldo Carvalho de Melo88c76642007-03-13 14:43:18 -0300336 const int type = icmp_hdr(skb)->type;
337 const int code = icmp_hdr(skb)->code;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 struct ip_tunnel *t;
Al Virod5a0a1e2006-11-08 00:23:14 -0800339 __be16 flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340
341 flags = p[0];
342 if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
343 if (flags&(GRE_VERSION|GRE_ROUTING))
344 return;
345 if (flags&GRE_KEY) {
346 grehlen += 4;
347 if (flags&GRE_CSUM)
348 grehlen += 4;
349 }
350 }
351
352 /* If only 8 bytes returned, keyed message will be dropped here */
353 if (skb_headlen(skb) < grehlen)
354 return;
355
356 switch (type) {
357 default:
358 case ICMP_PARAMETERPROB:
359 return;
360
361 case ICMP_DEST_UNREACH:
362 switch (code) {
363 case ICMP_SR_FAILED:
364 case ICMP_PORT_UNREACH:
365 /* Impossible event. */
366 return;
367 case ICMP_FRAG_NEEDED:
368 /* Soft state for pmtu is maintained by IP core. */
369 return;
370 default:
371 /* All others are translated to HOST_UNREACH.
372 rfc2003 contains "deep thoughts" about NET_UNREACH,
373 I believe they are just ether pollution. --ANK
374 */
375 break;
376 }
377 break;
378 case ICMP_TIME_EXCEEDED:
379 if (code != ICMP_EXC_TTL)
380 return;
381 break;
382 }
383
384 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700385 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700386 (flags&GRE_KEY) ?
387 *(((__be32*)p) + (grehlen>>2) - 1) : 0);
Joe Perchesf97c1e02007-12-16 13:45:43 -0800388 if (t == NULL || t->parms.iph.daddr == 0 ||
389 ipv4_is_multicast(t->parms.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 goto out;
391
392 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
393 goto out;
394
395 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
396 t->err_count++;
397 else
398 t->err_count = 1;
399 t->err_time = jiffies;
400out:
401 read_unlock(&ipgre_lock);
402 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403}
404
405static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
406{
407 if (INET_ECN_is_ce(iph->tos)) {
408 if (skb->protocol == htons(ETH_P_IP)) {
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700409 IP_ECN_set_ce(ip_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700411 IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 }
413 }
414}
415
416static inline u8
417ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
418{
419 u8 inner = 0;
420 if (skb->protocol == htons(ETH_P_IP))
421 inner = old_iph->tos;
422 else if (skb->protocol == htons(ETH_P_IPV6))
423 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
424 return INET_ECN_encapsulate(tos, inner);
425}
426
427static int ipgre_rcv(struct sk_buff *skb)
428{
429 struct iphdr *iph;
430 u8 *h;
Al Virod5a0a1e2006-11-08 00:23:14 -0800431 __be16 flags;
Al Virod3bc23e2006-11-14 21:24:49 -0800432 __sum16 csum = 0;
Al Virod5a0a1e2006-11-08 00:23:14 -0800433 __be32 key = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434 u32 seqno = 0;
435 struct ip_tunnel *tunnel;
436 int offset = 4;
437
438 if (!pskb_may_pull(skb, 16))
439 goto drop_nolock;
440
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700441 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 h = skb->data;
Al Virod5a0a1e2006-11-08 00:23:14 -0800443 flags = *(__be16*)h;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444
445 if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
446 /* - Version must be 0.
447 - We do not support routing headers.
448 */
449 if (flags&(GRE_VERSION|GRE_ROUTING))
450 goto drop_nolock;
451
452 if (flags&GRE_CSUM) {
Herbert Xufb286bb2005-11-10 13:01:24 -0800453 switch (skb->ip_summed) {
Patrick McHardy84fa7932006-08-29 16:44:56 -0700454 case CHECKSUM_COMPLETE:
Al Virod3bc23e2006-11-14 21:24:49 -0800455 csum = csum_fold(skb->csum);
Herbert Xufb286bb2005-11-10 13:01:24 -0800456 if (!csum)
457 break;
458 /* fall through */
459 case CHECKSUM_NONE:
460 skb->csum = 0;
461 csum = __skb_checksum_complete(skb);
Patrick McHardy84fa7932006-08-29 16:44:56 -0700462 skb->ip_summed = CHECKSUM_COMPLETE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 }
464 offset += 4;
465 }
466 if (flags&GRE_KEY) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800467 key = *(__be32*)(h + offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 offset += 4;
469 }
470 if (flags&GRE_SEQ) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800471 seqno = ntohl(*(__be32*)(h + offset));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472 offset += 4;
473 }
474 }
475
476 read_lock(&ipgre_lock);
Pavel Emelyanov3b4667f2008-04-16 01:09:44 -0700477 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700478 iph->saddr, iph->daddr, key)) != NULL) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700479 struct net_device_stats *stats = &tunnel->dev->stats;
480
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 secpath_reset(skb);
482
Al Virod5a0a1e2006-11-08 00:23:14 -0800483 skb->protocol = *(__be16*)(h + 2);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484 /* WCCP version 1 and 2 protocol decoding.
485 * - Change protocol to IP
486 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
487 */
488 if (flags == 0 &&
YOSHIFUJI Hideaki496c98d2006-10-10 19:41:21 -0700489 skb->protocol == htons(ETH_P_WCCP)) {
490 skb->protocol = htons(ETH_P_IP);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900491 if ((*(h + offset) & 0xF0) != 0x40)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 offset += 4;
493 }
494
Timo Teras1d069162007-12-20 00:10:33 -0800495 skb->mac_header = skb->network_header;
Arnaldo Carvalho de Melo4209fb62007-03-10 18:42:03 -0300496 __pskb_pull(skb, offset);
497 skb_reset_network_header(skb);
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -0700498 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499 skb->pkt_type = PACKET_HOST;
500#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -0800501 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 /* Looped back packet, drop it! */
Eric Dumazetee6b9672008-03-05 18:30:47 -0800503 if (skb->rtable->fl.iif == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 goto drop;
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700505 stats->multicast++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 skb->pkt_type = PACKET_BROADCAST;
507 }
508#endif
509
510 if (((flags&GRE_CSUM) && csum) ||
511 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700512 stats->rx_crc_errors++;
513 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 goto drop;
515 }
516 if (tunnel->parms.i_flags&GRE_SEQ) {
517 if (!(flags&GRE_SEQ) ||
518 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700519 stats->rx_fifo_errors++;
520 stats->rx_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521 goto drop;
522 }
523 tunnel->i_seqno = seqno + 1;
524 }
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700525 stats->rx_packets++;
526 stats->rx_bytes += skb->len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527 skb->dev = tunnel->dev;
528 dst_release(skb->dst);
529 skb->dst = NULL;
530 nf_reset(skb);
531 ipgre_ecn_decapsulate(iph, skb);
532 netif_rx(skb);
533 read_unlock(&ipgre_lock);
534 return(0);
535 }
Herbert Xu45af08b2006-04-05 22:31:19 -0700536 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537
538drop:
539 read_unlock(&ipgre_lock);
540drop_nolock:
541 kfree_skb(skb);
542 return(0);
543}
544
545static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
546{
Patrick McHardy2941a482006-01-08 22:05:26 -0800547 struct ip_tunnel *tunnel = netdev_priv(dev);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700548 struct net_device_stats *stats = &tunnel->dev->stats;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700549 struct iphdr *old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550 struct iphdr *tiph;
551 u8 tos;
Al Virod5a0a1e2006-11-08 00:23:14 -0800552 __be16 df;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553 struct rtable *rt; /* Route to the other host */
554 struct net_device *tdev; /* Device to other host */
555 struct iphdr *iph; /* Our new IP header */
Chuck Leverc2636b42007-10-23 21:07:32 -0700556 unsigned int max_headroom; /* The extra header space needed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557 int gre_hlen;
Al Virod5a0a1e2006-11-08 00:23:14 -0800558 __be32 dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559 int mtu;
560
561 if (tunnel->recursion++) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700562 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563 goto tx_error;
564 }
565
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700566 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567 gre_hlen = 0;
568 tiph = (struct iphdr*)skb->data;
569 } else {
570 gre_hlen = tunnel->hlen;
571 tiph = &tunnel->parms.iph;
572 }
573
574 if ((dst = tiph->daddr) == 0) {
575 /* NBMA tunnel */
576
577 if (skb->dst == NULL) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700578 stats->tx_fifo_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 goto tx_error;
580 }
581
582 if (skb->protocol == htons(ETH_P_IP)) {
Eric Dumazetee6b9672008-03-05 18:30:47 -0800583 rt = skb->rtable;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 if ((dst = rt->rt_gateway) == 0)
585 goto tx_error_icmp;
586 }
587#ifdef CONFIG_IPV6
588 else if (skb->protocol == htons(ETH_P_IPV6)) {
589 struct in6_addr *addr6;
590 int addr_type;
591 struct neighbour *neigh = skb->dst->neighbour;
592
593 if (neigh == NULL)
594 goto tx_error;
595
596 addr6 = (struct in6_addr*)&neigh->primary_key;
597 addr_type = ipv6_addr_type(addr6);
598
599 if (addr_type == IPV6_ADDR_ANY) {
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700600 addr6 = &ipv6_hdr(skb)->daddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601 addr_type = ipv6_addr_type(addr6);
602 }
603
604 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
605 goto tx_error_icmp;
606
607 dst = addr6->s6_addr32[3];
608 }
609#endif
610 else
611 goto tx_error;
612 }
613
614 tos = tiph->tos;
615 if (tos&1) {
616 if (skb->protocol == htons(ETH_P_IP))
617 tos = old_iph->tos;
618 tos &= ~1;
619 }
620
621 {
622 struct flowi fl = { .oif = tunnel->parms.link,
623 .nl_u = { .ip4_u =
624 { .daddr = dst,
625 .saddr = tiph->saddr,
626 .tos = RT_TOS(tos) } },
627 .proto = IPPROTO_GRE };
Pavel Emelyanov96635522008-04-16 01:10:44 -0700628 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700629 stats->tx_carrier_errors++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 goto tx_error;
631 }
632 }
633 tdev = rt->u.dst.dev;
634
635 if (tdev == dev) {
636 ip_rt_put(rt);
Pavel Emelyanovaddd68e2008-05-21 14:14:22 -0700637 stats->collisions++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 goto tx_error;
639 }
640
641 df = tiph->frag_off;
642 if (df)
Herbert Xuc95b8192008-10-09 11:58:54 -0700643 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 else
645 mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
646
647 if (skb->dst)
648 skb->dst->ops->update_pmtu(skb->dst, mtu);
649
650 if (skb->protocol == htons(ETH_P_IP)) {
651 df |= (old_iph->frag_off&htons(IP_DF));
652
653 if ((old_iph->frag_off&htons(IP_DF)) &&
654 mtu < ntohs(old_iph->tot_len)) {
655 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
656 ip_rt_put(rt);
657 goto tx_error;
658 }
659 }
660#ifdef CONFIG_IPV6
661 else if (skb->protocol == htons(ETH_P_IPV6)) {
662 struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
663
664 if (rt6 && mtu < dst_mtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
Joe Perchesf97c1e02007-12-16 13:45:43 -0800665 if ((tunnel->parms.iph.daddr &&
666 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 rt6->rt6i_dst.plen == 128) {
668 rt6->rt6i_flags |= RTF_MODIFIED;
669 skb->dst->metrics[RTAX_MTU-1] = mtu;
670 }
671 }
672
673 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
674 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
675 ip_rt_put(rt);
676 goto tx_error;
677 }
678 }
679#endif
680
681 if (tunnel->err_count > 0) {
682 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
683 tunnel->err_count--;
684
685 dst_link_failure(skb);
686 } else
687 tunnel->err_count = 0;
688 }
689
690 max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
691
Patrick McHardycfbba492007-07-09 15:33:40 -0700692 if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
693 (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
695 if (!new_skb) {
696 ip_rt_put(rt);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900697 stats->tx_dropped++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 dev_kfree_skb(skb);
699 tunnel->recursion--;
700 return 0;
701 }
702 if (skb->sk)
703 skb_set_owner_w(new_skb, skb->sk);
704 dev_kfree_skb(skb);
705 skb = new_skb;
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700706 old_iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 }
708
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -0700709 skb->transport_header = skb->network_header;
Arnaldo Carvalho de Meloe2d1bca2007-04-10 20:46:21 -0700710 skb_push(skb, gre_hlen);
711 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700712 memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Patrick McHardy48d5cad2006-02-15 15:10:22 -0800713 IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
714 IPSKB_REROUTED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 dst_release(skb->dst);
716 skb->dst = &rt->u.dst;
717
718 /*
719 * Push down and install the IPIP header.
720 */
721
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700722 iph = ip_hdr(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 iph->version = 4;
724 iph->ihl = sizeof(struct iphdr) >> 2;
725 iph->frag_off = df;
726 iph->protocol = IPPROTO_GRE;
727 iph->tos = ipgre_ecn_encapsulate(tos, old_iph, skb);
728 iph->daddr = rt->rt_dst;
729 iph->saddr = rt->rt_src;
730
731 if ((iph->ttl = tiph->ttl) == 0) {
732 if (skb->protocol == htons(ETH_P_IP))
733 iph->ttl = old_iph->ttl;
734#ifdef CONFIG_IPV6
735 else if (skb->protocol == htons(ETH_P_IPV6))
736 iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
737#endif
738 else
739 iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
740 }
741
Al Virod5a0a1e2006-11-08 00:23:14 -0800742 ((__be16*)(iph+1))[0] = tunnel->parms.o_flags;
743 ((__be16*)(iph+1))[1] = skb->protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744
745 if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
Al Virod5a0a1e2006-11-08 00:23:14 -0800746 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747
748 if (tunnel->parms.o_flags&GRE_SEQ) {
749 ++tunnel->o_seqno;
750 *ptr = htonl(tunnel->o_seqno);
751 ptr--;
752 }
753 if (tunnel->parms.o_flags&GRE_KEY) {
754 *ptr = tunnel->parms.o_key;
755 ptr--;
756 }
757 if (tunnel->parms.o_flags&GRE_CSUM) {
758 *ptr = 0;
Al Viro5f92a732006-11-14 21:36:54 -0800759 *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 }
761 }
762
763 nf_reset(skb);
764
765 IPTUNNEL_XMIT();
766 tunnel->recursion--;
767 return 0;
768
769tx_error_icmp:
770 dst_link_failure(skb);
771
772tx_error:
773 stats->tx_errors++;
774 dev_kfree_skb(skb);
775 tunnel->recursion--;
776 return 0;
777}
778
Herbert Xu42aa9162008-10-09 11:59:32 -0700779static int ipgre_tunnel_bind_dev(struct net_device *dev)
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800780{
781 struct net_device *tdev = NULL;
782 struct ip_tunnel *tunnel;
783 struct iphdr *iph;
784 int hlen = LL_MAX_HEADER;
785 int mtu = ETH_DATA_LEN;
786 int addend = sizeof(struct iphdr) + 4;
787
788 tunnel = netdev_priv(dev);
789 iph = &tunnel->parms.iph;
790
Herbert Xuc95b8192008-10-09 11:58:54 -0700791 /* Guess output device to choose reasonable mtu and needed_headroom */
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800792
793 if (iph->daddr) {
794 struct flowi fl = { .oif = tunnel->parms.link,
795 .nl_u = { .ip4_u =
796 { .daddr = iph->daddr,
797 .saddr = iph->saddr,
798 .tos = RT_TOS(iph->tos) } },
799 .proto = IPPROTO_GRE };
800 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -0700801 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800802 tdev = rt->u.dst.dev;
803 ip_rt_put(rt);
804 }
805 dev->flags |= IFF_POINTOPOINT;
806 }
807
808 if (!tdev && tunnel->parms.link)
Pavel Emelyanov96635522008-04-16 01:10:44 -0700809 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800810
811 if (tdev) {
Herbert Xuc95b8192008-10-09 11:58:54 -0700812 hlen = tdev->hard_header_len + tdev->needed_headroom;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800813 mtu = tdev->mtu;
814 }
815 dev->iflink = tunnel->parms.link;
816
817 /* Precalculate GRE options length */
818 if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
819 if (tunnel->parms.o_flags&GRE_CSUM)
820 addend += 4;
821 if (tunnel->parms.o_flags&GRE_KEY)
822 addend += 4;
823 if (tunnel->parms.o_flags&GRE_SEQ)
824 addend += 4;
825 }
Herbert Xuc95b8192008-10-09 11:58:54 -0700826 dev->needed_headroom = addend + hlen;
Herbert Xu42aa9162008-10-09 11:59:32 -0700827 mtu -= dev->hard_header_len - addend;
828
829 if (mtu < 68)
830 mtu = 68;
831
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800832 tunnel->hlen = addend;
833
Herbert Xu42aa9162008-10-09 11:59:32 -0700834 return mtu;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800835}
836
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837static int
838ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
839{
840 int err = 0;
841 struct ip_tunnel_parm p;
842 struct ip_tunnel *t;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700843 struct net *net = dev_net(dev);
844 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845
846 switch (cmd) {
847 case SIOCGETTUNNEL:
848 t = NULL;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700849 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
851 err = -EFAULT;
852 break;
853 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700854 t = ipgre_tunnel_locate(net, &p, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855 }
856 if (t == NULL)
Patrick McHardy2941a482006-01-08 22:05:26 -0800857 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858 memcpy(&p, &t->parms, sizeof(p));
859 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
860 err = -EFAULT;
861 break;
862
863 case SIOCADDTUNNEL:
864 case SIOCCHGTUNNEL:
865 err = -EPERM;
866 if (!capable(CAP_NET_ADMIN))
867 goto done;
868
869 err = -EFAULT;
870 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
871 goto done;
872
873 err = -EINVAL;
874 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
875 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
876 ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
877 goto done;
878 if (p.iph.ttl)
879 p.iph.frag_off |= htons(IP_DF);
880
881 if (!(p.i_flags&GRE_KEY))
882 p.i_key = 0;
883 if (!(p.o_flags&GRE_KEY))
884 p.o_key = 0;
885
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700886 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700887
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700888 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889 if (t != NULL) {
890 if (t->dev != dev) {
891 err = -EEXIST;
892 break;
893 }
894 } else {
895 unsigned nflags=0;
896
Patrick McHardy2941a482006-01-08 22:05:26 -0800897 t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898
Joe Perchesf97c1e02007-12-16 13:45:43 -0800899 if (ipv4_is_multicast(p.iph.daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900 nflags = IFF_BROADCAST;
901 else if (p.iph.daddr)
902 nflags = IFF_POINTOPOINT;
903
904 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
905 err = -EINVAL;
906 break;
907 }
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700908 ipgre_tunnel_unlink(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 t->parms.iph.saddr = p.iph.saddr;
910 t->parms.iph.daddr = p.iph.daddr;
911 t->parms.i_key = p.i_key;
912 t->parms.o_key = p.o_key;
913 memcpy(dev->dev_addr, &p.iph.saddr, 4);
914 memcpy(dev->broadcast, &p.iph.daddr, 4);
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700915 ipgre_tunnel_link(ign, t);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916 netdev_state_change(dev);
917 }
918 }
919
920 if (t) {
921 err = 0;
922 if (cmd == SIOCCHGTUNNEL) {
923 t->parms.iph.ttl = p.iph.ttl;
924 t->parms.iph.tos = p.iph.tos;
925 t->parms.iph.frag_off = p.iph.frag_off;
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800926 if (t->parms.link != p.link) {
927 t->parms.link = p.link;
Herbert Xu42aa9162008-10-09 11:59:32 -0700928 dev->mtu = ipgre_tunnel_bind_dev(dev);
Michal Schmidtee34c1e2007-12-13 09:46:32 -0800929 netdev_state_change(dev);
930 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 }
932 if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
933 err = -EFAULT;
934 } else
935 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
936 break;
937
938 case SIOCDELTUNNEL:
939 err = -EPERM;
940 if (!capable(CAP_NET_ADMIN))
941 goto done;
942
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700943 if (dev == ign->fb_tunnel_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 err = -EFAULT;
945 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
946 goto done;
947 err = -ENOENT;
Pavel Emelyanovf57e7d52008-04-16 01:09:22 -0700948 if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949 goto done;
950 err = -EPERM;
Pavel Emelyanov7daa0002008-04-16 01:10:05 -0700951 if (t == netdev_priv(ign->fb_tunnel_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 goto done;
953 dev = t->dev;
954 }
Stephen Hemminger22f8cde2007-02-07 00:09:58 -0800955 unregister_netdevice(dev);
956 err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 break;
958
959 default:
960 err = -EINVAL;
961 }
962
963done:
964 return err;
965}
966
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
968{
Patrick McHardy2941a482006-01-08 22:05:26 -0800969 struct ip_tunnel *tunnel = netdev_priv(dev);
Herbert Xuc95b8192008-10-09 11:58:54 -0700970 if (new_mtu < 68 ||
971 new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 return -EINVAL;
973 dev->mtu = new_mtu;
974 return 0;
975}
976
/* Nice toy. Unfortunately, useless in real life :-)
   It allows one to construct a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.


   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have the impression that Cisco could make something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe up
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
   ftp 10.66.66.66
   ...
   ftp fec0:6666:6666::193.233.7.65
   ...

 */
1005
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001006static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1007 unsigned short type,
1008 const void *daddr, const void *saddr, unsigned len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009{
Patrick McHardy2941a482006-01-08 22:05:26 -08001010 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
Al Virod5a0a1e2006-11-08 00:23:14 -08001012 __be16 *p = (__be16*)(iph+1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013
1014 memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1015 p[0] = t->parms.o_flags;
1016 p[1] = htons(type);
1017
1018 /*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001019 * Set the source hardware address.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001021
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022 if (saddr)
1023 memcpy(&iph->saddr, saddr, 4);
1024
1025 if (daddr) {
1026 memcpy(&iph->daddr, daddr, 4);
1027 return t->hlen;
1028 }
Joe Perchesf97c1e02007-12-16 13:45:43 -08001029 if (iph->daddr && !ipv4_is_multicast(iph->daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030 return t->hlen;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001031
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 return -t->hlen;
1033}
1034
Timo Teras6a5f44d2007-10-23 20:31:53 -07001035static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1036{
1037 struct iphdr *iph = (struct iphdr*) skb_mac_header(skb);
1038 memcpy(haddr, &iph->saddr, 4);
1039 return 4;
1040}
1041
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001042static const struct header_ops ipgre_header_ops = {
1043 .create = ipgre_header,
Timo Teras6a5f44d2007-10-23 20:31:53 -07001044 .parse = ipgre_header_parse,
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001045};
1046
Timo Teras6a5f44d2007-10-23 20:31:53 -07001047#ifdef CONFIG_NET_IPGRE_BROADCAST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048static int ipgre_open(struct net_device *dev)
1049{
Patrick McHardy2941a482006-01-08 22:05:26 -08001050 struct ip_tunnel *t = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051
Joe Perchesf97c1e02007-12-16 13:45:43 -08001052 if (ipv4_is_multicast(t->parms.iph.daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 struct flowi fl = { .oif = t->parms.link,
1054 .nl_u = { .ip4_u =
1055 { .daddr = t->parms.iph.daddr,
1056 .saddr = t->parms.iph.saddr,
1057 .tos = RT_TOS(t->parms.iph.tos) } },
1058 .proto = IPPROTO_GRE };
1059 struct rtable *rt;
Pavel Emelyanov96635522008-04-16 01:10:44 -07001060 if (ip_route_output_key(dev_net(dev), &rt, &fl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 return -EADDRNOTAVAIL;
1062 dev = rt->u.dst.dev;
1063 ip_rt_put(rt);
Herbert Xue5ed6392005-10-03 14:35:55 -07001064 if (__in_dev_get_rtnl(dev) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 return -EADDRNOTAVAIL;
1066 t->mlink = dev->ifindex;
Herbert Xue5ed6392005-10-03 14:35:55 -07001067 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 }
1069 return 0;
1070}
1071
1072static int ipgre_close(struct net_device *dev)
1073{
Patrick McHardy2941a482006-01-08 22:05:26 -08001074 struct ip_tunnel *t = netdev_priv(dev);
Joe Perchesf97c1e02007-12-16 13:45:43 -08001075 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
Denis V. Lunev7fee0ca2008-01-21 17:32:38 -08001076 struct in_device *in_dev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001077 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 if (in_dev) {
1079 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1080 in_dev_put(in_dev);
1081 }
1082 }
1083 return 0;
1084}
1085
1086#endif
1087
1088static void ipgre_tunnel_setup(struct net_device *dev)
1089{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090 dev->uninit = ipgre_tunnel_uninit;
1091 dev->destructor = free_netdev;
1092 dev->hard_start_xmit = ipgre_tunnel_xmit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 dev->do_ioctl = ipgre_tunnel_ioctl;
1094 dev->change_mtu = ipgre_tunnel_change_mtu;
1095
1096 dev->type = ARPHRD_IPGRE;
Herbert Xuc95b8192008-10-09 11:58:54 -07001097 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
Kris Katterjohn46f25df2006-01-05 16:35:42 -08001098 dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099 dev->flags = IFF_NOARP;
1100 dev->iflink = 0;
1101 dev->addr_len = 4;
Pavel Emelyanov0b67ece2008-04-16 01:11:13 -07001102 dev->features |= NETIF_F_NETNS_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103}
1104
1105static int ipgre_tunnel_init(struct net_device *dev)
1106{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107 struct ip_tunnel *tunnel;
1108 struct iphdr *iph;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109
Patrick McHardy2941a482006-01-08 22:05:26 -08001110 tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111 iph = &tunnel->parms.iph;
1112
1113 tunnel->dev = dev;
1114 strcpy(tunnel->parms.name, dev->name);
1115
1116 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1117 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1118
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119 if (iph->daddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120#ifdef CONFIG_NET_IPGRE_BROADCAST
Joe Perchesf97c1e02007-12-16 13:45:43 -08001121 if (ipv4_is_multicast(iph->daddr)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122 if (!iph->saddr)
1123 return -EINVAL;
1124 dev->flags = IFF_BROADCAST;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -07001125 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 dev->open = ipgre_open;
1127 dev->stop = ipgre_close;
1128 }
1129#endif
Michal Schmidtee34c1e2007-12-13 09:46:32 -08001130 } else
Timo Teras6a5f44d2007-10-23 20:31:53 -07001131 dev->header_ops = &ipgre_header_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 return 0;
1134}
1135
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001136static int ipgre_fb_tunnel_init(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137{
Patrick McHardy2941a482006-01-08 22:05:26 -08001138 struct ip_tunnel *tunnel = netdev_priv(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 struct iphdr *iph = &tunnel->parms.iph;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001140 struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141
1142 tunnel->dev = dev;
1143 strcpy(tunnel->parms.name, dev->name);
1144
1145 iph->version = 4;
1146 iph->protocol = IPPROTO_GRE;
1147 iph->ihl = 5;
1148 tunnel->hlen = sizeof(struct iphdr) + 4;
1149
1150 dev_hold(dev);
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001151 ign->tunnels_wc[0] = tunnel;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001152 return 0;
1153}
1154
1155
1156static struct net_protocol ipgre_protocol = {
1157 .handler = ipgre_rcv,
1158 .err_handler = ipgre_err,
Pavel Emelyanovf96c1482008-04-16 01:11:36 -07001159 .netns_ok = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160};
1161
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001162static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1163{
1164 int prio;
1165
1166 for (prio = 0; prio < 4; prio++) {
1167 int h;
1168 for (h = 0; h < HASH_SIZE; h++) {
1169 struct ip_tunnel *t;
1170 while ((t = ign->tunnels[prio][h]) != NULL)
1171 unregister_netdevice(t->dev);
1172 }
1173 }
1174}
1175
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001176static int ipgre_init_net(struct net *net)
1177{
1178 int err;
1179 struct ipgre_net *ign;
1180
1181 err = -ENOMEM;
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001182 ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001183 if (ign == NULL)
1184 goto err_alloc;
1185
1186 err = net_assign_generic(net, ipgre_net_id, ign);
1187 if (err < 0)
1188 goto err_assign;
1189
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001190 ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1191 ipgre_tunnel_setup);
1192 if (!ign->fb_tunnel_dev) {
1193 err = -ENOMEM;
1194 goto err_alloc_dev;
1195 }
1196
1197 ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init;
1198 dev_net_set(ign->fb_tunnel_dev, net);
1199
1200 if ((err = register_netdev(ign->fb_tunnel_dev)))
1201 goto err_reg_dev;
1202
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001203 return 0;
1204
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001205err_reg_dev:
1206 free_netdev(ign->fb_tunnel_dev);
1207err_alloc_dev:
1208 /* nothing */
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001209err_assign:
1210 kfree(ign);
1211err_alloc:
1212 return err;
1213}
1214
1215static void ipgre_exit_net(struct net *net)
1216{
1217 struct ipgre_net *ign;
1218
1219 ign = net_generic(net, ipgre_net_id);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001220 rtnl_lock();
Pavel Emelyanoveb8ce742008-04-16 01:10:26 -07001221 ipgre_destroy_tunnels(ign);
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001222 rtnl_unlock();
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001223 kfree(ign);
1224}
1225
1226static struct pernet_operations ipgre_net_ops = {
1227 .init = ipgre_init_net,
1228 .exit = ipgre_exit_net,
1229};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230
1231/*
1232 * And now the modules code and kernel interface.
1233 */
1234
1235static int __init ipgre_init(void)
1236{
1237 int err;
1238
1239 printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1240
1241 if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
1242 printk(KERN_INFO "ipgre init: can't add protocol\n");
1243 return -EAGAIN;
1244 }
1245
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001246 err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1247 if (err < 0)
Pavel Emelyanov7daa0002008-04-16 01:10:05 -07001248 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1249
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251}
1252
Alexey Kuznetsovdb445752005-07-30 17:46:44 -07001253static void __exit ipgre_fini(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254{
1255 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1256 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1257
Pavel Emelyanov59a4c752008-04-16 01:08:53 -07001258 unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259}
1260
1261module_init(ipgre_init);
1262module_exit(ipgre_fini);
1263MODULE_LICENSE("GPL");