blob: 1a314bc77863b039e8118c2642f2ece7915433eb [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Raise RT6_DEBUG to 3 to have RDBG() and
 * RT6_TRACE() emit printk output; at the default level both expand to
 * nothing and their arguments are never evaluated.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
/*
 * When non-zero, ip6_route_input()/ip6_route_output() also create a cached
 * clone for routes that already have a nexthop (or are RTF_NONEXTHOP).
 */
#define CLONE_OFFLINK_ROUTE 0

/* "strict" bits understood by rt6_score_route() / rt6_select(). */
#define RT6_SELECT_F_IFACE 0x1		/* reject routes that fail the device check */
#define RT6_SELECT_F_REACHABLE 0x2	/* reject routes whose nexthop is not NUD_VALID */
79
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101static struct dst_ops ip6_dst_ops = {
102 .family = AF_INET6,
103 .protocol = __constant_htons(ETH_P_IPV6),
104 .gc = ip6_dst_gc,
105 .gc_thresh = 1024,
106 .check = ip6_dst_check,
107 .destroy = ip6_dst_destroy,
108 .ifdown = ip6_dst_ifdown,
109 .negative_advice = ip6_negative_advice,
110 .link_failure = ip6_link_failure,
111 .update_pmtu = ip6_rt_update_pmtu,
112 .entry_size = sizeof(struct rt6_info),
113};
114
115struct rt6_info ip6_null_entry = {
116 .u = {
117 .dst = {
118 .__refcnt = ATOMIC_INIT(1),
119 .__use = 1,
120 .dev = &loopback_dev,
121 .obsolete = -1,
122 .error = -ENETUNREACH,
123 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
124 .input = ip6_pkt_discard,
125 .output = ip6_pkt_discard_out,
126 .ops = &ip6_dst_ops,
127 .path = (struct dst_entry*)&ip6_null_entry,
128 }
129 },
130 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
131 .rt6i_metric = ~(u32) 0,
132 .rt6i_ref = ATOMIC_INIT(1),
133};
134
135struct fib6_node ip6_routing_table = {
136 .leaf = &ip6_null_entry,
137 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
138};
139
140/* Protects all the ip6 fib */
141
142DEFINE_RWLOCK(rt6_lock);
143
144
145/* allocate dst with ip6_dst_ops */
146static __inline__ struct rt6_info *ip6_dst_alloc(void)
147{
148 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
149}
150
151static void ip6_dst_destroy(struct dst_entry *dst)
152{
153 struct rt6_info *rt = (struct rt6_info *)dst;
154 struct inet6_dev *idev = rt->rt6i_idev;
155
156 if (idev != NULL) {
157 rt->rt6i_idev = NULL;
158 in6_dev_put(idev);
159 }
160}
161
162static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
163 int how)
164{
165 struct rt6_info *rt = (struct rt6_info *)dst;
166 struct inet6_dev *idev = rt->rt6i_idev;
167
168 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
169 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
170 if (loopback_idev != NULL) {
171 rt->rt6i_idev = loopback_idev;
172 in6_dev_put(idev);
173 }
174 }
175}
176
177static __inline__ int rt6_check_expired(const struct rt6_info *rt)
178{
179 return (rt->rt6i_flags & RTF_EXPIRES &&
180 time_after(jiffies, rt->rt6i_expires));
181}
182
183/*
184 * Route lookup. Any rt6_lock is implied.
185 */
186
187static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
188 int oif,
189 int strict)
190{
191 struct rt6_info *local = NULL;
192 struct rt6_info *sprt;
193
194 if (oif) {
195 for (sprt = rt; sprt; sprt = sprt->u.next) {
196 struct net_device *dev = sprt->rt6i_dev;
197 if (dev->ifindex == oif)
198 return sprt;
199 if (dev->flags & IFF_LOOPBACK) {
200 if (sprt->rt6i_idev == NULL ||
201 sprt->rt6i_idev->dev->ifindex != oif) {
202 if (strict && oif)
203 continue;
204 if (local && (!oif ||
205 local->rt6i_idev->dev->ifindex == oif))
206 continue;
207 }
208 local = sprt;
209 }
210 }
211
212 if (local)
213 return local;
214
215 if (strict)
216 return &ip6_null_entry;
217 }
218 return rt;
219}
220
221/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800222 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700223 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800224static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800226 struct net_device *dev = rt->rt6i_dev;
227 if (!oif || dev->ifindex == oif)
228 return 2;
229 if ((dev->flags & IFF_LOOPBACK) &&
230 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
231 return 1;
232 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233}
234
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800235static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800237 struct neighbour *neigh = rt->rt6i_nexthop;
238 int m = 0;
239 if (neigh) {
240 read_lock_bh(&neigh->lock);
241 if (neigh->nud_state & NUD_VALID)
242 m = 1;
243 read_unlock_bh(&neigh->lock);
244 }
245 return m;
246}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800248static int rt6_score_route(struct rt6_info *rt, int oif,
249 int strict)
250{
251 int m = rt6_check_dev(rt, oif);
252 if (!m && (strict & RT6_SELECT_F_IFACE))
253 return -1;
254 if (rt6_check_neigh(rt))
255 m |= 4;
256 else if (strict & RT6_SELECT_F_REACHABLE)
257 return -1;
258 return m;
259}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800261static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
262 int strict)
263{
264 struct rt6_info *match = NULL, *last = NULL;
265 struct rt6_info *rt, *rt0 = *head;
266 u32 metric;
267 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800269 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
270 __FUNCTION__, head, head ? *head : NULL, oif);
271
272 for (rt = rt0, metric = rt0->rt6i_metric;
273 rt && rt->rt6i_metric == metric;
274 rt = rt->u.next) {
275 int m;
276
277 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 continue;
279
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800280 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800282 m = rt6_score_route(rt, oif, strict);
283 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800286 if (m > mpri) {
287 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700288 mpri = m;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 }
290 }
291
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800292 if (!match &&
293 (strict & RT6_SELECT_F_REACHABLE) &&
294 last && last != rt0) {
295 /* no entries matched; do round-robin */
296 *head = rt0->u.next;
297 rt0->u.next = last->u.next;
298 last->u.next = rt0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 }
300
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800301 RT6_TRACE("%s() => %p, score=%d\n",
302 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800304 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305}
306
307struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
308 int oif, int strict)
309{
310 struct fib6_node *fn;
311 struct rt6_info *rt;
312
313 read_lock_bh(&rt6_lock);
314 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
315 rt = rt6_device_match(fn->leaf, oif, strict);
316 dst_hold(&rt->u.dst);
317 rt->u.dst.__use++;
318 read_unlock_bh(&rt6_lock);
319
320 rt->u.dst.lastuse = jiffies;
321 if (rt->u.dst.error == 0)
322 return rt;
323 dst_release(&rt->u.dst);
324 return NULL;
325}
326
327/* ip6_ins_rt is called with FREE rt6_lock.
328 It takes new route entry, the addition fails by any reason the
329 route is freed. In any case, if caller does not hold it, it may
330 be destroyed.
331 */
332
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700333int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
334 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335{
336 int err;
337
338 write_lock_bh(&rt6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700339 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340 write_unlock_bh(&rt6_lock);
341
342 return err;
343}
344
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800345static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
346 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 struct rt6_info *rt;
349
350 /*
351 * Clone the route.
352 */
353
354 rt = ip6_rt_copy(ort);
355
356 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900357 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
358 if (rt->rt6i_dst.plen != 128 &&
359 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
360 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900362 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900364 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 rt->rt6i_dst.plen = 128;
366 rt->rt6i_flags |= RTF_CACHE;
367 rt->u.dst.flags |= DST_HOST;
368
369#ifdef CONFIG_IPV6_SUBTREES
370 if (rt->rt6i_src.plen && saddr) {
371 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
372 rt->rt6i_src.plen = 128;
373 }
374#endif
375
376 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
377
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800378 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800380 return rt;
381}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800383static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
384{
385 struct rt6_info *rt = ip6_rt_copy(ort);
386 if (rt) {
387 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
388 rt->rt6i_dst.plen = 128;
389 rt->rt6i_flags |= RTF_CACHE;
390 if (rt->rt6i_flags & RTF_REJECT)
391 rt->u.dst.error = ort->u.dst.error;
392 rt->u.dst.flags |= DST_HOST;
393 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
394 }
395 return rt;
396}
397
/*
 * BACKTRACK() - retry a failed strict lookup further up the FIB tree.
 *
 * Relies on the enclosing function providing the locals `rt`, `fn` and
 * `strict` and the labels `out` and `restart`.  When the selection yielded
 * ip6_null_entry and `strict` is non-zero, `fn` is walked towards the root:
 *   - reaching a node flagged RTN_ROOT jumps to `out`;
 *   - reaching a node flagged RTN_RTINFO jumps to `restart`.
 * If the parent chain ends without either, execution simply falls through
 * (with `fn` left NULL).
 *
 * Wrapped in do { } while (0) so that `BACKTRACK();` is a single statement
 * and stays safe inside an unbraced if/else.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) \
				goto out; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
408
409
410void ip6_route_input(struct sk_buff *skb)
411{
412 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800413 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 int strict;
415 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800416 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417
YOSHIFUJI Hideaki118f8c12006-03-20 17:01:06 -0800418 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
420relookup:
421 read_lock_bh(&rt6_lock);
422
423 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
424 &skb->nh.ipv6h->saddr);
425
426restart:
427 rt = fn->leaf;
428
YOSHIFUJI Hideaki118f8c12006-03-20 17:01:06 -0800429 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | RT6_SELECT_F_REACHABLE);
430 if (rt == &ip6_null_entry)
431 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 BACKTRACK();
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800433 if ((rt->rt6i_flags & RTF_CACHE))
434 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800436 dst_hold(&rt->u.dst);
437 read_unlock_bh(&rt6_lock);
438
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800439 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
440 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
441 else {
442#if CLONE_OFFLINK_ROUTE
443 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
444#else
445 goto out2;
446#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800449 dst_release(&rt->u.dst);
450 rt = nrt ? : &ip6_null_entry;
451
452 dst_hold(&rt->u.dst);
453 if (nrt) {
454 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
455 if (!err)
456 goto out2;
457 }
458
459 if (--attempts <= 0)
460 goto out2;
461
462 /*
463 * Race condition! In the gap, when rt6_lock was
464 * released someone could insert this route. Relookup.
465 */
466 dst_release(&rt->u.dst);
467 goto relookup;
468
469out:
470 dst_hold(&rt->u.dst);
471 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472out2:
473 rt->u.dst.lastuse = jiffies;
474 rt->u.dst.__use++;
475 skb->dst = (struct dst_entry *) rt;
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800476 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477}
478
479struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
480{
481 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800482 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 int strict;
484 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800485 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800487 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488
489relookup:
490 read_lock_bh(&rt6_lock);
491
492 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
493
494restart:
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800495 rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
496 if (rt == &ip6_null_entry)
497 rt = rt6_select(&fn->leaf, fl->oif, strict);
498 BACKTRACK();
499 if ((rt->rt6i_flags & RTF_CACHE))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800502 dst_hold(&rt->u.dst);
503 read_unlock_bh(&rt6_lock);
504
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800505 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800506 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800507 else {
508#if CLONE_OFFLINK_ROUTE
509 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
510#else
511 goto out2;
512#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800514
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800515 dst_release(&rt->u.dst);
516 rt = nrt ? : &ip6_null_entry;
517
518 dst_hold(&rt->u.dst);
519 if (nrt) {
520 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
521 if (!err)
522 goto out2;
523 }
524
525 if (--attempts <= 0)
526 goto out2;
527
528 /*
529 * Race condition! In the gap, when rt6_lock was
530 * released someone could insert this route. Relookup.
531 */
532 dst_release(&rt->u.dst);
533 goto relookup;
534
535out:
536 dst_hold(&rt->u.dst);
537 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538out2:
539 rt->u.dst.lastuse = jiffies;
540 rt->u.dst.__use++;
541 return &rt->u.dst;
542}
543
544
545/*
546 * Destination cache support functions
547 */
548
549static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
550{
551 struct rt6_info *rt;
552
553 rt = (struct rt6_info *) dst;
554
555 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
556 return dst;
557
558 return NULL;
559}
560
561static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
562{
563 struct rt6_info *rt = (struct rt6_info *) dst;
564
565 if (rt) {
566 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700567 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568 else
569 dst_release(dst);
570 }
571 return NULL;
572}
573
574static void ip6_link_failure(struct sk_buff *skb)
575{
576 struct rt6_info *rt;
577
578 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
579
580 rt = (struct rt6_info *) skb->dst;
581 if (rt) {
582 if (rt->rt6i_flags&RTF_CACHE) {
583 dst_set_expires(&rt->u.dst, 0);
584 rt->rt6i_flags |= RTF_EXPIRES;
585 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
586 rt->rt6i_node->fn_sernum = -1;
587 }
588}
589
590static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
591{
592 struct rt6_info *rt6 = (struct rt6_info*)dst;
593
594 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
595 rt6->rt6i_flags |= RTF_MODIFIED;
596 if (mtu < IPV6_MIN_MTU) {
597 mtu = IPV6_MIN_MTU;
598 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
599 }
600 dst->metrics[RTAX_MTU-1] = mtu;
601 }
602}
603
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(); reaped by ndisc_dst_gc().  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;

/* Defined further down; needed by ndisc_dst_alloc(). */
static int ipv6_get_mtu(struct net_device *dev);
607
608static inline unsigned int ipv6_advmss(unsigned int mtu)
609{
610 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
611
612 if (mtu < ip6_rt_min_advmss)
613 mtu = ip6_rt_min_advmss;
614
615 /*
616 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
617 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
618 * IPV6_MAXPLEN is also valid and means: "any MSS,
619 * rely only on pmtu discovery"
620 */
621 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
622 mtu = IPV6_MAXPLEN;
623 return mtu;
624}
625
626struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
627 struct neighbour *neigh,
628 struct in6_addr *addr,
629 int (*output)(struct sk_buff *))
630{
631 struct rt6_info *rt;
632 struct inet6_dev *idev = in6_dev_get(dev);
633
634 if (unlikely(idev == NULL))
635 return NULL;
636
637 rt = ip6_dst_alloc();
638 if (unlikely(rt == NULL)) {
639 in6_dev_put(idev);
640 goto out;
641 }
642
643 dev_hold(dev);
644 if (neigh)
645 neigh_hold(neigh);
646 else
647 neigh = ndisc_get_neigh(dev, addr);
648
649 rt->rt6i_dev = dev;
650 rt->rt6i_idev = idev;
651 rt->rt6i_nexthop = neigh;
652 atomic_set(&rt->u.dst.__refcnt, 1);
653 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
654 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
655 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
656 rt->u.dst.output = output;
657
658#if 0 /* there's no chance to use these for ndisc */
659 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
660 ? DST_HOST
661 : 0;
662 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
663 rt->rt6i_dst.plen = 128;
664#endif
665
666 write_lock_bh(&rt6_lock);
667 rt->u.dst.next = ndisc_dst_gc_list;
668 ndisc_dst_gc_list = &rt->u.dst;
669 write_unlock_bh(&rt6_lock);
670
671 fib6_force_start_gc();
672
673out:
674 return (struct dst_entry *)rt;
675}
676
677int ndisc_dst_gc(int *more)
678{
679 struct dst_entry *dst, *next, **pprev;
680 int freed;
681
682 next = NULL;
683 pprev = &ndisc_dst_gc_list;
684 freed = 0;
685 while ((dst = *pprev) != NULL) {
686 if (!atomic_read(&dst->__refcnt)) {
687 *pprev = dst->next;
688 dst_free(dst);
689 freed++;
690 } else {
691 pprev = &dst->next;
692 (*more)++;
693 }
694 }
695
696 return freed;
697}
698
699static int ip6_dst_gc(void)
700{
701 static unsigned expire = 30*HZ;
702 static unsigned long last_gc;
703 unsigned long now = jiffies;
704
705 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
706 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
707 goto out;
708
709 expire++;
710 fib6_run_gc(expire);
711 last_gc = now;
712 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
713 expire = ip6_rt_gc_timeout>>1;
714
715out:
716 expire -= expire>>ip6_rt_gc_elasticity;
717 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
718}
719
720/* Clean host part of a prefix. Not necessary in radix tree,
721 but results in cleaner routing tables.
722
723 Remove it only when all the things will work!
724 */
725
726static int ipv6_get_mtu(struct net_device *dev)
727{
728 int mtu = IPV6_MIN_MTU;
729 struct inet6_dev *idev;
730
731 idev = in6_dev_get(dev);
732 if (idev) {
733 mtu = idev->cnf.mtu6;
734 in6_dev_put(idev);
735 }
736 return mtu;
737}
738
739int ipv6_get_hoplimit(struct net_device *dev)
740{
741 int hoplimit = ipv6_devconf.hop_limit;
742 struct inet6_dev *idev;
743
744 idev = in6_dev_get(dev);
745 if (idev) {
746 hoplimit = idev->cnf.hop_limit;
747 in6_dev_put(idev);
748 }
749 return hoplimit;
750}
751
752/*
753 *
754 */
755
/*
 * ip6_route_add - build a route from a user request and insert it.
 *
 * @rtmsg:   route description (prefix, gateway, ifindex, metric, flags).
 *           Note: rtmsg_metric is rewritten to IP6_RT_PRIO_USER when 0.
 * @nlh:     netlink header, may be NULL; when present it supplies the
 *           route's protocol, otherwise RTPROT_BOOT is used.
 * @_rtattr: optional rtattr table; only RTA_METRICS is read here.
 * @req:     passed through unchanged to ip6_ins_rt().
 *
 * On the install path the references on dev and idev are handed over to
 * the new route and the result of ip6_ins_rt() is returned.  Every error
 * path goes through "out", which drops the dev/idev references, frees the
 * route and returns a negative errno (-ENODEV, -ENOMEM, -EINVAL,
 * -EHOSTUNREACH or the neighbour lookup error).  The two prefix-length
 * checks at the top return -EINVAL directly, before anything is acquired.
 */
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700756int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
757 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758{
759 int err;
760 struct rtmsg *r;
761 struct rtattr **rta;
762 struct rt6_info *rt = NULL;
763 struct net_device *dev = NULL;
764 struct inet6_dev *idev = NULL;
765 int addr_type;
766
767 rta = (struct rtattr **) _rtattr;
768
769 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
770 return -EINVAL;
/* Source-specific routes are only accepted with CONFIG_IPV6_SUBTREES. */
771#ifndef CONFIG_IPV6_SUBTREES
772 if (rtmsg->rtmsg_src_len)
773 return -EINVAL;
774#endif
/* An explicit ifindex must name a device that has an inet6_dev. */
775 if (rtmsg->rtmsg_ifindex) {
776 err = -ENODEV;
777 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
778 if (!dev)
779 goto out;
780 idev = in6_dev_get(dev);
781 if (!idev)
782 goto out;
783 }
784
785 if (rtmsg->rtmsg_metric == 0)
786 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
787
788 rt = ip6_dst_alloc();
789
790 if (rt == NULL) {
791 err = -ENOMEM;
792 goto out;
793 }
794
795 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -0800796 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 if (nlh && (r = NLMSG_DATA(nlh))) {
798 rt->rt6i_protocol = r->rtm_protocol;
799 } else {
800 rt->rt6i_protocol = RTPROT_BOOT;
801 }
802
803 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
804
/* Default handlers; the reject branch below replaces them. */
805 if (addr_type & IPV6_ADDR_MULTICAST)
806 rt->u.dst.input = ip6_mc_input;
807 else
808 rt->u.dst.input = ip6_forward;
809
810 rt->u.dst.output = ip6_output;
811
812 ipv6_addr_prefix(&rt->rt6i_dst.addr,
813 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
814 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
815 if (rt->rt6i_dst.plen == 128)
816 rt->u.dst.flags = DST_HOST;
817
818#ifdef CONFIG_IPV6_SUBTREES
819 ipv6_addr_prefix(&rt->rt6i_src.addr,
820 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
821 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
822#endif
823
824 rt->rt6i_metric = rtmsg->rtmsg_metric;
825
826 /* We cannot add true routes via loopback here,
827 they would result in kernel looping; promote them to reject routes
828 */
829 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
830 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
831 /* hold loopback dev/idev if we haven't done so. */
832 if (dev != &loopback_dev) {
833 if (dev) {
834 dev_put(dev);
835 in6_dev_put(idev);
836 }
837 dev = &loopback_dev;
838 dev_hold(dev);
839 idev = in6_dev_get(dev);
840 if (!idev) {
841 err = -ENODEV;
842 goto out;
843 }
844 }
845 rt->u.dst.output = ip6_pkt_discard_out;
846 rt->u.dst.input = ip6_pkt_discard;
847 rt->u.dst.error = -ENETUNREACH;
848 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
849 goto install_route;
850 }
851
852 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
853 struct in6_addr *gw_addr;
854 int gwa_type;
855
856 gw_addr = &rtmsg->rtmsg_gateway;
857 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
858 gwa_type = ipv6_addr_type(gw_addr);
859
860 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
861 struct rt6_info *grt;
862
863 /* IPv6 strictly inhibits using not link-local
864 addresses as nexthop address.
865 Otherwise, router will not able to send redirects.
866 It is very good, but in some (rare!) circumstances
867 (SIT, PtP, NBMA NOARP links) it is handy to allow
868 some exceptions. --ANK
869 */
870 err = -EINVAL;
871 if (!(gwa_type&IPV6_ADDR_UNICAST))
872 goto out;
873
/* The gateway itself must be reachable through an existing route. */
874 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
875
876 err = -EHOSTUNREACH;
877 if (grt == NULL)
878 goto out;
879 if (dev) {
880 if (dev != grt->rt6i_dev) {
881 dst_release(&grt->u.dst);
882 goto out;
883 }
884 } else {
/* No device requested: adopt (and hold) the gateway route's dev/idev. */
885 dev = grt->rt6i_dev;
886 idev = grt->rt6i_idev;
887 dev_hold(dev);
888 in6_dev_hold(grt->rt6i_idev);
889 }
/* err stays -EHOSTUNREACH when the gateway is itself only reachable via a gateway. */
890 if (!(grt->rt6i_flags&RTF_GATEWAY))
891 err = 0;
892 dst_release(&grt->u.dst);
893
894 if (err)
895 goto out;
896 }
897 err = -EINVAL;
898 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
899 goto out;
900 }
901
902 err = -ENODEV;
903 if (dev == NULL)
904 goto out;
905
906 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
907 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
908 if (IS_ERR(rt->rt6i_nexthop)) {
909 err = PTR_ERR(rt->rt6i_nexthop);
/* Clear the error pointer so the cleanup at "out" never sees it as a neighbour. */
910 rt->rt6i_nexthop = NULL;
911 goto out;
912 }
913 }
914
915 rt->rt6i_flags = rtmsg->rtmsg_flags;
916
917install_route:
/* Copy user supplied metrics; attribute type N is stored in metrics[N-1]. */
918 if (rta && rta[RTA_METRICS-1]) {
919 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
920 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
921
922 while (RTA_OK(attr, attrlen)) {
923 unsigned flavor = attr->rta_type;
924 if (flavor) {
925 if (flavor > RTAX_MAX) {
926 err = -EINVAL;
927 goto out;
928 }
929 rt->u.dst.metrics[flavor-1] =
930 *(u32 *)RTA_DATA(attr);
931 }
932 attr = RTA_NEXT(attr, attrlen);
933 }
934 }
935
/* Fill in defaults for metrics the request left at zero. */
936 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
937 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
938 if (!rt->u.dst.metrics[RTAX_MTU-1])
939 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
940 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
941 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
/* The dev/idev references acquired above now belong to the route. */
942 rt->u.dst.dev = dev;
943 rt->rt6i_idev = idev;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700944 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945
/* Error exit: undo whatever was acquired so far. */
946out:
947 if (dev)
948 dev_put(dev);
949 if (idev)
950 in6_dev_put(idev);
951 if (rt)
952 dst_free((struct dst_entry *) rt);
953 return err;
954}
955
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700956int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957{
958 int err;
959
960 write_lock_bh(&rt6_lock);
961
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700962 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963 dst_release(&rt->u.dst);
964
965 write_unlock_bh(&rt6_lock);
966
967 return err;
968}
969
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700970static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971{
972 struct fib6_node *fn;
973 struct rt6_info *rt;
974 int err = -ESRCH;
975
976 read_lock_bh(&rt6_lock);
977
978 fn = fib6_locate(&ip6_routing_table,
979 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
980 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
981
982 if (fn) {
983 for (rt = fn->leaf; rt; rt = rt->u.next) {
984 if (rtmsg->rtmsg_ifindex &&
985 (rt->rt6i_dev == NULL ||
986 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
987 continue;
988 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
989 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
990 continue;
991 if (rtmsg->rtmsg_metric &&
992 rtmsg->rtmsg_metric != rt->rt6i_metric)
993 continue;
994 dst_hold(&rt->u.dst);
995 read_unlock_bh(&rt6_lock);
996
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700997 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 }
999 }
1000 read_unlock_bh(&rt6_lock);
1001
1002 return err;
1003}
1004
1005/*
1006 * Handle redirects
1007 */
/*
 * rt6_redirect - act on a redirect for destination @dest.
 *
 * @dest:    destination the redirect applies to.
 * @saddr:   source address of the redirect; must be the gateway of the
 *           current route (or of some entry in the root leaf list when the
 *           current route is a default route).
 * @neigh:   neighbour entry of the new first hop.
 * @lladdr:  link-layer address passed on to neigh_update().
 * @on_link: non-zero if the destination is itself on-link, in which case
 *           the new route is installed without RTF_GATEWAY.
 *
 * The current route is looked up strictly on neigh's device.  If the
 * redirect is accepted, a host (/128) RTF_CACHE|RTF_DYNAMIC copy of that
 * route pointing at @neigh is inserted; if the old route was itself a cache
 * entry it is deleted.  The reference obtained from rt6_lookup() is dropped
 * on every path: at "out", or inside ip6_del_rt().
 */
1008void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1009 struct neighbour *neigh, u8 *lladdr, int on_link)
1010{
1011 struct rt6_info *rt, *nrt;
1012
1013 /* Locate old route to this destination. */
1014 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1015
1016 if (rt == NULL)
1017 return;
1018
1019 if (neigh->dev != rt->rt6i_dev)
1020 goto out;
1021
1022 /*
1023 * Current route is on-link; redirect is always invalid.
1024 *
1025 * Seems, previous statement is not true. It could
1026 * be node, which looks for us as on-link (f.e. proxy ndisc)
1027 * But then router serving it might decide, that we should
1028 * know truth 8)8) --ANK (980726).
1029 */
1030 if (!(rt->rt6i_flags&RTF_GATEWAY))
1031 goto out;
1032
1033 /*
1034 * RFC 2461 specifies that redirects should only be
1035 * accepted if they come from the nexthop to the target.
1036 * Due to the way default routers are chosen, this notion
1037 * is a bit fuzzy and one might need to check all default
1038 * routers.
1039 */
1040 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1041 if (rt->rt6i_flags & RTF_DEFAULT) {
1042 struct rt6_info *rt1;
1043
1044 read_lock(&rt6_lock);
1045 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1046 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
/* Swap our reference over to the matching entry before unlocking. */
1047 dst_hold(&rt1->u.dst);
1048 dst_release(&rt->u.dst);
1049 read_unlock(&rt6_lock);
1050 rt = rt1;
1051 goto source_ok;
1052 }
1053 }
1054 read_unlock(&rt6_lock);
1055 }
1056 if (net_ratelimit())
1057 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1058 "for redirect target\n");
1059 goto out;
1060 }
1061
1062source_ok:
1063
1064 /*
1065 * We have finally decided to accept it.
1066 */
1067
1068 neigh_update(neigh, lladdr, NUD_STALE,
1069 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1070 NEIGH_UPDATE_F_OVERRIDE|
1071 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1072 NEIGH_UPDATE_F_ISROUTER))
1073 );
1074
1075 /*
1076 * Redirect received -> path was valid.
1077 * Look, redirects are sent only in response to data packets,
1078 * so that this nexthop apparently is reachable. --ANK
1079 */
1080 dst_confirm(&rt->u.dst);
1081
1082 /* Duplicate redirect: silently ignore. */
1083 if (neigh == rt->u.dst.neighbour)
1084 goto out;
1085
1086 nrt = ip6_rt_copy(rt);
1087 if (nrt == NULL)
1088 goto out;
1089
/* Flags are replaced, not OR-ed: nothing is inherited from the old route's flags. */
1090 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1091 if (on_link)
1092 nrt->rt6i_flags &= ~RTF_GATEWAY;
1093
1094 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1095 nrt->rt6i_dst.plen = 128;
1096 nrt->u.dst.flags |= DST_HOST;
1097
1098 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1099 nrt->rt6i_nexthop = neigh_clone(neigh);
1100 /* Reset pmtu, it may be better */
1101 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1102 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1103
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001104 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 goto out;
1106
/* ip6_del_rt() drops our reference on rt, hence the plain return. */
1107 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001108 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109 return;
1110 }
1111
1112out:
1113 dst_release(&rt->u.dst);
1114 return;
1115}
1116
1117/*
1118 * Handle ICMP "packet too big" messages
1119 * i.e. Path MTU discovery
1120 */
1121
1122void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1123 struct net_device *dev, u32 pmtu)
1124{
1125 struct rt6_info *rt, *nrt;
1126 int allfrag = 0;
1127
1128 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1129 if (rt == NULL)
1130 return;
1131
1132 if (pmtu >= dst_mtu(&rt->u.dst))
1133 goto out;
1134
1135 if (pmtu < IPV6_MIN_MTU) {
1136 /*
1137 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1138 * MTU (1280) and a fragment header should always be included
1139 * after a node receiving Too Big message reporting PMTU is
1140 * less than the IPv6 Minimum Link MTU.
1141 */
1142 pmtu = IPV6_MIN_MTU;
1143 allfrag = 1;
1144 }
1145
1146 /* New mtu received -> path was valid.
1147 They are sent only in response to data packets,
1148 so that this nexthop apparently is reachable. --ANK
1149 */
1150 dst_confirm(&rt->u.dst);
1151
1152 /* Host route. If it is static, it would be better
1153 not to override it, but add new one, so that
1154 when cache entry will expire old pmtu
1155 would return automatically.
1156 */
1157 if (rt->rt6i_flags & RTF_CACHE) {
1158 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1159 if (allfrag)
1160 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1161 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1162 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1163 goto out;
1164 }
1165
1166 /* Network route.
1167 Two cases are possible:
1168 1. It is connected route. Action: COW
1169 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1170 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001171 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001172 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001173 else
1174 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001175
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001176 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001177 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1178 if (allfrag)
1179 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1180
1181 /* According to RFC 1981, detecting PMTU increase shouldn't be
1182 * happened within 5 mins, the recommended timer is 10 mins.
1183 * Here this route expiration time is set to ip6_rt_mtu_expires
1184 * which is 10 mins. After 10 mins the decreased pmtu is expired
1185 * and detecting PMTU increase will be automatically happened.
1186 */
1187 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1188 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1189
1190 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192out:
1193 dst_release(&rt->u.dst);
1194}
1195
1196/*
1197 * Misc support functions
1198 */
1199
1200static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1201{
1202 struct rt6_info *rt = ip6_dst_alloc();
1203
1204 if (rt) {
1205 rt->u.dst.input = ort->u.dst.input;
1206 rt->u.dst.output = ort->u.dst.output;
1207
1208 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1209 rt->u.dst.dev = ort->u.dst.dev;
1210 if (rt->u.dst.dev)
1211 dev_hold(rt->u.dst.dev);
1212 rt->rt6i_idev = ort->rt6i_idev;
1213 if (rt->rt6i_idev)
1214 in6_dev_hold(rt->rt6i_idev);
1215 rt->u.dst.lastuse = jiffies;
1216 rt->rt6i_expires = 0;
1217
1218 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1219 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1220 rt->rt6i_metric = 0;
1221
1222 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1223#ifdef CONFIG_IPV6_SUBTREES
1224 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1225#endif
1226 }
1227 return rt;
1228}
1229
1230struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1231{
1232 struct rt6_info *rt;
1233 struct fib6_node *fn;
1234
1235 fn = &ip6_routing_table;
1236
1237 write_lock_bh(&rt6_lock);
1238 for (rt = fn->leaf; rt; rt=rt->u.next) {
1239 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001240 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1242 break;
1243 }
1244 if (rt)
1245 dst_hold(&rt->u.dst);
1246 write_unlock_bh(&rt6_lock);
1247 return rt;
1248}
1249
1250struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1251 struct net_device *dev)
1252{
1253 struct in6_rtmsg rtmsg;
1254
1255 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1256 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1257 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1258 rtmsg.rtmsg_metric = 1024;
1259 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1260
1261 rtmsg.rtmsg_ifindex = dev->ifindex;
1262
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001263 ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264 return rt6_get_dflt_router(gwaddr, dev);
1265}
1266
1267void rt6_purge_dflt_routers(void)
1268{
1269 struct rt6_info *rt;
1270
1271restart:
1272 read_lock_bh(&rt6_lock);
1273 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1274 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1275 dst_hold(&rt->u.dst);
1276
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 read_unlock_bh(&rt6_lock);
1278
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001279 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280
1281 goto restart;
1282 }
1283 }
1284 read_unlock_bh(&rt6_lock);
1285}
1286
1287int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1288{
1289 struct in6_rtmsg rtmsg;
1290 int err;
1291
1292 switch(cmd) {
1293 case SIOCADDRT: /* Add a route */
1294 case SIOCDELRT: /* Delete a route */
1295 if (!capable(CAP_NET_ADMIN))
1296 return -EPERM;
1297 err = copy_from_user(&rtmsg, arg,
1298 sizeof(struct in6_rtmsg));
1299 if (err)
1300 return -EFAULT;
1301
1302 rtnl_lock();
1303 switch (cmd) {
1304 case SIOCADDRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001305 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 break;
1307 case SIOCDELRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001308 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 break;
1310 default:
1311 err = -EINVAL;
1312 }
1313 rtnl_unlock();
1314
1315 return err;
1316 };
1317
1318 return -EINVAL;
1319}
1320
1321/*
1322 * Drop the packet on the floor
1323 */
1324
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001325static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326{
1327 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1328 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1329 kfree_skb(skb);
1330 return 0;
1331}
1332
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001333static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334{
1335 skb->dev = skb->dst->dev;
1336 return ip6_pkt_discard(skb);
1337}
1338
1339/*
1340 * Allocate a dst for local (unicast / anycast) address.
1341 */
1342
1343struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1344 const struct in6_addr *addr,
1345 int anycast)
1346{
1347 struct rt6_info *rt = ip6_dst_alloc();
1348
1349 if (rt == NULL)
1350 return ERR_PTR(-ENOMEM);
1351
1352 dev_hold(&loopback_dev);
1353 in6_dev_hold(idev);
1354
1355 rt->u.dst.flags = DST_HOST;
1356 rt->u.dst.input = ip6_input;
1357 rt->u.dst.output = ip6_output;
1358 rt->rt6i_dev = &loopback_dev;
1359 rt->rt6i_idev = idev;
1360 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1361 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1362 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1363 rt->u.dst.obsolete = -1;
1364
1365 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001366 if (anycast)
1367 rt->rt6i_flags |= RTF_ANYCAST;
1368 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 rt->rt6i_flags |= RTF_LOCAL;
1370 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1371 if (rt->rt6i_nexthop == NULL) {
1372 dst_free((struct dst_entry *) rt);
1373 return ERR_PTR(-ENOMEM);
1374 }
1375
1376 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1377 rt->rt6i_dst.plen = 128;
1378
1379 atomic_set(&rt->u.dst.__refcnt, 1);
1380
1381 return rt;
1382}
1383
1384static int fib6_ifdown(struct rt6_info *rt, void *arg)
1385{
1386 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1387 rt != &ip6_null_entry) {
1388 RT6_TRACE("deleted by ifdown %p\n", rt);
1389 return -1;
1390 }
1391 return 0;
1392}
1393
1394void rt6_ifdown(struct net_device *dev)
1395{
1396 write_lock_bh(&rt6_lock);
1397 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1398 write_unlock_bh(&rt6_lock);
1399}
1400
/* Argument bundle handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* its new MTU */
};
1406
1407static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1408{
1409 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1410 struct inet6_dev *idev;
1411
1412 /* In IPv6 pmtu discovery is not optional,
1413 so that RTAX_MTU lock cannot disable it.
1414 We still use this lock to block changes
1415 caused by addrconf/ndisc.
1416 */
1417
1418 idev = __in6_dev_get(arg->dev);
1419 if (idev == NULL)
1420 return 0;
1421
1422 /* For administrative MTU increase, there is no way to discover
1423 IPv6 PMTU increase, so PMTU increase should be updated here.
1424 Since RFC 1981 doesn't include administrative MTU increase
1425 update PMTU increase is a MUST. (i.e. jumbo frame)
1426 */
1427 /*
1428 If new MTU is less than route PMTU, this new MTU will be the
1429 lowest MTU in the path, update the route PMTU to reflect PMTU
1430 decreases; if new MTU is greater than route PMTU, and the
1431 old MTU is the lowest MTU in the path, update the route PMTU
1432 to reflect the increase. In this case if the other nodes' MTU
1433 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1434 PMTU discouvery.
1435 */
1436 if (rt->rt6i_dev == arg->dev &&
1437 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1438 (dst_mtu(&rt->u.dst) > arg->mtu ||
1439 (dst_mtu(&rt->u.dst) < arg->mtu &&
1440 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1441 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1442 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1443 return 0;
1444}
1445
1446void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1447{
1448 struct rt6_mtu_change_arg arg;
1449
1450 arg.dev = dev;
1451 arg.mtu = mtu;
1452 read_lock_bh(&rt6_lock);
1453 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1454 read_unlock_bh(&rt6_lock);
1455}
1456
1457static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1458 struct in6_rtmsg *rtmsg)
1459{
1460 memset(rtmsg, 0, sizeof(*rtmsg));
1461
1462 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1463 rtmsg->rtmsg_src_len = r->rtm_src_len;
1464 rtmsg->rtmsg_flags = RTF_UP;
1465 if (r->rtm_type == RTN_UNREACHABLE)
1466 rtmsg->rtmsg_flags |= RTF_REJECT;
1467
1468 if (rta[RTA_GATEWAY-1]) {
1469 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1470 return -EINVAL;
1471 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1472 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1473 }
1474 if (rta[RTA_DST-1]) {
1475 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1476 return -EINVAL;
1477 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1478 }
1479 if (rta[RTA_SRC-1]) {
1480 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1481 return -EINVAL;
1482 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1483 }
1484 if (rta[RTA_OIF-1]) {
1485 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1486 return -EINVAL;
1487 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1488 }
1489 if (rta[RTA_PRIORITY-1]) {
1490 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1491 return -EINVAL;
1492 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1493 }
1494 return 0;
1495}
1496
1497int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1498{
1499 struct rtmsg *r = NLMSG_DATA(nlh);
1500 struct in6_rtmsg rtmsg;
1501
1502 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1503 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001504 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001505}
1506
1507int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1508{
1509 struct rtmsg *r = NLMSG_DATA(nlh);
1510 struct in6_rtmsg rtmsg;
1511
1512 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1513 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001514 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001515}
1516
/* Per-call state handed to the dump walker (see rt6_dump_route()). */
struct rt6_rtnl_dump_arg {
	struct sk_buff		*skb;	/* message buffer being filled */
	struct netlink_callback	*cb;	/* dump callback state of the request */
};
1522
1523static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001524 struct in6_addr *dst, struct in6_addr *src,
1525 int iif, int type, u32 pid, u32 seq,
1526 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527{
1528 struct rtmsg *rtm;
1529 struct nlmsghdr *nlh;
1530 unsigned char *b = skb->tail;
1531 struct rta_cacheinfo ci;
1532
1533 if (prefix) { /* user wants prefix routes only */
1534 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1535 /* success since this is not a prefix route */
1536 return 1;
1537 }
1538 }
1539
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001540 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001541 rtm = NLMSG_DATA(nlh);
1542 rtm->rtm_family = AF_INET6;
1543 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1544 rtm->rtm_src_len = rt->rt6i_src.plen;
1545 rtm->rtm_tos = 0;
1546 rtm->rtm_table = RT_TABLE_MAIN;
1547 if (rt->rt6i_flags&RTF_REJECT)
1548 rtm->rtm_type = RTN_UNREACHABLE;
1549 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1550 rtm->rtm_type = RTN_LOCAL;
1551 else
1552 rtm->rtm_type = RTN_UNICAST;
1553 rtm->rtm_flags = 0;
1554 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1555 rtm->rtm_protocol = rt->rt6i_protocol;
1556 if (rt->rt6i_flags&RTF_DYNAMIC)
1557 rtm->rtm_protocol = RTPROT_REDIRECT;
1558 else if (rt->rt6i_flags & RTF_ADDRCONF)
1559 rtm->rtm_protocol = RTPROT_KERNEL;
1560 else if (rt->rt6i_flags&RTF_DEFAULT)
1561 rtm->rtm_protocol = RTPROT_RA;
1562
1563 if (rt->rt6i_flags&RTF_CACHE)
1564 rtm->rtm_flags |= RTM_F_CLONED;
1565
1566 if (dst) {
1567 RTA_PUT(skb, RTA_DST, 16, dst);
1568 rtm->rtm_dst_len = 128;
1569 } else if (rtm->rtm_dst_len)
1570 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1571#ifdef CONFIG_IPV6_SUBTREES
1572 if (src) {
1573 RTA_PUT(skb, RTA_SRC, 16, src);
1574 rtm->rtm_src_len = 128;
1575 } else if (rtm->rtm_src_len)
1576 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1577#endif
1578 if (iif)
1579 RTA_PUT(skb, RTA_IIF, 4, &iif);
1580 else if (dst) {
1581 struct in6_addr saddr_buf;
1582 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1583 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1584 }
1585 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1586 goto rtattr_failure;
1587 if (rt->u.dst.neighbour)
1588 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1589 if (rt->u.dst.dev)
1590 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1591 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1592 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1593 if (rt->rt6i_expires)
1594 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1595 else
1596 ci.rta_expires = 0;
1597 ci.rta_used = rt->u.dst.__use;
1598 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1599 ci.rta_error = rt->u.dst.error;
1600 ci.rta_id = 0;
1601 ci.rta_ts = 0;
1602 ci.rta_tsage = 0;
1603 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1604 nlh->nlmsg_len = skb->tail - b;
1605 return skb->len;
1606
1607nlmsg_failure:
1608rtattr_failure:
1609 skb_trim(skb, b - skb->data);
1610 return -1;
1611}
1612
1613static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1614{
1615 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1616 int prefix;
1617
1618 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1619 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1620 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1621 } else
1622 prefix = 0;
1623
1624 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1625 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001626 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627}
1628
1629static int fib6_dump_node(struct fib6_walker_t *w)
1630{
1631 int res;
1632 struct rt6_info *rt;
1633
1634 for (rt = w->leaf; rt; rt = rt->u.next) {
1635 res = rt6_dump_route(rt, w->args);
1636 if (res < 0) {
1637 /* Frame is full, suspend walking */
1638 w->leaf = rt;
1639 return 1;
1640 }
1641 BUG_TRAP(res!=0);
1642 }
1643 w->leaf = NULL;
1644 return 0;
1645}
1646
1647static void fib6_dump_end(struct netlink_callback *cb)
1648{
1649 struct fib6_walker_t *w = (void*)cb->args[0];
1650
1651 if (w) {
1652 cb->args[0] = 0;
1653 fib6_walker_unlink(w);
1654 kfree(w);
1655 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08001656 cb->done = (void*)cb->args[1];
1657 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658}
1659
1660static int fib6_dump_done(struct netlink_callback *cb)
1661{
1662 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01001663 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664}
1665
1666int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1667{
1668 struct rt6_rtnl_dump_arg arg;
1669 struct fib6_walker_t *w;
1670 int res;
1671
1672 arg.skb = skb;
1673 arg.cb = cb;
1674
1675 w = (void*)cb->args[0];
1676 if (w == NULL) {
1677 /* New dump:
1678 *
1679 * 1. hook callback destructor.
1680 */
1681 cb->args[1] = (long)cb->done;
1682 cb->done = fib6_dump_done;
1683
1684 /*
1685 * 2. allocate and initialize walker.
1686 */
David S. Miller9e147a12005-11-17 16:52:51 -08001687 w = kmalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 if (w == NULL)
1689 return -ENOMEM;
1690 RT6_TRACE("dump<%p", w);
1691 memset(w, 0, sizeof(*w));
1692 w->root = &ip6_routing_table;
1693 w->func = fib6_dump_node;
1694 w->args = &arg;
1695 cb->args[0] = (long)w;
1696 read_lock_bh(&rt6_lock);
1697 res = fib6_walk(w);
1698 read_unlock_bh(&rt6_lock);
1699 } else {
1700 w->args = &arg;
1701 read_lock_bh(&rt6_lock);
1702 res = fib6_walk_continue(w);
1703 read_unlock_bh(&rt6_lock);
1704 }
1705#if RT6_DEBUG >= 3
1706 if (res <= 0 && skb->len == 0)
1707 RT6_TRACE("%p>dump end\n", w);
1708#endif
1709 res = res < 0 ? res : skb->len;
1710 /* res < 0 is an error. (really, impossible)
1711 res == 0 means that dump is complete, but skb still can contain data.
1712 res > 0 dump is not complete, but frame is full.
1713 */
1714 /* Destroy walker, if dump of this table is complete. */
1715 if (res <= 0)
1716 fib6_dump_end(cb);
1717 return res;
1718}
1719
1720int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1721{
1722 struct rtattr **rta = arg;
1723 int iif = 0;
1724 int err = -ENOBUFS;
1725 struct sk_buff *skb;
1726 struct flowi fl;
1727 struct rt6_info *rt;
1728
1729 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1730 if (skb == NULL)
1731 goto out;
1732
1733 /* Reserve room for dummy headers, this skb can pass
1734 through good chunk of routing engine.
1735 */
1736 skb->mac.raw = skb->data;
1737 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1738
1739 memset(&fl, 0, sizeof(fl));
1740 if (rta[RTA_SRC-1])
1741 ipv6_addr_copy(&fl.fl6_src,
1742 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1743 if (rta[RTA_DST-1])
1744 ipv6_addr_copy(&fl.fl6_dst,
1745 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1746
1747 if (rta[RTA_IIF-1])
1748 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1749
1750 if (iif) {
1751 struct net_device *dev;
1752 dev = __dev_get_by_index(iif);
1753 if (!dev) {
1754 err = -ENODEV;
1755 goto out_free;
1756 }
1757 }
1758
1759 fl.oif = 0;
1760 if (rta[RTA_OIF-1])
1761 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1762
1763 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1764
1765 skb->dst = &rt->u.dst;
1766
1767 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1768 err = rt6_fill_node(skb, rt,
1769 &fl.fl6_dst, &fl.fl6_src,
1770 iif,
1771 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001772 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773 if (err < 0) {
1774 err = -EMSGSIZE;
1775 goto out_free;
1776 }
1777
1778 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1779 if (err > 0)
1780 err = 0;
1781out:
1782 return err;
1783out_free:
1784 kfree_skb(skb);
1785 goto out;
1786}
1787
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001788void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1789 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790{
1791 struct sk_buff *skb;
1792 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001793 u32 pid = current->pid;
1794 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001796 if (req)
1797 pid = req->pid;
1798 if (nlh)
1799 seq = nlh->nlmsg_seq;
1800
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801 skb = alloc_skb(size, gfp_any());
1802 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07001803 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804 return;
1805 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001806 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07001808 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809 return;
1810 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07001811 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1812 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813}
1814
1815/*
1816 * /proc
1817 */
1818
1819#ifdef CONFIG_PROC_FS
1820
/*
 * Size of one record as laid out by rt6_info_route(): 150 characters
 * (three 32-digit hex addresses, two 4-character prefix-length fields
 * and the trailing metric/refcnt/use/flags/device columns with newline).
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char	*buffer;	/* output buffer */
	int	offset;		/* offset requested by the reader */
	int	length;		/* number of bytes requested */
	int	skip;		/* records skipped so far */
	int	len;		/* bytes written into buffer so far */
};
1831
1832static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1833{
1834 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1835 int i;
1836
1837 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1838 arg->skip++;
1839 return 0;
1840 }
1841
1842 if (arg->len >= arg->length)
1843 return 0;
1844
1845 for (i=0; i<16; i++) {
1846 sprintf(arg->buffer + arg->len, "%02x",
1847 rt->rt6i_dst.addr.s6_addr[i]);
1848 arg->len += 2;
1849 }
1850 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1851 rt->rt6i_dst.plen);
1852
1853#ifdef CONFIG_IPV6_SUBTREES
1854 for (i=0; i<16; i++) {
1855 sprintf(arg->buffer + arg->len, "%02x",
1856 rt->rt6i_src.addr.s6_addr[i]);
1857 arg->len += 2;
1858 }
1859 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1860 rt->rt6i_src.plen);
1861#else
1862 sprintf(arg->buffer + arg->len,
1863 "00000000000000000000000000000000 00 ");
1864 arg->len += 36;
1865#endif
1866
1867 if (rt->rt6i_nexthop) {
1868 for (i=0; i<16; i++) {
1869 sprintf(arg->buffer + arg->len, "%02x",
1870 rt->rt6i_nexthop->primary_key[i]);
1871 arg->len += 2;
1872 }
1873 } else {
1874 sprintf(arg->buffer + arg->len,
1875 "00000000000000000000000000000000");
1876 arg->len += 32;
1877 }
1878 arg->len += sprintf(arg->buffer + arg->len,
1879 " %08x %08x %08x %08x %8s\n",
1880 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1881 rt->u.dst.__use, rt->rt6i_flags,
1882 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1883 return 0;
1884}
1885
1886static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1887{
1888 struct rt6_proc_arg arg;
1889 arg.buffer = buffer;
1890 arg.offset = offset;
1891 arg.length = length;
1892 arg.skip = 0;
1893 arg.len = 0;
1894
1895 read_lock_bh(&rt6_lock);
1896 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1897 read_unlock_bh(&rt6_lock);
1898
1899 *start = buffer;
1900 if (offset)
1901 *start += offset % RT6_INFO_LEN;
1902
1903 arg.len -= offset % RT6_INFO_LEN;
1904
1905 if (arg.len > length)
1906 arg.len = length;
1907 if (arg.len < 0)
1908 arg.len = 0;
1909
1910 return arg.len;
1911}
1912
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1914{
1915 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1916 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1917 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1918 rt6_stats.fib_rt_cache,
1919 atomic_read(&ip6_dst_ops.entries),
1920 rt6_stats.fib_discarded_routes);
1921
1922 return 0;
1923}
1924
1925static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1926{
1927 return single_open(file, rt6_stats_seq_show, NULL);
1928}
1929
1930static struct file_operations rt6_stats_seq_fops = {
1931 .owner = THIS_MODULE,
1932 .open = rt6_stats_seq_open,
1933 .read = seq_read,
1934 .llseek = seq_lseek,
1935 .release = single_release,
1936};
1937#endif /* CONFIG_PROC_FS */
1938
1939#ifdef CONFIG_SYSCTL
1940
1941static int flush_delay;
1942
1943static
1944int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1945 void __user *buffer, size_t *lenp, loff_t *ppos)
1946{
1947 if (write) {
1948 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1949 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1950 return 0;
1951 } else
1952 return -EINVAL;
1953}
1954
1955ctl_table ipv6_route_table[] = {
1956 {
1957 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1958 .procname = "flush",
1959 .data = &flush_delay,
1960 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07001961 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001962 .proc_handler = &ipv6_sysctl_rtcache_flush
1963 },
1964 {
1965 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
1966 .procname = "gc_thresh",
1967 .data = &ip6_dst_ops.gc_thresh,
1968 .maxlen = sizeof(int),
1969 .mode = 0644,
1970 .proc_handler = &proc_dointvec,
1971 },
1972 {
1973 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
1974 .procname = "max_size",
1975 .data = &ip6_rt_max_size,
1976 .maxlen = sizeof(int),
1977 .mode = 0644,
1978 .proc_handler = &proc_dointvec,
1979 },
1980 {
1981 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1982 .procname = "gc_min_interval",
1983 .data = &ip6_rt_gc_min_interval,
1984 .maxlen = sizeof(int),
1985 .mode = 0644,
1986 .proc_handler = &proc_dointvec_jiffies,
1987 .strategy = &sysctl_jiffies,
1988 },
1989 {
1990 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
1991 .procname = "gc_timeout",
1992 .data = &ip6_rt_gc_timeout,
1993 .maxlen = sizeof(int),
1994 .mode = 0644,
1995 .proc_handler = &proc_dointvec_jiffies,
1996 .strategy = &sysctl_jiffies,
1997 },
1998 {
1999 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2000 .procname = "gc_interval",
2001 .data = &ip6_rt_gc_interval,
2002 .maxlen = sizeof(int),
2003 .mode = 0644,
2004 .proc_handler = &proc_dointvec_jiffies,
2005 .strategy = &sysctl_jiffies,
2006 },
2007 {
2008 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2009 .procname = "gc_elasticity",
2010 .data = &ip6_rt_gc_elasticity,
2011 .maxlen = sizeof(int),
2012 .mode = 0644,
2013 .proc_handler = &proc_dointvec_jiffies,
2014 .strategy = &sysctl_jiffies,
2015 },
2016 {
2017 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2018 .procname = "mtu_expires",
2019 .data = &ip6_rt_mtu_expires,
2020 .maxlen = sizeof(int),
2021 .mode = 0644,
2022 .proc_handler = &proc_dointvec_jiffies,
2023 .strategy = &sysctl_jiffies,
2024 },
2025 {
2026 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2027 .procname = "min_adv_mss",
2028 .data = &ip6_rt_min_advmss,
2029 .maxlen = sizeof(int),
2030 .mode = 0644,
2031 .proc_handler = &proc_dointvec_jiffies,
2032 .strategy = &sysctl_jiffies,
2033 },
2034 {
2035 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2036 .procname = "gc_min_interval_ms",
2037 .data = &ip6_rt_gc_min_interval,
2038 .maxlen = sizeof(int),
2039 .mode = 0644,
2040 .proc_handler = &proc_dointvec_ms_jiffies,
2041 .strategy = &sysctl_ms_jiffies,
2042 },
2043 { .ctl_name = 0 }
2044};
2045
2046#endif
2047
2048void __init ip6_route_init(void)
2049{
2050 struct proc_dir_entry *p;
2051
2052 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2053 sizeof(struct rt6_info),
2054 0, SLAB_HWCACHE_ALIGN,
2055 NULL, NULL);
2056 if (!ip6_dst_ops.kmem_cachep)
2057 panic("cannot create ip6_dst_cache");
2058
2059 fib6_init();
2060#ifdef CONFIG_PROC_FS
2061 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2062 if (p)
2063 p->owner = THIS_MODULE;
2064
2065 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2066#endif
2067#ifdef CONFIG_XFRM
2068 xfrm6_init();
2069#endif
2070}
2071
2072void ip6_route_cleanup(void)
2073{
2074#ifdef CONFIG_PROC_FS
2075 proc_net_remove("ipv6_route");
2076 proc_net_remove("rt6_stats");
2077#endif
2078#ifdef CONFIG_XFRM
2079 xfrm6_fini();
2080#endif
2081 rt6_ifdown(NULL);
2082 fib6_gc_cleanup();
2083 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2084}