blob: e16c9825c4dc1d689f599f0d66c8eb260e1c70bd [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
/*
 * Debug output helpers, selected at compile time by RT6_DEBUG:
 *  - RT6_DEBUG >= 3: RDBG() hands its parenthesised argument list to
 *    printk, and RT6_TRACE() prints its arguments at KERN_DEBUG level.
 *  - otherwise: RDBG() expands to nothing and RT6_TRACE() to an empty
 *    do/while statement, so their arguments are never evaluated.
 */
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
/*
 * CLONE_OFFLINK_ROUTE: when non-zero, ip6_route_input() and
 * ip6_route_output() call rt6_alloc_clone() for a selected route that
 * already has a nexthop (or is RTF_NONEXTHOP); when zero they hand back
 * the selected route itself in that case.
 */
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -080075#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
/*
 * Bit flags carried in the "strict" argument of rt6_select() and
 * rt6_score_route():
 *  RT6_SELECT_F_IFACE     - reject routes whose device does not match oif
 *  RT6_SELECT_F_REACHABLE - reject routes whose nexthop neighbour is not
 *                           in a NUD_VALID state
 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -080077#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
79
/*
 * Tunables for the IPv6 routing cache (times are in jiffies):
 *  ip6_rt_max_size        - ip6_dst_gc() reports pressure once the number
 *                           of dst entries exceeds this
 *  ip6_rt_gc_min_interval - ip6_dst_gc() skips a run if the previous one
 *                           was more recent than this (unless over max_size)
 *  ip6_rt_gc_timeout      - ip6_dst_gc() resets its expiry to half of this
 *                           when the entry count is below gc_thresh
 *  ip6_rt_gc_interval     - not referenced in this part of the file;
 *                           presumably the periodic GC period (TODO confirm)
 *  ip6_rt_gc_elasticity   - shift used by ip6_dst_gc() to decay its expiry
 *  ip6_rt_mtu_expires     - not referenced in this part of the file;
 *                           presumably the lifetime of learned PMTU values
 *                           (TODO confirm)
 *  ip6_rt_min_advmss      - lower bound applied by ipv6_advmss()
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
/*
 * Forward declarations for file-local helpers.  The dst_ops callbacks
 * and the discard handlers are needed by the ip6_dst_ops and
 * ip6_null_entry initialisers that follow.
 */
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
/* Route Information option helpers, used by rt6_route_rcv(). */
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
/*
 * dst_ops table for IPv6 routes: wires the generic destination cache
 * callbacks (gc, check, destroy, ifdown, negative_advice, link_failure,
 * update_pmtu) to the implementations in this file and declares that
 * each entry is a struct rt6_info.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
/*
 * Statically allocated "no route" entry.  It is a reject route bound to
 * the loopback device whose input/output handlers discard packets and
 * whose dst error is -ENETUNREACH.  The lookup code in this file returns
 * a pointer to it (and compares against it) when nothing matches; it is
 * also the initial leaf of ip6_routing_table.
 */
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
/*
 * Root node of the IPv6 FIB tree.  It starts out holding only
 * ip6_null_entry as its leaf and is flagged as a (top-level) root that
 * carries route information.
 */
143struct fib6_node ip6_routing_table = {
144 .leaf = &ip6_null_entry,
145 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
146};
147
148/* Protects all the ip6 fib */
149
150DEFINE_RWLOCK(rt6_lock);
151
152
153/* allocate dst with ip6_dst_ops */
154static __inline__ struct rt6_info *ip6_dst_alloc(void)
155{
156 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
157}
158
159static void ip6_dst_destroy(struct dst_entry *dst)
160{
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
163
164 if (idev != NULL) {
165 rt->rt6i_idev = NULL;
166 in6_dev_put(idev);
167 }
168}
169
170static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
171 int how)
172{
173 struct rt6_info *rt = (struct rt6_info *)dst;
174 struct inet6_dev *idev = rt->rt6i_idev;
175
176 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
177 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
178 if (loopback_idev != NULL) {
179 rt->rt6i_idev = loopback_idev;
180 in6_dev_put(idev);
181 }
182 }
183}
184
185static __inline__ int rt6_check_expired(const struct rt6_info *rt)
186{
187 return (rt->rt6i_flags & RTF_EXPIRES &&
188 time_after(jiffies, rt->rt6i_expires));
189}
190
191/*
192 * Route lookup. Any rt6_lock is implied.
193 */
194
195static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
196 int oif,
197 int strict)
198{
199 struct rt6_info *local = NULL;
200 struct rt6_info *sprt;
201
202 if (oif) {
203 for (sprt = rt; sprt; sprt = sprt->u.next) {
204 struct net_device *dev = sprt->rt6i_dev;
205 if (dev->ifindex == oif)
206 return sprt;
207 if (dev->flags & IFF_LOOPBACK) {
208 if (sprt->rt6i_idev == NULL ||
209 sprt->rt6i_idev->dev->ifindex != oif) {
210 if (strict && oif)
211 continue;
212 if (local && (!oif ||
213 local->rt6i_idev->dev->ifindex == oif))
214 continue;
215 }
216 local = sprt;
217 }
218 }
219
220 if (local)
221 return local;
222
223 if (strict)
224 return &ip6_null_entry;
225 }
226 return rt;
227}
228
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the nexthop neighbour of @rt is not in a NUD_VALID state and was
 * last updated more than the interface's rtr_probe_interval ago, stamp
 * it as updated now and send a Neighbour Solicitation for it to its
 * solicited-node multicast address.  The time stamp is what rate-limits
 * the probes.  A NULL @rt, or a route without nexthop, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* cheap unlocked test first; re-checked under the lock below */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* the solicitation itself is sent with the neighbour lock dropped */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
264
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800266 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800268static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800270 struct net_device *dev = rt->rt6i_dev;
271 if (!oif || dev->ifindex == oif)
272 return 2;
273 if ((dev->flags & IFF_LOOPBACK) &&
274 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
275 return 1;
276 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277}
278
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800279static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800281 struct neighbour *neigh = rt->rt6i_nexthop;
282 int m = 0;
283 if (neigh) {
284 read_lock_bh(&neigh->lock);
285 if (neigh->nud_state & NUD_VALID)
286 m = 1;
287 read_unlock_bh(&neigh->lock);
288 }
289 return m;
290}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800292static int rt6_score_route(struct rt6_info *rt, int oif,
293 int strict)
294{
295 int m = rt6_check_dev(rt, oif);
296 if (!m && (strict & RT6_SELECT_F_IFACE))
297 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800298#ifdef CONFIG_IPV6_ROUTER_PREF
299 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
300#endif
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800301 if (rt6_check_neigh(rt))
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800302 m |= 16;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800303 else if (strict & RT6_SELECT_F_REACHABLE)
304 return -1;
305 return m;
306}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800308static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
309 int strict)
310{
311 struct rt6_info *match = NULL, *last = NULL;
312 struct rt6_info *rt, *rt0 = *head;
313 u32 metric;
314 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800316 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
317 __FUNCTION__, head, head ? *head : NULL, oif);
318
319 for (rt = rt0, metric = rt0->rt6i_metric;
320 rt && rt->rt6i_metric == metric;
321 rt = rt->u.next) {
322 int m;
323
324 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 continue;
326
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 m = rt6_score_route(rt, oif, strict);
330 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800333 if (m > mpri) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800334 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336 mpri = m;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800337 } else {
338 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 }
340 }
341
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800342 if (!match &&
343 (strict & RT6_SELECT_F_REACHABLE) &&
344 last && last != rt0) {
345 /* no entries matched; do round-robin */
346 *head = rt0->u.next;
347 rt0->u.next = last->u.next;
348 last->u.next = rt0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 }
350
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800351 RT6_TRACE("%s() => %p, score=%d\n",
352 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800354 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355}
356
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option received in a Router
 * Advertisement.
 *
 * @dev:    interface the advertisement arrived on
 * @opt:    start of the option (struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as gateway
 *
 * Depending on the advertised lifetime the matching route is deleted
 * (lifetime 0), created, or refreshed with the new preference and
 * expiry.  A lifetime of 0xffffffff means "never expires".
 *
 * Returns 0 when the option was processed (even if no route could be
 * added) and -EINVAL when the option is malformed or must be ignored.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/* len is signed: reject negatives before the unsigned comparison */
	if (len < 0 || len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length: the option length (in
	 * units of 8 octets) must be large enough to hold prefix_len bits.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/*
	 * RFC 4191, 2.3: a Route Information option carrying the reserved
	 * preference value MUST be ignored (only the default-router
	 * preference in the RA header falls back to "medium").
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	/* the lifetime is carried in network byte order */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* lifetime 0 withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/*
		 * Drop the reference on rt; presumably taken by the
		 * get/add helper above (TODO confirm).
		 */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
434
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
436 int oif, int strict)
437{
438 struct fib6_node *fn;
439 struct rt6_info *rt;
440
441 read_lock_bh(&rt6_lock);
442 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
443 rt = rt6_device_match(fn->leaf, oif, strict);
444 dst_hold(&rt->u.dst);
445 rt->u.dst.__use++;
446 read_unlock_bh(&rt6_lock);
447
448 rt->u.dst.lastuse = jiffies;
449 if (rt->u.dst.error == 0)
450 return rt;
451 dst_release(&rt->u.dst);
452 return NULL;
453}
454
455/* ip6_ins_rt is called with FREE rt6_lock.
456 It takes new route entry, the addition fails by any reason the
457 route is freed. In any case, if caller does not hold it, it may
458 be destroyed.
459 */
460
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700461int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
462 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463{
464 int err;
465
466 write_lock_bh(&rt6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700467 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468 write_unlock_bh(&rt6_lock);
469
470 return err;
471}
472
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800473static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
474 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 struct rt6_info *rt;
477
478 /*
479 * Clone the route.
480 */
481
482 rt = ip6_rt_copy(ort);
483
484 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900485 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
486 if (rt->rt6i_dst.plen != 128 &&
487 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
488 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700489 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900490 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900492 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 rt->rt6i_dst.plen = 128;
494 rt->rt6i_flags |= RTF_CACHE;
495 rt->u.dst.flags |= DST_HOST;
496
497#ifdef CONFIG_IPV6_SUBTREES
498 if (rt->rt6i_src.plen && saddr) {
499 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
500 rt->rt6i_src.plen = 128;
501 }
502#endif
503
504 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
505
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800506 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700507
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800508 return rt;
509}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800511static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
512{
513 struct rt6_info *rt = ip6_rt_copy(ort);
514 if (rt) {
515 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
516 rt->rt6i_dst.plen = 128;
517 rt->rt6i_flags |= RTF_CACHE;
518 if (rt->rt6i_flags & RTF_REJECT)
519 rt->u.dst.error = ort->u.dst.error;
520 rt->u.dst.flags |= DST_HOST;
521 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
522 }
523 return rt;
524}
525
/*
 * BACKTRACK() - retry the selection further up the fib tree.
 *
 * To be used right after a route selection inside a function that has
 * local variables "rt" and "fn" and labels "restart" and "out".  If the
 * selection produced ip6_null_entry, walk up through the parents of
 * "fn": jump to "out" when a root node is reached, or to "restart" at
 * the first ancestor that carries route information.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and cannot capture a following "else".
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
536
537
538void ip6_route_input(struct sk_buff *skb)
539{
540 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800541 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542 int strict;
543 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800544 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800545 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546
YOSHIFUJI Hideaki118f8c12006-03-20 17:01:06 -0800547 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548
549relookup:
550 read_lock_bh(&rt6_lock);
551
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800552restart_2:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
554 &skb->nh.ipv6h->saddr);
555
556restart:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800557 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800559 if (rt == &ip6_null_entry ||
560 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800561 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800563 dst_hold(&rt->u.dst);
564 read_unlock_bh(&rt6_lock);
565
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800566 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
567 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
568 else {
569#if CLONE_OFFLINK_ROUTE
570 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
571#else
572 goto out2;
573#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800576 dst_release(&rt->u.dst);
577 rt = nrt ? : &ip6_null_entry;
578
579 dst_hold(&rt->u.dst);
580 if (nrt) {
581 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
582 if (!err)
583 goto out2;
584 }
585
586 if (--attempts <= 0)
587 goto out2;
588
589 /*
590 * Race condition! In the gap, when rt6_lock was
591 * released someone could insert this route. Relookup.
592 */
593 dst_release(&rt->u.dst);
594 goto relookup;
595
596out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800597 if (reachable) {
598 reachable = 0;
599 goto restart_2;
600 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800601 dst_hold(&rt->u.dst);
602 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603out2:
604 rt->u.dst.lastuse = jiffies;
605 rt->u.dst.__use++;
606 skb->dst = (struct dst_entry *) rt;
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800607 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608}
609
610struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
611{
612 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800613 struct rt6_info *rt, *nrt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 int strict;
615 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800616 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800617 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800619 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620
621relookup:
622 read_lock_bh(&rt6_lock);
623
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800624restart_2:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
626
627restart:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800628 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800629 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800630 if (rt == &ip6_null_entry ||
631 rt->rt6i_flags & RTF_CACHE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800634 dst_hold(&rt->u.dst);
635 read_unlock_bh(&rt6_lock);
636
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800637 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800638 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800639 else {
640#if CLONE_OFFLINK_ROUTE
641 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
642#else
643 goto out2;
644#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800646
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800647 dst_release(&rt->u.dst);
648 rt = nrt ? : &ip6_null_entry;
649
650 dst_hold(&rt->u.dst);
651 if (nrt) {
652 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
653 if (!err)
654 goto out2;
655 }
656
657 if (--attempts <= 0)
658 goto out2;
659
660 /*
661 * Race condition! In the gap, when rt6_lock was
662 * released someone could insert this route. Relookup.
663 */
664 dst_release(&rt->u.dst);
665 goto relookup;
666
667out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800668 if (reachable) {
669 reachable = 0;
670 goto restart_2;
671 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800672 dst_hold(&rt->u.dst);
673 read_unlock_bh(&rt6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674out2:
675 rt->u.dst.lastuse = jiffies;
676 rt->u.dst.__use++;
677 return &rt->u.dst;
678}
679
680
681/*
682 * Destination cache support functions
683 */
684
685static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
686{
687 struct rt6_info *rt;
688
689 rt = (struct rt6_info *) dst;
690
691 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
692 return dst;
693
694 return NULL;
695}
696
697static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
698{
699 struct rt6_info *rt = (struct rt6_info *) dst;
700
701 if (rt) {
702 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700703 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 else
705 dst_release(dst);
706 }
707 return NULL;
708}
709
710static void ip6_link_failure(struct sk_buff *skb)
711{
712 struct rt6_info *rt;
713
714 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
715
716 rt = (struct rt6_info *) skb->dst;
717 if (rt) {
718 if (rt->rt6i_flags&RTF_CACHE) {
719 dst_set_expires(&rt->u.dst, 0);
720 rt->rt6i_flags |= RTF_EXPIRES;
721 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
722 rt->rt6i_node->fn_sernum = -1;
723 }
724}
725
726static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
727{
728 struct rt6_info *rt6 = (struct rt6_info*)dst;
729
730 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
731 rt6->rt6i_flags |= RTF_MODIFIED;
732 if (mtu < IPV6_MIN_MTU) {
733 mtu = IPV6_MIN_MTU;
734 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
735 }
736 dst->metrics[RTAX_MTU-1] = mtu;
737 }
738}
739
740/* Protected by rt6_lock. */
/*
 * Singly linked list (through dst->next) of the dst entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the ones whose reference
 * count has dropped to zero.
 */
741static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration; the definition follows further down in this file. */
742static int ipv6_get_mtu(struct net_device *dev);
743
744static inline unsigned int ipv6_advmss(unsigned int mtu)
745{
746 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
747
748 if (mtu < ip6_rt_min_advmss)
749 mtu = ip6_rt_min_advmss;
750
751 /*
752 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
753 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
754 * IPV6_MAXPLEN is also valid and means: "any MSS,
755 * rely only on pmtu discovery"
756 */
757 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
758 mtu = IPV6_MAXPLEN;
759 return mtu;
760}
761
762struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
763 struct neighbour *neigh,
764 struct in6_addr *addr,
765 int (*output)(struct sk_buff *))
766{
767 struct rt6_info *rt;
768 struct inet6_dev *idev = in6_dev_get(dev);
769
770 if (unlikely(idev == NULL))
771 return NULL;
772
773 rt = ip6_dst_alloc();
774 if (unlikely(rt == NULL)) {
775 in6_dev_put(idev);
776 goto out;
777 }
778
779 dev_hold(dev);
780 if (neigh)
781 neigh_hold(neigh);
782 else
783 neigh = ndisc_get_neigh(dev, addr);
784
785 rt->rt6i_dev = dev;
786 rt->rt6i_idev = idev;
787 rt->rt6i_nexthop = neigh;
788 atomic_set(&rt->u.dst.__refcnt, 1);
789 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
790 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
791 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
792 rt->u.dst.output = output;
793
794#if 0 /* there's no chance to use these for ndisc */
795 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
796 ? DST_HOST
797 : 0;
798 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
799 rt->rt6i_dst.plen = 128;
800#endif
801
802 write_lock_bh(&rt6_lock);
803 rt->u.dst.next = ndisc_dst_gc_list;
804 ndisc_dst_gc_list = &rt->u.dst;
805 write_unlock_bh(&rt6_lock);
806
807 fib6_force_start_gc();
808
809out:
810 return (struct dst_entry *)rt;
811}
812
813int ndisc_dst_gc(int *more)
814{
815 struct dst_entry *dst, *next, **pprev;
816 int freed;
817
818 next = NULL;
819 pprev = &ndisc_dst_gc_list;
820 freed = 0;
821 while ((dst = *pprev) != NULL) {
822 if (!atomic_read(&dst->__refcnt)) {
823 *pprev = dst->next;
824 dst_free(dst);
825 freed++;
826 } else {
827 pprev = &dst->next;
828 (*more)++;
829 }
830 }
831
832 return freed;
833}
834
835static int ip6_dst_gc(void)
836{
837 static unsigned expire = 30*HZ;
838 static unsigned long last_gc;
839 unsigned long now = jiffies;
840
841 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
842 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
843 goto out;
844
845 expire++;
846 fib6_run_gc(expire);
847 last_gc = now;
848 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
849 expire = ip6_rt_gc_timeout>>1;
850
851out:
852 expire -= expire>>ip6_rt_gc_elasticity;
853 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
854}
855
856/* Clean host part of a prefix. Not necessary in radix tree,
857 but results in cleaner routing tables.
858
859 Remove it only when all the things will work!
860 */
861
862static int ipv6_get_mtu(struct net_device *dev)
863{
864 int mtu = IPV6_MIN_MTU;
865 struct inet6_dev *idev;
866
867 idev = in6_dev_get(dev);
868 if (idev) {
869 mtu = idev->cnf.mtu6;
870 in6_dev_put(idev);
871 }
872 return mtu;
873}
874
875int ipv6_get_hoplimit(struct net_device *dev)
876{
877 int hoplimit = ipv6_devconf.hop_limit;
878 struct inet6_dev *idev;
879
880 idev = in6_dev_get(dev);
881 if (idev) {
882 hoplimit = idev->cnf.hop_limit;
883 in6_dev_put(idev);
884 }
885 return hoplimit;
886}
887
888/*
889 *
890 */
891
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700892int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
893 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894{
895 int err;
896 struct rtmsg *r;
897 struct rtattr **rta;
898 struct rt6_info *rt = NULL;
899 struct net_device *dev = NULL;
900 struct inet6_dev *idev = NULL;
901 int addr_type;
902
903 rta = (struct rtattr **) _rtattr;
904
905 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
906 return -EINVAL;
907#ifndef CONFIG_IPV6_SUBTREES
908 if (rtmsg->rtmsg_src_len)
909 return -EINVAL;
910#endif
911 if (rtmsg->rtmsg_ifindex) {
912 err = -ENODEV;
913 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
914 if (!dev)
915 goto out;
916 idev = in6_dev_get(dev);
917 if (!idev)
918 goto out;
919 }
920
921 if (rtmsg->rtmsg_metric == 0)
922 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
923
924 rt = ip6_dst_alloc();
925
926 if (rt == NULL) {
927 err = -ENOMEM;
928 goto out;
929 }
930
931 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -0800932 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933 if (nlh && (r = NLMSG_DATA(nlh))) {
934 rt->rt6i_protocol = r->rtm_protocol;
935 } else {
936 rt->rt6i_protocol = RTPROT_BOOT;
937 }
938
939 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
940
941 if (addr_type & IPV6_ADDR_MULTICAST)
942 rt->u.dst.input = ip6_mc_input;
943 else
944 rt->u.dst.input = ip6_forward;
945
946 rt->u.dst.output = ip6_output;
947
948 ipv6_addr_prefix(&rt->rt6i_dst.addr,
949 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
950 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
951 if (rt->rt6i_dst.plen == 128)
952 rt->u.dst.flags = DST_HOST;
953
954#ifdef CONFIG_IPV6_SUBTREES
955 ipv6_addr_prefix(&rt->rt6i_src.addr,
956 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
957 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
958#endif
959
960 rt->rt6i_metric = rtmsg->rtmsg_metric;
961
962 /* We cannot add true routes via loopback here,
963 they would result in kernel looping; promote them to reject routes
964 */
965 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
966 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
967 /* hold loopback dev/idev if we haven't done so. */
968 if (dev != &loopback_dev) {
969 if (dev) {
970 dev_put(dev);
971 in6_dev_put(idev);
972 }
973 dev = &loopback_dev;
974 dev_hold(dev);
975 idev = in6_dev_get(dev);
976 if (!idev) {
977 err = -ENODEV;
978 goto out;
979 }
980 }
981 rt->u.dst.output = ip6_pkt_discard_out;
982 rt->u.dst.input = ip6_pkt_discard;
983 rt->u.dst.error = -ENETUNREACH;
984 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
985 goto install_route;
986 }
987
988 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
989 struct in6_addr *gw_addr;
990 int gwa_type;
991
992 gw_addr = &rtmsg->rtmsg_gateway;
993 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
994 gwa_type = ipv6_addr_type(gw_addr);
995
996 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
997 struct rt6_info *grt;
998
999 /* IPv6 strictly inhibits using not link-local
1000 addresses as nexthop address.
1001 Otherwise, router will not able to send redirects.
1002 It is very good, but in some (rare!) circumstances
1003 (SIT, PtP, NBMA NOARP links) it is handy to allow
1004 some exceptions. --ANK
1005 */
1006 err = -EINVAL;
1007 if (!(gwa_type&IPV6_ADDR_UNICAST))
1008 goto out;
1009
1010 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1011
1012 err = -EHOSTUNREACH;
1013 if (grt == NULL)
1014 goto out;
1015 if (dev) {
1016 if (dev != grt->rt6i_dev) {
1017 dst_release(&grt->u.dst);
1018 goto out;
1019 }
1020 } else {
1021 dev = grt->rt6i_dev;
1022 idev = grt->rt6i_idev;
1023 dev_hold(dev);
1024 in6_dev_hold(grt->rt6i_idev);
1025 }
1026 if (!(grt->rt6i_flags&RTF_GATEWAY))
1027 err = 0;
1028 dst_release(&grt->u.dst);
1029
1030 if (err)
1031 goto out;
1032 }
1033 err = -EINVAL;
1034 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1035 goto out;
1036 }
1037
1038 err = -ENODEV;
1039 if (dev == NULL)
1040 goto out;
1041
1042 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1043 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1044 if (IS_ERR(rt->rt6i_nexthop)) {
1045 err = PTR_ERR(rt->rt6i_nexthop);
1046 rt->rt6i_nexthop = NULL;
1047 goto out;
1048 }
1049 }
1050
1051 rt->rt6i_flags = rtmsg->rtmsg_flags;
1052
1053install_route:
1054 if (rta && rta[RTA_METRICS-1]) {
1055 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1056 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1057
1058 while (RTA_OK(attr, attrlen)) {
1059 unsigned flavor = attr->rta_type;
1060 if (flavor) {
1061 if (flavor > RTAX_MAX) {
1062 err = -EINVAL;
1063 goto out;
1064 }
1065 rt->u.dst.metrics[flavor-1] =
1066 *(u32 *)RTA_DATA(attr);
1067 }
1068 attr = RTA_NEXT(attr, attrlen);
1069 }
1070 }
1071
1072 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1073 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1074 if (!rt->u.dst.metrics[RTAX_MTU-1])
1075 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1076 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1077 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1078 rt->u.dst.dev = dev;
1079 rt->rt6i_idev = idev;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001080 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081
1082out:
1083 if (dev)
1084 dev_put(dev);
1085 if (idev)
1086 in6_dev_put(idev);
1087 if (rt)
1088 dst_free((struct dst_entry *) rt);
1089 return err;
1090}
1091
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001092int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093{
1094 int err;
1095
1096 write_lock_bh(&rt6_lock);
1097
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001098 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099 dst_release(&rt->u.dst);
1100
1101 write_unlock_bh(&rt6_lock);
1102
1103 return err;
1104}
1105
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001106static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107{
1108 struct fib6_node *fn;
1109 struct rt6_info *rt;
1110 int err = -ESRCH;
1111
1112 read_lock_bh(&rt6_lock);
1113
1114 fn = fib6_locate(&ip6_routing_table,
1115 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1116 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1117
1118 if (fn) {
1119 for (rt = fn->leaf; rt; rt = rt->u.next) {
1120 if (rtmsg->rtmsg_ifindex &&
1121 (rt->rt6i_dev == NULL ||
1122 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1123 continue;
1124 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1125 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1126 continue;
1127 if (rtmsg->rtmsg_metric &&
1128 rtmsg->rtmsg_metric != rt->rt6i_metric)
1129 continue;
1130 dst_hold(&rt->u.dst);
1131 read_unlock_bh(&rt6_lock);
1132
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001133 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 }
1135 }
1136 read_unlock_bh(&rt6_lock);
1137
1138 return err;
1139}
1140
1141/*
1142 * Handle redirects
1143 */
1144void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1145 struct neighbour *neigh, u8 *lladdr, int on_link)
1146{
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001147 struct rt6_info *rt, *nrt = NULL;
1148 int strict;
1149 struct fib6_node *fn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150
1151 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001152 * Get the "current" route for this destination and
1153 * check if the redirect has come from approriate router.
1154 *
1155 * RFC 2461 specifies that redirects should only be
1156 * accepted if they come from the nexthop to the target.
1157 * Due to the way the routes are chosen, this notion
1158 * is a bit fuzzy and one might need to check all possible
1159 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160 */
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001161 strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001163 read_lock_bh(&rt6_lock);
1164 fn = fib6_lookup(&ip6_routing_table, dest, NULL);
1165restart:
1166 for (rt = fn->leaf; rt; rt = rt->u.next) {
1167 /*
1168 * Current route is on-link; redirect is always invalid.
1169 *
1170 * Seems, previous statement is not true. It could
1171 * be node, which looks for us as on-link (f.e. proxy ndisc)
1172 * But then router serving it might decide, that we should
1173 * know truth 8)8) --ANK (980726).
1174 */
1175 if (rt6_check_expired(rt))
1176 continue;
1177 if (!(rt->rt6i_flags & RTF_GATEWAY))
1178 continue;
1179 if (neigh->dev != rt->rt6i_dev)
1180 continue;
1181 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1182 continue;
1183 break;
1184 }
1185 if (rt)
1186 dst_hold(&rt->u.dst);
1187 else if (strict) {
1188 while ((fn = fn->parent) != NULL) {
1189 if (fn->fn_flags & RTN_ROOT)
1190 break;
1191 if (fn->fn_flags & RTN_RTINFO)
1192 goto restart;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001194 }
1195 read_unlock_bh(&rt6_lock);
1196
1197 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 if (net_ratelimit())
1199 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1200 "for redirect target\n");
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001201 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001202 }
1203
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 /*
1205 * We have finally decided to accept it.
1206 */
1207
1208 neigh_update(neigh, lladdr, NUD_STALE,
1209 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1210 NEIGH_UPDATE_F_OVERRIDE|
1211 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1212 NEIGH_UPDATE_F_ISROUTER))
1213 );
1214
1215 /*
1216 * Redirect received -> path was valid.
1217 * Look, redirects are sent only in response to data packets,
1218 * so that this nexthop apparently is reachable. --ANK
1219 */
1220 dst_confirm(&rt->u.dst);
1221
1222 /* Duplicate redirect: silently ignore. */
1223 if (neigh == rt->u.dst.neighbour)
1224 goto out;
1225
1226 nrt = ip6_rt_copy(rt);
1227 if (nrt == NULL)
1228 goto out;
1229
1230 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1231 if (on_link)
1232 nrt->rt6i_flags &= ~RTF_GATEWAY;
1233
1234 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1235 nrt->rt6i_dst.plen = 128;
1236 nrt->u.dst.flags |= DST_HOST;
1237
1238 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1239 nrt->rt6i_nexthop = neigh_clone(neigh);
1240 /* Reset pmtu, it may be better */
1241 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1242 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1243
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001244 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245 goto out;
1246
1247 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001248 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 return;
1250 }
1251
1252out:
1253 dst_release(&rt->u.dst);
1254 return;
1255}
1256
1257/*
1258 * Handle ICMP "packet too big" messages
1259 * i.e. Path MTU discovery
1260 */
1261
1262void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1263 struct net_device *dev, u32 pmtu)
1264{
1265 struct rt6_info *rt, *nrt;
1266 int allfrag = 0;
1267
1268 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1269 if (rt == NULL)
1270 return;
1271
1272 if (pmtu >= dst_mtu(&rt->u.dst))
1273 goto out;
1274
1275 if (pmtu < IPV6_MIN_MTU) {
1276 /*
1277 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1278 * MTU (1280) and a fragment header should always be included
1279 * after a node receiving Too Big message reporting PMTU is
1280 * less than the IPv6 Minimum Link MTU.
1281 */
1282 pmtu = IPV6_MIN_MTU;
1283 allfrag = 1;
1284 }
1285
1286 /* New mtu received -> path was valid.
1287 They are sent only in response to data packets,
1288 so that this nexthop apparently is reachable. --ANK
1289 */
1290 dst_confirm(&rt->u.dst);
1291
1292 /* Host route. If it is static, it would be better
1293 not to override it, but add new one, so that
1294 when cache entry will expire old pmtu
1295 would return automatically.
1296 */
1297 if (rt->rt6i_flags & RTF_CACHE) {
1298 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1299 if (allfrag)
1300 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1301 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1302 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1303 goto out;
1304 }
1305
1306 /* Network route.
1307 Two cases are possible:
1308 1. It is connected route. Action: COW
1309 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1310 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001311 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001312 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001313 else
1314 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001315
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001316 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001317 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1318 if (allfrag)
1319 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1320
1321 /* According to RFC 1981, detecting PMTU increase shouldn't be
1322 * happened within 5 mins, the recommended timer is 10 mins.
1323 * Here this route expiration time is set to ip6_rt_mtu_expires
1324 * which is 10 mins. After 10 mins the decreased pmtu is expired
1325 * and detecting PMTU increase will be automatically happened.
1326 */
1327 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1328 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1329
1330 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332out:
1333 dst_release(&rt->u.dst);
1334}
1335
1336/*
1337 * Misc support functions
1338 */
1339
1340static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1341{
1342 struct rt6_info *rt = ip6_dst_alloc();
1343
1344 if (rt) {
1345 rt->u.dst.input = ort->u.dst.input;
1346 rt->u.dst.output = ort->u.dst.output;
1347
1348 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1349 rt->u.dst.dev = ort->u.dst.dev;
1350 if (rt->u.dst.dev)
1351 dev_hold(rt->u.dst.dev);
1352 rt->rt6i_idev = ort->rt6i_idev;
1353 if (rt->rt6i_idev)
1354 in6_dev_hold(rt->rt6i_idev);
1355 rt->u.dst.lastuse = jiffies;
1356 rt->rt6i_expires = 0;
1357
1358 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1359 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1360 rt->rt6i_metric = 0;
1361
1362 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1363#ifdef CONFIG_IPV6_SUBTREES
1364 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1365#endif
1366 }
1367 return rt;
1368}
1369
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001370#ifdef CONFIG_IPV6_ROUTE_INFO
1371static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1372 struct in6_addr *gwaddr, int ifindex)
1373{
1374 struct fib6_node *fn;
1375 struct rt6_info *rt = NULL;
1376
1377 write_lock_bh(&rt6_lock);
1378 fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
1379 if (!fn)
1380 goto out;
1381
1382 for (rt = fn->leaf; rt; rt = rt->u.next) {
1383 if (rt->rt6i_dev->ifindex != ifindex)
1384 continue;
1385 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1386 continue;
1387 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1388 continue;
1389 dst_hold(&rt->u.dst);
1390 break;
1391 }
1392out:
1393 write_unlock_bh(&rt6_lock);
1394 return rt;
1395}
1396
1397static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1398 struct in6_addr *gwaddr, int ifindex,
1399 unsigned pref)
1400{
1401 struct in6_rtmsg rtmsg;
1402
1403 memset(&rtmsg, 0, sizeof(rtmsg));
1404 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1405 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1406 rtmsg.rtmsg_dst_len = prefixlen;
1407 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1408 rtmsg.rtmsg_metric = 1024;
1409 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001410 /* We should treat it as a default route if prefix length is 0. */
1411 if (!prefixlen)
1412 rtmsg.rtmsg_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001413 rtmsg.rtmsg_ifindex = ifindex;
1414
1415 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1416
1417 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1418}
1419#endif
1420
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1422{
1423 struct rt6_info *rt;
1424 struct fib6_node *fn;
1425
1426 fn = &ip6_routing_table;
1427
1428 write_lock_bh(&rt6_lock);
1429 for (rt = fn->leaf; rt; rt=rt->u.next) {
1430 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001431 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1433 break;
1434 }
1435 if (rt)
1436 dst_hold(&rt->u.dst);
1437 write_unlock_bh(&rt6_lock);
1438 return rt;
1439}
1440
1441struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001442 struct net_device *dev,
1443 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444{
1445 struct in6_rtmsg rtmsg;
1446
1447 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1448 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1449 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1450 rtmsg.rtmsg_metric = 1024;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001451 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1452 RTF_PREF(pref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453
1454 rtmsg.rtmsg_ifindex = dev->ifindex;
1455
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001456 ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457 return rt6_get_dflt_router(gwaddr, dev);
1458}
1459
1460void rt6_purge_dflt_routers(void)
1461{
1462 struct rt6_info *rt;
1463
1464restart:
1465 read_lock_bh(&rt6_lock);
1466 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1467 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1468 dst_hold(&rt->u.dst);
1469
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 read_unlock_bh(&rt6_lock);
1471
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001472 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473
1474 goto restart;
1475 }
1476 }
1477 read_unlock_bh(&rt6_lock);
1478}
1479
1480int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1481{
1482 struct in6_rtmsg rtmsg;
1483 int err;
1484
1485 switch(cmd) {
1486 case SIOCADDRT: /* Add a route */
1487 case SIOCDELRT: /* Delete a route */
1488 if (!capable(CAP_NET_ADMIN))
1489 return -EPERM;
1490 err = copy_from_user(&rtmsg, arg,
1491 sizeof(struct in6_rtmsg));
1492 if (err)
1493 return -EFAULT;
1494
1495 rtnl_lock();
1496 switch (cmd) {
1497 case SIOCADDRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001498 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 break;
1500 case SIOCDELRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001501 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001502 break;
1503 default:
1504 err = -EINVAL;
1505 }
1506 rtnl_unlock();
1507
1508 return err;
1509 };
1510
1511 return -EINVAL;
1512}
1513
1514/*
1515 * Drop the packet on the floor
1516 */
1517
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001518static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519{
1520 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1521 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1522 kfree_skb(skb);
1523 return 0;
1524}
1525
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001526static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527{
1528 skb->dev = skb->dst->dev;
1529 return ip6_pkt_discard(skb);
1530}
1531
1532/*
1533 * Allocate a dst for local (unicast / anycast) address.
1534 */
1535
1536struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1537 const struct in6_addr *addr,
1538 int anycast)
1539{
1540 struct rt6_info *rt = ip6_dst_alloc();
1541
1542 if (rt == NULL)
1543 return ERR_PTR(-ENOMEM);
1544
1545 dev_hold(&loopback_dev);
1546 in6_dev_hold(idev);
1547
1548 rt->u.dst.flags = DST_HOST;
1549 rt->u.dst.input = ip6_input;
1550 rt->u.dst.output = ip6_output;
1551 rt->rt6i_dev = &loopback_dev;
1552 rt->rt6i_idev = idev;
1553 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1554 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1555 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1556 rt->u.dst.obsolete = -1;
1557
1558 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001559 if (anycast)
1560 rt->rt6i_flags |= RTF_ANYCAST;
1561 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562 rt->rt6i_flags |= RTF_LOCAL;
1563 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1564 if (rt->rt6i_nexthop == NULL) {
1565 dst_free((struct dst_entry *) rt);
1566 return ERR_PTR(-ENOMEM);
1567 }
1568
1569 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1570 rt->rt6i_dst.plen = 128;
1571
1572 atomic_set(&rt->u.dst.__refcnt, 1);
1573
1574 return rt;
1575}
1576
1577static int fib6_ifdown(struct rt6_info *rt, void *arg)
1578{
1579 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1580 rt != &ip6_null_entry) {
1581 RT6_TRACE("deleted by ifdown %p\n", rt);
1582 return -1;
1583 }
1584 return 0;
1585}
1586
1587void rt6_ifdown(struct net_device *dev)
1588{
1589 write_lock_bh(&rt6_lock);
1590 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1591 write_unlock_bh(&rt6_lock);
1592}
1593
/* Argument bundle that rt6_mtu_change() hands to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
1599
1600static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1601{
1602 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1603 struct inet6_dev *idev;
1604
1605 /* In IPv6 pmtu discovery is not optional,
1606 so that RTAX_MTU lock cannot disable it.
1607 We still use this lock to block changes
1608 caused by addrconf/ndisc.
1609 */
1610
1611 idev = __in6_dev_get(arg->dev);
1612 if (idev == NULL)
1613 return 0;
1614
1615 /* For administrative MTU increase, there is no way to discover
1616 IPv6 PMTU increase, so PMTU increase should be updated here.
1617 Since RFC 1981 doesn't include administrative MTU increase
1618 update PMTU increase is a MUST. (i.e. jumbo frame)
1619 */
1620 /*
1621 If new MTU is less than route PMTU, this new MTU will be the
1622 lowest MTU in the path, update the route PMTU to reflect PMTU
1623 decreases; if new MTU is greater than route PMTU, and the
1624 old MTU is the lowest MTU in the path, update the route PMTU
1625 to reflect the increase. In this case if the other nodes' MTU
1626 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1627 PMTU discouvery.
1628 */
1629 if (rt->rt6i_dev == arg->dev &&
1630 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1631 (dst_mtu(&rt->u.dst) > arg->mtu ||
1632 (dst_mtu(&rt->u.dst) < arg->mtu &&
1633 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1634 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1635 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1636 return 0;
1637}
1638
1639void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1640{
1641 struct rt6_mtu_change_arg arg;
1642
1643 arg.dev = dev;
1644 arg.mtu = mtu;
1645 read_lock_bh(&rt6_lock);
1646 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1647 read_unlock_bh(&rt6_lock);
1648}
1649
1650static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1651 struct in6_rtmsg *rtmsg)
1652{
1653 memset(rtmsg, 0, sizeof(*rtmsg));
1654
1655 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1656 rtmsg->rtmsg_src_len = r->rtm_src_len;
1657 rtmsg->rtmsg_flags = RTF_UP;
1658 if (r->rtm_type == RTN_UNREACHABLE)
1659 rtmsg->rtmsg_flags |= RTF_REJECT;
1660
1661 if (rta[RTA_GATEWAY-1]) {
1662 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1663 return -EINVAL;
1664 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1665 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1666 }
1667 if (rta[RTA_DST-1]) {
1668 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1669 return -EINVAL;
1670 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1671 }
1672 if (rta[RTA_SRC-1]) {
1673 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1674 return -EINVAL;
1675 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1676 }
1677 if (rta[RTA_OIF-1]) {
1678 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1679 return -EINVAL;
1680 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1681 }
1682 if (rta[RTA_PRIORITY-1]) {
1683 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1684 return -EINVAL;
1685 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1686 }
1687 return 0;
1688}
1689
1690int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1691{
1692 struct rtmsg *r = NLMSG_DATA(nlh);
1693 struct in6_rtmsg rtmsg;
1694
1695 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1696 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001697 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698}
1699
1700int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1701{
1702 struct rtmsg *r = NLMSG_DATA(nlh);
1703 struct in6_rtmsg rtmsg;
1704
1705 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1706 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001707 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708}
1709
/* Per-call state that inet6_dump_fib() hands to the walker via w->args. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump callback in progress */
};
1715
1716static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001717 struct in6_addr *dst, struct in6_addr *src,
1718 int iif, int type, u32 pid, u32 seq,
1719 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720{
1721 struct rtmsg *rtm;
1722 struct nlmsghdr *nlh;
1723 unsigned char *b = skb->tail;
1724 struct rta_cacheinfo ci;
1725
1726 if (prefix) { /* user wants prefix routes only */
1727 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1728 /* success since this is not a prefix route */
1729 return 1;
1730 }
1731 }
1732
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001733 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734 rtm = NLMSG_DATA(nlh);
1735 rtm->rtm_family = AF_INET6;
1736 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1737 rtm->rtm_src_len = rt->rt6i_src.plen;
1738 rtm->rtm_tos = 0;
1739 rtm->rtm_table = RT_TABLE_MAIN;
1740 if (rt->rt6i_flags&RTF_REJECT)
1741 rtm->rtm_type = RTN_UNREACHABLE;
1742 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1743 rtm->rtm_type = RTN_LOCAL;
1744 else
1745 rtm->rtm_type = RTN_UNICAST;
1746 rtm->rtm_flags = 0;
1747 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1748 rtm->rtm_protocol = rt->rt6i_protocol;
1749 if (rt->rt6i_flags&RTF_DYNAMIC)
1750 rtm->rtm_protocol = RTPROT_REDIRECT;
1751 else if (rt->rt6i_flags & RTF_ADDRCONF)
1752 rtm->rtm_protocol = RTPROT_KERNEL;
1753 else if (rt->rt6i_flags&RTF_DEFAULT)
1754 rtm->rtm_protocol = RTPROT_RA;
1755
1756 if (rt->rt6i_flags&RTF_CACHE)
1757 rtm->rtm_flags |= RTM_F_CLONED;
1758
1759 if (dst) {
1760 RTA_PUT(skb, RTA_DST, 16, dst);
1761 rtm->rtm_dst_len = 128;
1762 } else if (rtm->rtm_dst_len)
1763 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1764#ifdef CONFIG_IPV6_SUBTREES
1765 if (src) {
1766 RTA_PUT(skb, RTA_SRC, 16, src);
1767 rtm->rtm_src_len = 128;
1768 } else if (rtm->rtm_src_len)
1769 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1770#endif
1771 if (iif)
1772 RTA_PUT(skb, RTA_IIF, 4, &iif);
1773 else if (dst) {
1774 struct in6_addr saddr_buf;
1775 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1776 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1777 }
1778 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1779 goto rtattr_failure;
1780 if (rt->u.dst.neighbour)
1781 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1782 if (rt->u.dst.dev)
1783 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1784 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1785 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1786 if (rt->rt6i_expires)
1787 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1788 else
1789 ci.rta_expires = 0;
1790 ci.rta_used = rt->u.dst.__use;
1791 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1792 ci.rta_error = rt->u.dst.error;
1793 ci.rta_id = 0;
1794 ci.rta_ts = 0;
1795 ci.rta_tsage = 0;
1796 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1797 nlh->nlmsg_len = skb->tail - b;
1798 return skb->len;
1799
1800nlmsg_failure:
1801rtattr_failure:
1802 skb_trim(skb, b - skb->data);
1803 return -1;
1804}
1805
1806static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1807{
1808 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1809 int prefix;
1810
1811 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1812 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1813 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1814 } else
1815 prefix = 0;
1816
1817 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1818 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001819 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820}
1821
1822static int fib6_dump_node(struct fib6_walker_t *w)
1823{
1824 int res;
1825 struct rt6_info *rt;
1826
1827 for (rt = w->leaf; rt; rt = rt->u.next) {
1828 res = rt6_dump_route(rt, w->args);
1829 if (res < 0) {
1830 /* Frame is full, suspend walking */
1831 w->leaf = rt;
1832 return 1;
1833 }
1834 BUG_TRAP(res!=0);
1835 }
1836 w->leaf = NULL;
1837 return 0;
1838}
1839
1840static void fib6_dump_end(struct netlink_callback *cb)
1841{
1842 struct fib6_walker_t *w = (void*)cb->args[0];
1843
1844 if (w) {
1845 cb->args[0] = 0;
1846 fib6_walker_unlink(w);
1847 kfree(w);
1848 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08001849 cb->done = (void*)cb->args[1];
1850 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851}
1852
1853static int fib6_dump_done(struct netlink_callback *cb)
1854{
1855 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01001856 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857}
1858
1859int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1860{
1861 struct rt6_rtnl_dump_arg arg;
1862 struct fib6_walker_t *w;
1863 int res;
1864
1865 arg.skb = skb;
1866 arg.cb = cb;
1867
1868 w = (void*)cb->args[0];
1869 if (w == NULL) {
1870 /* New dump:
1871 *
1872 * 1. hook callback destructor.
1873 */
1874 cb->args[1] = (long)cb->done;
1875 cb->done = fib6_dump_done;
1876
1877 /*
1878 * 2. allocate and initialize walker.
1879 */
David S. Miller9e147a12005-11-17 16:52:51 -08001880 w = kmalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881 if (w == NULL)
1882 return -ENOMEM;
1883 RT6_TRACE("dump<%p", w);
1884 memset(w, 0, sizeof(*w));
1885 w->root = &ip6_routing_table;
1886 w->func = fib6_dump_node;
1887 w->args = &arg;
1888 cb->args[0] = (long)w;
1889 read_lock_bh(&rt6_lock);
1890 res = fib6_walk(w);
1891 read_unlock_bh(&rt6_lock);
1892 } else {
1893 w->args = &arg;
1894 read_lock_bh(&rt6_lock);
1895 res = fib6_walk_continue(w);
1896 read_unlock_bh(&rt6_lock);
1897 }
1898#if RT6_DEBUG >= 3
1899 if (res <= 0 && skb->len == 0)
1900 RT6_TRACE("%p>dump end\n", w);
1901#endif
1902 res = res < 0 ? res : skb->len;
1903 /* res < 0 is an error. (really, impossible)
1904 res == 0 means that dump is complete, but skb still can contain data.
1905 res > 0 dump is not complete, but frame is full.
1906 */
1907 /* Destroy walker, if dump of this table is complete. */
1908 if (res <= 0)
1909 fib6_dump_end(cb);
1910 return res;
1911}
1912
1913int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1914{
1915 struct rtattr **rta = arg;
1916 int iif = 0;
1917 int err = -ENOBUFS;
1918 struct sk_buff *skb;
1919 struct flowi fl;
1920 struct rt6_info *rt;
1921
1922 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1923 if (skb == NULL)
1924 goto out;
1925
1926 /* Reserve room for dummy headers, this skb can pass
1927 through good chunk of routing engine.
1928 */
1929 skb->mac.raw = skb->data;
1930 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1931
1932 memset(&fl, 0, sizeof(fl));
1933 if (rta[RTA_SRC-1])
1934 ipv6_addr_copy(&fl.fl6_src,
1935 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1936 if (rta[RTA_DST-1])
1937 ipv6_addr_copy(&fl.fl6_dst,
1938 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1939
1940 if (rta[RTA_IIF-1])
1941 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1942
1943 if (iif) {
1944 struct net_device *dev;
1945 dev = __dev_get_by_index(iif);
1946 if (!dev) {
1947 err = -ENODEV;
1948 goto out_free;
1949 }
1950 }
1951
1952 fl.oif = 0;
1953 if (rta[RTA_OIF-1])
1954 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1955
1956 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1957
1958 skb->dst = &rt->u.dst;
1959
1960 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1961 err = rt6_fill_node(skb, rt,
1962 &fl.fl6_dst, &fl.fl6_src,
1963 iif,
1964 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001965 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966 if (err < 0) {
1967 err = -EMSGSIZE;
1968 goto out_free;
1969 }
1970
1971 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1972 if (err > 0)
1973 err = 0;
1974out:
1975 return err;
1976out_free:
1977 kfree_skb(skb);
1978 goto out;
1979}
1980
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001981void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1982 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001983{
1984 struct sk_buff *skb;
1985 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001986 u32 pid = current->pid;
1987 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001989 if (req)
1990 pid = req->pid;
1991 if (nlh)
1992 seq = nlh->nlmsg_seq;
1993
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994 skb = alloc_skb(size, gfp_any());
1995 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07001996 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997 return;
1998 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001999 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07002001 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002 return;
2003 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07002004 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2005 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006}
2007
/*
 *	/proc interface
 */
2011
2012#ifdef CONFIG_PROC_FS
2013
/*
 * Nominal size in bytes of one record written by rt6_info_route():
 *   32 + 4  destination address in hex, then " %02x " prefix length
 *   32 + 4  source address in hex, then " %02x " prefix length
 *   32      next-hop address in hex
 *   46      " %08x %08x %08x %08x %8s\n" (written here as 40 + 5 + 1)
 * NOTE(review): "%8s" only pads, so a device name longer than 8
 * characters makes a record longer than this value.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2015
/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer the records are formatted into */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes the caller asked for */
	int skip;	/* routes skipped so far to reach 'offset' */
	int len;	/* bytes written to 'buffer' so far */
};
2024
2025static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2026{
2027 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2028 int i;
2029
2030 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2031 arg->skip++;
2032 return 0;
2033 }
2034
2035 if (arg->len >= arg->length)
2036 return 0;
2037
2038 for (i=0; i<16; i++) {
2039 sprintf(arg->buffer + arg->len, "%02x",
2040 rt->rt6i_dst.addr.s6_addr[i]);
2041 arg->len += 2;
2042 }
2043 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2044 rt->rt6i_dst.plen);
2045
2046#ifdef CONFIG_IPV6_SUBTREES
2047 for (i=0; i<16; i++) {
2048 sprintf(arg->buffer + arg->len, "%02x",
2049 rt->rt6i_src.addr.s6_addr[i]);
2050 arg->len += 2;
2051 }
2052 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2053 rt->rt6i_src.plen);
2054#else
2055 sprintf(arg->buffer + arg->len,
2056 "00000000000000000000000000000000 00 ");
2057 arg->len += 36;
2058#endif
2059
2060 if (rt->rt6i_nexthop) {
2061 for (i=0; i<16; i++) {
2062 sprintf(arg->buffer + arg->len, "%02x",
2063 rt->rt6i_nexthop->primary_key[i]);
2064 arg->len += 2;
2065 }
2066 } else {
2067 sprintf(arg->buffer + arg->len,
2068 "00000000000000000000000000000000");
2069 arg->len += 32;
2070 }
2071 arg->len += sprintf(arg->buffer + arg->len,
2072 " %08x %08x %08x %08x %8s\n",
2073 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2074 rt->u.dst.__use, rt->rt6i_flags,
2075 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2076 return 0;
2077}
2078
2079static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2080{
2081 struct rt6_proc_arg arg;
2082 arg.buffer = buffer;
2083 arg.offset = offset;
2084 arg.length = length;
2085 arg.skip = 0;
2086 arg.len = 0;
2087
2088 read_lock_bh(&rt6_lock);
2089 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
2090 read_unlock_bh(&rt6_lock);
2091
2092 *start = buffer;
2093 if (offset)
2094 *start += offset % RT6_INFO_LEN;
2095
2096 arg.len -= offset % RT6_INFO_LEN;
2097
2098 if (arg.len > length)
2099 arg.len = length;
2100 if (arg.len < 0)
2101 arg.len = 0;
2102
2103 return arg.len;
2104}
2105
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2107{
2108 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2109 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2110 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2111 rt6_stats.fib_rt_cache,
2112 atomic_read(&ip6_dst_ops.entries),
2113 rt6_stats.fib_discarded_routes);
2114
2115 return 0;
2116}
2117
2118static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2119{
2120 return single_open(file, rt6_stats_seq_show, NULL);
2121}
2122
2123static struct file_operations rt6_stats_seq_fops = {
2124 .owner = THIS_MODULE,
2125 .open = rt6_stats_seq_open,
2126 .read = seq_read,
2127 .llseek = seq_lseek,
2128 .release = single_release,
2129};
2130#endif /* CONFIG_PROC_FS */
2131
2132#ifdef CONFIG_SYSCTL
2133
2134static int flush_delay;
2135
2136static
2137int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2138 void __user *buffer, size_t *lenp, loff_t *ppos)
2139{
2140 if (write) {
2141 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2142 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2143 return 0;
2144 } else
2145 return -EINVAL;
2146}
2147
2148ctl_table ipv6_route_table[] = {
2149 {
2150 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2151 .procname = "flush",
2152 .data = &flush_delay,
2153 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002154 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 .proc_handler = &ipv6_sysctl_rtcache_flush
2156 },
2157 {
2158 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2159 .procname = "gc_thresh",
2160 .data = &ip6_dst_ops.gc_thresh,
2161 .maxlen = sizeof(int),
2162 .mode = 0644,
2163 .proc_handler = &proc_dointvec,
2164 },
2165 {
2166 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2167 .procname = "max_size",
2168 .data = &ip6_rt_max_size,
2169 .maxlen = sizeof(int),
2170 .mode = 0644,
2171 .proc_handler = &proc_dointvec,
2172 },
2173 {
2174 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2175 .procname = "gc_min_interval",
2176 .data = &ip6_rt_gc_min_interval,
2177 .maxlen = sizeof(int),
2178 .mode = 0644,
2179 .proc_handler = &proc_dointvec_jiffies,
2180 .strategy = &sysctl_jiffies,
2181 },
2182 {
2183 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2184 .procname = "gc_timeout",
2185 .data = &ip6_rt_gc_timeout,
2186 .maxlen = sizeof(int),
2187 .mode = 0644,
2188 .proc_handler = &proc_dointvec_jiffies,
2189 .strategy = &sysctl_jiffies,
2190 },
2191 {
2192 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2193 .procname = "gc_interval",
2194 .data = &ip6_rt_gc_interval,
2195 .maxlen = sizeof(int),
2196 .mode = 0644,
2197 .proc_handler = &proc_dointvec_jiffies,
2198 .strategy = &sysctl_jiffies,
2199 },
2200 {
2201 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2202 .procname = "gc_elasticity",
2203 .data = &ip6_rt_gc_elasticity,
2204 .maxlen = sizeof(int),
2205 .mode = 0644,
2206 .proc_handler = &proc_dointvec_jiffies,
2207 .strategy = &sysctl_jiffies,
2208 },
2209 {
2210 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2211 .procname = "mtu_expires",
2212 .data = &ip6_rt_mtu_expires,
2213 .maxlen = sizeof(int),
2214 .mode = 0644,
2215 .proc_handler = &proc_dointvec_jiffies,
2216 .strategy = &sysctl_jiffies,
2217 },
2218 {
2219 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2220 .procname = "min_adv_mss",
2221 .data = &ip6_rt_min_advmss,
2222 .maxlen = sizeof(int),
2223 .mode = 0644,
2224 .proc_handler = &proc_dointvec_jiffies,
2225 .strategy = &sysctl_jiffies,
2226 },
2227 {
2228 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2229 .procname = "gc_min_interval_ms",
2230 .data = &ip6_rt_gc_min_interval,
2231 .maxlen = sizeof(int),
2232 .mode = 0644,
2233 .proc_handler = &proc_dointvec_ms_jiffies,
2234 .strategy = &sysctl_ms_jiffies,
2235 },
2236 { .ctl_name = 0 }
2237};
2238
2239#endif
2240
2241void __init ip6_route_init(void)
2242{
2243 struct proc_dir_entry *p;
2244
2245 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2246 sizeof(struct rt6_info),
2247 0, SLAB_HWCACHE_ALIGN,
2248 NULL, NULL);
2249 if (!ip6_dst_ops.kmem_cachep)
2250 panic("cannot create ip6_dst_cache");
2251
2252 fib6_init();
2253#ifdef CONFIG_PROC_FS
2254 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2255 if (p)
2256 p->owner = THIS_MODULE;
2257
2258 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2259#endif
2260#ifdef CONFIG_XFRM
2261 xfrm6_init();
2262#endif
2263}
2264
2265void ip6_route_cleanup(void)
2266{
2267#ifdef CONFIG_PROC_FS
2268 proc_net_remove("ipv6_route");
2269 proc_net_remove("rt6_stats");
2270#endif
2271#ifdef CONFIG_XFRM
2272 xfrm6_fini();
2273#endif
2274 rt6_ifdown(NULL);
2275 fib6_gc_cleanup();
2276 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2277}