blob: e0d3ad02ffb5afc5d412da05c7986942e7780685 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
15
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		Reworked default router selection:
 *		- respect the outgoing interface;
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states);
 *		- always select the same router if it is (probably)
 *		  reachable; otherwise, round-robin through the list.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
75
76static int ip6_rt_max_size = 4096;
77static int ip6_rt_gc_min_interval = HZ / 2;
78static int ip6_rt_gc_timeout = 60*HZ;
79int ip6_rt_gc_interval = 30*HZ;
80static int ip6_rt_gc_elasticity = 9;
81static int ip6_rt_mtu_expires = 10*60*HZ;
82static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
84static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87static void ip6_dst_destroy(struct dst_entry *);
88static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
90static int ip6_dst_gc(void);
91
92static int ip6_pkt_discard(struct sk_buff *skb);
93static int ip6_pkt_discard_out(struct sk_buff *skb);
94static void ip6_link_failure(struct sk_buff *skb);
95static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
97static struct dst_ops ip6_dst_ops = {
98 .family = AF_INET6,
99 .protocol = __constant_htons(ETH_P_IPV6),
100 .gc = ip6_dst_gc,
101 .gc_thresh = 1024,
102 .check = ip6_dst_check,
103 .destroy = ip6_dst_destroy,
104 .ifdown = ip6_dst_ifdown,
105 .negative_advice = ip6_negative_advice,
106 .link_failure = ip6_link_failure,
107 .update_pmtu = ip6_rt_update_pmtu,
108 .entry_size = sizeof(struct rt6_info),
109};
110
111struct rt6_info ip6_null_entry = {
112 .u = {
113 .dst = {
114 .__refcnt = ATOMIC_INIT(1),
115 .__use = 1,
116 .dev = &loopback_dev,
117 .obsolete = -1,
118 .error = -ENETUNREACH,
119 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
120 .input = ip6_pkt_discard,
121 .output = ip6_pkt_discard_out,
122 .ops = &ip6_dst_ops,
123 .path = (struct dst_entry*)&ip6_null_entry,
124 }
125 },
126 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
127 .rt6i_metric = ~(u32) 0,
128 .rt6i_ref = ATOMIC_INIT(1),
129};
130
131struct fib6_node ip6_routing_table = {
132 .leaf = &ip6_null_entry,
133 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134};
135
136/* Protects all the ip6 fib */
137
138DEFINE_RWLOCK(rt6_lock);
139
140
141/* allocate dst with ip6_dst_ops */
142static __inline__ struct rt6_info *ip6_dst_alloc(void)
143{
144 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
145}
146
147static void ip6_dst_destroy(struct dst_entry *dst)
148{
149 struct rt6_info *rt = (struct rt6_info *)dst;
150 struct inet6_dev *idev = rt->rt6i_idev;
151
152 if (idev != NULL) {
153 rt->rt6i_idev = NULL;
154 in6_dev_put(idev);
155 }
156}
157
158static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
159 int how)
160{
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
163
164 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166 if (loopback_idev != NULL) {
167 rt->rt6i_idev = loopback_idev;
168 in6_dev_put(idev);
169 }
170 }
171}
172
173static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174{
175 return (rt->rt6i_flags & RTF_EXPIRES &&
176 time_after(jiffies, rt->rt6i_expires));
177}
178
/*
 *	Route lookup. rt6_lock is assumed to be held (in either mode).
 */
182
183static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
184 int oif,
185 int strict)
186{
187 struct rt6_info *local = NULL;
188 struct rt6_info *sprt;
189
190 if (oif) {
191 for (sprt = rt; sprt; sprt = sprt->u.next) {
192 struct net_device *dev = sprt->rt6i_dev;
193 if (dev->ifindex == oif)
194 return sprt;
195 if (dev->flags & IFF_LOOPBACK) {
196 if (sprt->rt6i_idev == NULL ||
197 sprt->rt6i_idev->dev->ifindex != oif) {
198 if (strict && oif)
199 continue;
200 if (local && (!oif ||
201 local->rt6i_idev->dev->ifindex == oif))
202 continue;
203 }
204 local = sprt;
205 }
206 }
207
208 if (local)
209 return local;
210
211 if (strict)
212 return &ip6_null_entry;
213 }
214 return rt;
215}
216
/*
 *	Pointer to the last default router chosen, and the spinlock
 *	guarding it.  BH is disabled locally.
 */
static struct rt6_info *rt6_dflt_pointer;
static DEFINE_SPINLOCK(rt6_dflt_lock);
222
223void rt6_reset_dflt_pointer(struct rt6_info *rt)
224{
225 spin_lock_bh(&rt6_dflt_lock);
226 if (rt == NULL || rt == rt6_dflt_pointer) {
227 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228 rt6_dflt_pointer = NULL;
229 }
230 spin_unlock_bh(&rt6_dflt_lock);
231}
232
233/* Default Router Selection (RFC 2461 6.3.6) */
234static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
235{
236 struct rt6_info *match = NULL;
237 struct rt6_info *sprt;
238 int mpri = 0;
239
240 for (sprt = rt; sprt; sprt = sprt->u.next) {
241 struct neighbour *neigh;
242 int m = 0;
243
244 if (!oif ||
245 (sprt->rt6i_dev &&
246 sprt->rt6i_dev->ifindex == oif))
247 m += 8;
248
249 if (rt6_check_expired(sprt))
250 continue;
251
252 if (sprt == rt6_dflt_pointer)
253 m += 4;
254
255 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256 read_lock_bh(&neigh->lock);
257 switch (neigh->nud_state) {
258 case NUD_REACHABLE:
259 m += 3;
260 break;
261
262 case NUD_STALE:
263 case NUD_DELAY:
264 case NUD_PROBE:
265 m += 2;
266 break;
267
268 case NUD_NOARP:
269 case NUD_PERMANENT:
270 m += 1;
271 break;
272
273 case NUD_INCOMPLETE:
274 default:
275 read_unlock_bh(&neigh->lock);
276 continue;
277 }
278 read_unlock_bh(&neigh->lock);
279 } else {
280 continue;
281 }
282
283 if (m > mpri || m >= 12) {
284 match = sprt;
285 mpri = m;
286 if (m >= 12) {
287 /* we choose the last default router if it
288 * is in (probably) reachable state.
289 * If route changed, we should do pmtu
290 * discovery. --yoshfuji
291 */
292 break;
293 }
294 }
295 }
296
297 spin_lock(&rt6_dflt_lock);
298 if (!match) {
299 /*
300 * No default routers are known to be reachable.
301 * SHOULD round robin
302 */
303 if (rt6_dflt_pointer) {
304 for (sprt = rt6_dflt_pointer->u.next;
305 sprt; sprt = sprt->u.next) {
306 if (sprt->u.dst.obsolete <= 0 &&
307 sprt->u.dst.error == 0 &&
308 !rt6_check_expired(sprt)) {
309 match = sprt;
310 break;
311 }
312 }
313 for (sprt = rt;
314 !match && sprt;
315 sprt = sprt->u.next) {
316 if (sprt->u.dst.obsolete <= 0 &&
317 sprt->u.dst.error == 0 &&
318 !rt6_check_expired(sprt)) {
319 match = sprt;
320 break;
321 }
322 if (sprt == rt6_dflt_pointer)
323 break;
324 }
325 }
326 }
327
328 if (match) {
329 if (rt6_dflt_pointer != match)
330 RT6_TRACE("changed default router: %p->%p\n",
331 rt6_dflt_pointer, match);
332 rt6_dflt_pointer = match;
333 }
334 spin_unlock(&rt6_dflt_lock);
335
336 if (!match) {
337 /*
338 * Last Resort: if no default routers found,
339 * use addrconf default route.
340 * We don't record this route.
341 */
342 for (sprt = ip6_routing_table.leaf;
343 sprt; sprt = sprt->u.next) {
344 if (!rt6_check_expired(sprt) &&
345 (sprt->rt6i_flags & RTF_DEFAULT) &&
346 (!oif ||
347 (sprt->rt6i_dev &&
348 sprt->rt6i_dev->ifindex == oif))) {
349 match = sprt;
350 break;
351 }
352 }
353 if (!match) {
354 /* no default route. give up. */
355 match = &ip6_null_entry;
356 }
357 }
358
359 return match;
360}
361
362struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
363 int oif, int strict)
364{
365 struct fib6_node *fn;
366 struct rt6_info *rt;
367
368 read_lock_bh(&rt6_lock);
369 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370 rt = rt6_device_match(fn->leaf, oif, strict);
371 dst_hold(&rt->u.dst);
372 rt->u.dst.__use++;
373 read_unlock_bh(&rt6_lock);
374
375 rt->u.dst.lastuse = jiffies;
376 if (rt->u.dst.error == 0)
377 return rt;
378 dst_release(&rt->u.dst);
379 return NULL;
380}
381
/* ip6_ins_rt() must be called without rt6_lock held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
387
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700388int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390{
391 int err;
392
393 write_lock_bh(&rt6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700394 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 write_unlock_bh(&rt6_lock);
396
397 return err;
398}
399
/* Must be called without rt6_lock held. If COW fails, the function
   returns a dead route entry with dst->error set to the errno value.
 */
403
404static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700405 struct in6_addr *saddr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406{
407 int err;
408 struct rt6_info *rt;
409
410 /*
411 * Clone the route.
412 */
413
414 rt = ip6_rt_copy(ort);
415
416 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900417 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
418 if (rt->rt6i_dst.plen != 128 &&
419 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
420 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900422 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900424 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425 rt->rt6i_dst.plen = 128;
426 rt->rt6i_flags |= RTF_CACHE;
427 rt->u.dst.flags |= DST_HOST;
428
429#ifdef CONFIG_IPV6_SUBTREES
430 if (rt->rt6i_src.plen && saddr) {
431 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
432 rt->rt6i_src.plen = 128;
433 }
434#endif
435
436 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
437
438 dst_hold(&rt->u.dst);
439
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700440 err = ip6_ins_rt(rt, NULL, NULL, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 if (err == 0)
442 return rt;
443
444 rt->u.dst.error = err;
445
446 return rt;
447 }
448 dst_hold(&ip6_null_entry.u.dst);
449 return &ip6_null_entry;
450}
451
/*
 * Backtrack up the FIB tree after a strict lookup ended on the null entry.
 *
 * Relies on the caller's scope: variables `rt`, `fn` and `strict`, and
 * labels `out` and `restart`.  Walking the parents of `fn`:
 *  - on a node flagged RTN_ROOT, take a reference on `rt` and jump to `out`;
 *  - on a node flagged RTN_RTINFO, jump to `restart` to retry from there.
 *
 * Wrapped in do { } while (0) so that the macro expands to a single
 * statement and cannot capture a following `else`.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
463
464
465void ip6_route_input(struct sk_buff *skb)
466{
467 struct fib6_node *fn;
468 struct rt6_info *rt;
469 int strict;
470 int attempts = 3;
471
472 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
473
474relookup:
475 read_lock_bh(&rt6_lock);
476
477 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
478 &skb->nh.ipv6h->saddr);
479
480restart:
481 rt = fn->leaf;
482
483 if ((rt->rt6i_flags & RTF_CACHE)) {
484 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
485 BACKTRACK();
486 dst_hold(&rt->u.dst);
487 goto out;
488 }
489
Yan Zheng9d17f212005-10-28 15:12:00 -0700490 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 BACKTRACK();
492
493 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
494 struct rt6_info *nrt;
495 dst_hold(&rt->u.dst);
496 read_unlock_bh(&rt6_lock);
497
498 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700499 &skb->nh.ipv6h->saddr,
500 &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501
502 dst_release(&rt->u.dst);
503 rt = nrt;
504
505 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
506 goto out2;
507
508 /* Race condition! In the gap, when rt6_lock was
509 released someone could insert this route. Relookup.
510 */
511 dst_release(&rt->u.dst);
512 goto relookup;
513 }
514 dst_hold(&rt->u.dst);
515
516out:
517 read_unlock_bh(&rt6_lock);
518out2:
519 rt->u.dst.lastuse = jiffies;
520 rt->u.dst.__use++;
521 skb->dst = (struct dst_entry *) rt;
522}
523
524struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
525{
526 struct fib6_node *fn;
527 struct rt6_info *rt;
528 int strict;
529 int attempts = 3;
530
531 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
532
533relookup:
534 read_lock_bh(&rt6_lock);
535
536 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
537
538restart:
539 rt = fn->leaf;
540
541 if ((rt->rt6i_flags & RTF_CACHE)) {
542 rt = rt6_device_match(rt, fl->oif, strict);
543 BACKTRACK();
544 dst_hold(&rt->u.dst);
545 goto out;
546 }
547 if (rt->rt6i_flags & RTF_DEFAULT) {
548 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
549 rt = rt6_best_dflt(rt, fl->oif);
550 } else {
551 rt = rt6_device_match(rt, fl->oif, strict);
552 BACKTRACK();
553 }
554
555 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
556 struct rt6_info *nrt;
557 dst_hold(&rt->u.dst);
558 read_unlock_bh(&rt6_lock);
559
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700560 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561
562 dst_release(&rt->u.dst);
563 rt = nrt;
564
565 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
566 goto out2;
567
568 /* Race condition! In the gap, when rt6_lock was
569 released someone could insert this route. Relookup.
570 */
571 dst_release(&rt->u.dst);
572 goto relookup;
573 }
574 dst_hold(&rt->u.dst);
575
576out:
577 read_unlock_bh(&rt6_lock);
578out2:
579 rt->u.dst.lastuse = jiffies;
580 rt->u.dst.__use++;
581 return &rt->u.dst;
582}
583
584
585/*
586 * Destination cache support functions
587 */
588
589static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
590{
591 struct rt6_info *rt;
592
593 rt = (struct rt6_info *) dst;
594
595 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
596 return dst;
597
598 return NULL;
599}
600
601static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
602{
603 struct rt6_info *rt = (struct rt6_info *) dst;
604
605 if (rt) {
606 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700607 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 else
609 dst_release(dst);
610 }
611 return NULL;
612}
613
614static void ip6_link_failure(struct sk_buff *skb)
615{
616 struct rt6_info *rt;
617
618 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
619
620 rt = (struct rt6_info *) skb->dst;
621 if (rt) {
622 if (rt->rt6i_flags&RTF_CACHE) {
623 dst_set_expires(&rt->u.dst, 0);
624 rt->rt6i_flags |= RTF_EXPIRES;
625 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
626 rt->rt6i_node->fn_sernum = -1;
627 }
628}
629
630static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
631{
632 struct rt6_info *rt6 = (struct rt6_info*)dst;
633
634 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
635 rt6->rt6i_flags |= RTF_MODIFIED;
636 if (mtu < IPV6_MIN_MTU) {
637 mtu = IPV6_MIN_MTU;
638 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
639 }
640 dst->metrics[RTAX_MTU-1] = mtu;
641 }
642}
643
/*
 * List of dst entries handed out by ndisc_dst_alloc(), linked through
 * dst->next and reaped by ndisc_dst_gc().  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;

/* Defined further down in this file. */
static int ipv6_get_mtu(struct net_device *dev);
647
648static inline unsigned int ipv6_advmss(unsigned int mtu)
649{
650 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
651
652 if (mtu < ip6_rt_min_advmss)
653 mtu = ip6_rt_min_advmss;
654
655 /*
656 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
657 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
658 * IPV6_MAXPLEN is also valid and means: "any MSS,
659 * rely only on pmtu discovery"
660 */
661 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
662 mtu = IPV6_MAXPLEN;
663 return mtu;
664}
665
666struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
667 struct neighbour *neigh,
668 struct in6_addr *addr,
669 int (*output)(struct sk_buff *))
670{
671 struct rt6_info *rt;
672 struct inet6_dev *idev = in6_dev_get(dev);
673
674 if (unlikely(idev == NULL))
675 return NULL;
676
677 rt = ip6_dst_alloc();
678 if (unlikely(rt == NULL)) {
679 in6_dev_put(idev);
680 goto out;
681 }
682
683 dev_hold(dev);
684 if (neigh)
685 neigh_hold(neigh);
686 else
687 neigh = ndisc_get_neigh(dev, addr);
688
689 rt->rt6i_dev = dev;
690 rt->rt6i_idev = idev;
691 rt->rt6i_nexthop = neigh;
692 atomic_set(&rt->u.dst.__refcnt, 1);
693 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
694 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
695 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
696 rt->u.dst.output = output;
697
698#if 0 /* there's no chance to use these for ndisc */
699 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
700 ? DST_HOST
701 : 0;
702 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
703 rt->rt6i_dst.plen = 128;
704#endif
705
706 write_lock_bh(&rt6_lock);
707 rt->u.dst.next = ndisc_dst_gc_list;
708 ndisc_dst_gc_list = &rt->u.dst;
709 write_unlock_bh(&rt6_lock);
710
711 fib6_force_start_gc();
712
713out:
714 return (struct dst_entry *)rt;
715}
716
717int ndisc_dst_gc(int *more)
718{
719 struct dst_entry *dst, *next, **pprev;
720 int freed;
721
722 next = NULL;
723 pprev = &ndisc_dst_gc_list;
724 freed = 0;
725 while ((dst = *pprev) != NULL) {
726 if (!atomic_read(&dst->__refcnt)) {
727 *pprev = dst->next;
728 dst_free(dst);
729 freed++;
730 } else {
731 pprev = &dst->next;
732 (*more)++;
733 }
734 }
735
736 return freed;
737}
738
739static int ip6_dst_gc(void)
740{
741 static unsigned expire = 30*HZ;
742 static unsigned long last_gc;
743 unsigned long now = jiffies;
744
745 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
746 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
747 goto out;
748
749 expire++;
750 fib6_run_gc(expire);
751 last_gc = now;
752 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
753 expire = ip6_rt_gc_timeout>>1;
754
755out:
756 expire -= expire>>ip6_rt_gc_elasticity;
757 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
758}
759
760/* Clean host part of a prefix. Not necessary in radix tree,
761 but results in cleaner routing tables.
762
763 Remove it only when all the things will work!
764 */
765
766static int ipv6_get_mtu(struct net_device *dev)
767{
768 int mtu = IPV6_MIN_MTU;
769 struct inet6_dev *idev;
770
771 idev = in6_dev_get(dev);
772 if (idev) {
773 mtu = idev->cnf.mtu6;
774 in6_dev_put(idev);
775 }
776 return mtu;
777}
778
779int ipv6_get_hoplimit(struct net_device *dev)
780{
781 int hoplimit = ipv6_devconf.hop_limit;
782 struct inet6_dev *idev;
783
784 idev = in6_dev_get(dev);
785 if (idev) {
786 hoplimit = idev->cnf.hop_limit;
787 in6_dev_put(idev);
788 }
789 return hoplimit;
790}
791
792/*
793 *
794 */
795
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700796int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
797 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798{
799 int err;
800 struct rtmsg *r;
801 struct rtattr **rta;
802 struct rt6_info *rt = NULL;
803 struct net_device *dev = NULL;
804 struct inet6_dev *idev = NULL;
805 int addr_type;
806
807 rta = (struct rtattr **) _rtattr;
808
809 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
810 return -EINVAL;
811#ifndef CONFIG_IPV6_SUBTREES
812 if (rtmsg->rtmsg_src_len)
813 return -EINVAL;
814#endif
815 if (rtmsg->rtmsg_ifindex) {
816 err = -ENODEV;
817 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
818 if (!dev)
819 goto out;
820 idev = in6_dev_get(dev);
821 if (!idev)
822 goto out;
823 }
824
825 if (rtmsg->rtmsg_metric == 0)
826 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
827
828 rt = ip6_dst_alloc();
829
830 if (rt == NULL) {
831 err = -ENOMEM;
832 goto out;
833 }
834
835 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -0800836 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 if (nlh && (r = NLMSG_DATA(nlh))) {
838 rt->rt6i_protocol = r->rtm_protocol;
839 } else {
840 rt->rt6i_protocol = RTPROT_BOOT;
841 }
842
843 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
844
845 if (addr_type & IPV6_ADDR_MULTICAST)
846 rt->u.dst.input = ip6_mc_input;
847 else
848 rt->u.dst.input = ip6_forward;
849
850 rt->u.dst.output = ip6_output;
851
852 ipv6_addr_prefix(&rt->rt6i_dst.addr,
853 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
854 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
855 if (rt->rt6i_dst.plen == 128)
856 rt->u.dst.flags = DST_HOST;
857
858#ifdef CONFIG_IPV6_SUBTREES
859 ipv6_addr_prefix(&rt->rt6i_src.addr,
860 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
861 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
862#endif
863
864 rt->rt6i_metric = rtmsg->rtmsg_metric;
865
866 /* We cannot add true routes via loopback here,
867 they would result in kernel looping; promote them to reject routes
868 */
869 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
870 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
871 /* hold loopback dev/idev if we haven't done so. */
872 if (dev != &loopback_dev) {
873 if (dev) {
874 dev_put(dev);
875 in6_dev_put(idev);
876 }
877 dev = &loopback_dev;
878 dev_hold(dev);
879 idev = in6_dev_get(dev);
880 if (!idev) {
881 err = -ENODEV;
882 goto out;
883 }
884 }
885 rt->u.dst.output = ip6_pkt_discard_out;
886 rt->u.dst.input = ip6_pkt_discard;
887 rt->u.dst.error = -ENETUNREACH;
888 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
889 goto install_route;
890 }
891
892 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
893 struct in6_addr *gw_addr;
894 int gwa_type;
895
896 gw_addr = &rtmsg->rtmsg_gateway;
897 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
898 gwa_type = ipv6_addr_type(gw_addr);
899
900 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
901 struct rt6_info *grt;
902
903 /* IPv6 strictly inhibits using not link-local
904 addresses as nexthop address.
905 Otherwise, router will not able to send redirects.
906 It is very good, but in some (rare!) circumstances
907 (SIT, PtP, NBMA NOARP links) it is handy to allow
908 some exceptions. --ANK
909 */
910 err = -EINVAL;
911 if (!(gwa_type&IPV6_ADDR_UNICAST))
912 goto out;
913
914 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
915
916 err = -EHOSTUNREACH;
917 if (grt == NULL)
918 goto out;
919 if (dev) {
920 if (dev != grt->rt6i_dev) {
921 dst_release(&grt->u.dst);
922 goto out;
923 }
924 } else {
925 dev = grt->rt6i_dev;
926 idev = grt->rt6i_idev;
927 dev_hold(dev);
928 in6_dev_hold(grt->rt6i_idev);
929 }
930 if (!(grt->rt6i_flags&RTF_GATEWAY))
931 err = 0;
932 dst_release(&grt->u.dst);
933
934 if (err)
935 goto out;
936 }
937 err = -EINVAL;
938 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
939 goto out;
940 }
941
942 err = -ENODEV;
943 if (dev == NULL)
944 goto out;
945
946 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
947 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
948 if (IS_ERR(rt->rt6i_nexthop)) {
949 err = PTR_ERR(rt->rt6i_nexthop);
950 rt->rt6i_nexthop = NULL;
951 goto out;
952 }
953 }
954
955 rt->rt6i_flags = rtmsg->rtmsg_flags;
956
957install_route:
958 if (rta && rta[RTA_METRICS-1]) {
959 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
960 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
961
962 while (RTA_OK(attr, attrlen)) {
963 unsigned flavor = attr->rta_type;
964 if (flavor) {
965 if (flavor > RTAX_MAX) {
966 err = -EINVAL;
967 goto out;
968 }
969 rt->u.dst.metrics[flavor-1] =
970 *(u32 *)RTA_DATA(attr);
971 }
972 attr = RTA_NEXT(attr, attrlen);
973 }
974 }
975
976 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
977 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
978 if (!rt->u.dst.metrics[RTAX_MTU-1])
979 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
980 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
981 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
982 rt->u.dst.dev = dev;
983 rt->rt6i_idev = idev;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700984 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985
986out:
987 if (dev)
988 dev_put(dev);
989 if (idev)
990 in6_dev_put(idev);
991 if (rt)
992 dst_free((struct dst_entry *) rt);
993 return err;
994}
995
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700996int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997{
998 int err;
999
1000 write_lock_bh(&rt6_lock);
1001
1002 rt6_reset_dflt_pointer(NULL);
1003
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001004 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 dst_release(&rt->u.dst);
1006
1007 write_unlock_bh(&rt6_lock);
1008
1009 return err;
1010}
1011
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001012static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013{
1014 struct fib6_node *fn;
1015 struct rt6_info *rt;
1016 int err = -ESRCH;
1017
1018 read_lock_bh(&rt6_lock);
1019
1020 fn = fib6_locate(&ip6_routing_table,
1021 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1022 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1023
1024 if (fn) {
1025 for (rt = fn->leaf; rt; rt = rt->u.next) {
1026 if (rtmsg->rtmsg_ifindex &&
1027 (rt->rt6i_dev == NULL ||
1028 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1029 continue;
1030 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1031 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1032 continue;
1033 if (rtmsg->rtmsg_metric &&
1034 rtmsg->rtmsg_metric != rt->rt6i_metric)
1035 continue;
1036 dst_hold(&rt->u.dst);
1037 read_unlock_bh(&rt6_lock);
1038
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001039 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040 }
1041 }
1042 read_unlock_bh(&rt6_lock);
1043
1044 return err;
1045}
1046
1047/*
1048 * Handle redirects
1049 */
1050void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1051 struct neighbour *neigh, u8 *lladdr, int on_link)
1052{
1053 struct rt6_info *rt, *nrt;
1054
1055 /* Locate old route to this destination. */
1056 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1057
1058 if (rt == NULL)
1059 return;
1060
1061 if (neigh->dev != rt->rt6i_dev)
1062 goto out;
1063
1064 /*
1065 * Current route is on-link; redirect is always invalid.
1066 *
1067 * Seems, previous statement is not true. It could
1068 * be node, which looks for us as on-link (f.e. proxy ndisc)
1069 * But then router serving it might decide, that we should
1070 * know truth 8)8) --ANK (980726).
1071 */
1072 if (!(rt->rt6i_flags&RTF_GATEWAY))
1073 goto out;
1074
1075 /*
1076 * RFC 2461 specifies that redirects should only be
1077 * accepted if they come from the nexthop to the target.
1078 * Due to the way default routers are chosen, this notion
1079 * is a bit fuzzy and one might need to check all default
1080 * routers.
1081 */
1082 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1083 if (rt->rt6i_flags & RTF_DEFAULT) {
1084 struct rt6_info *rt1;
1085
1086 read_lock(&rt6_lock);
1087 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1088 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1089 dst_hold(&rt1->u.dst);
1090 dst_release(&rt->u.dst);
1091 read_unlock(&rt6_lock);
1092 rt = rt1;
1093 goto source_ok;
1094 }
1095 }
1096 read_unlock(&rt6_lock);
1097 }
1098 if (net_ratelimit())
1099 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1100 "for redirect target\n");
1101 goto out;
1102 }
1103
1104source_ok:
1105
1106 /*
1107 * We have finally decided to accept it.
1108 */
1109
1110 neigh_update(neigh, lladdr, NUD_STALE,
1111 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1112 NEIGH_UPDATE_F_OVERRIDE|
1113 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1114 NEIGH_UPDATE_F_ISROUTER))
1115 );
1116
1117 /*
1118 * Redirect received -> path was valid.
1119 * Look, redirects are sent only in response to data packets,
1120 * so that this nexthop apparently is reachable. --ANK
1121 */
1122 dst_confirm(&rt->u.dst);
1123
1124 /* Duplicate redirect: silently ignore. */
1125 if (neigh == rt->u.dst.neighbour)
1126 goto out;
1127
1128 nrt = ip6_rt_copy(rt);
1129 if (nrt == NULL)
1130 goto out;
1131
1132 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1133 if (on_link)
1134 nrt->rt6i_flags &= ~RTF_GATEWAY;
1135
1136 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1137 nrt->rt6i_dst.plen = 128;
1138 nrt->u.dst.flags |= DST_HOST;
1139
1140 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1141 nrt->rt6i_nexthop = neigh_clone(neigh);
1142 /* Reset pmtu, it may be better */
1143 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1144 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1145
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001146 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 goto out;
1148
1149 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001150 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 return;
1152 }
1153
1154out:
1155 dst_release(&rt->u.dst);
1156 return;
1157}
1158
1159/*
1160 * Handle ICMP "packet too big" messages
1161 * i.e. Path MTU discovery
1162 */
1163
1164void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1165 struct net_device *dev, u32 pmtu)
1166{
1167 struct rt6_info *rt, *nrt;
1168 int allfrag = 0;
1169
1170 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1171 if (rt == NULL)
1172 return;
1173
1174 if (pmtu >= dst_mtu(&rt->u.dst))
1175 goto out;
1176
1177 if (pmtu < IPV6_MIN_MTU) {
1178 /*
1179 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1180 * MTU (1280) and a fragment header should always be included
1181 * after a node receiving Too Big message reporting PMTU is
1182 * less than the IPv6 Minimum Link MTU.
1183 */
1184 pmtu = IPV6_MIN_MTU;
1185 allfrag = 1;
1186 }
1187
1188 /* New mtu received -> path was valid.
1189 They are sent only in response to data packets,
1190 so that this nexthop apparently is reachable. --ANK
1191 */
1192 dst_confirm(&rt->u.dst);
1193
1194 /* Host route. If it is static, it would be better
1195 not to override it, but add new one, so that
1196 when cache entry will expire old pmtu
1197 would return automatically.
1198 */
1199 if (rt->rt6i_flags & RTF_CACHE) {
1200 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1201 if (allfrag)
1202 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1203 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1204 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1205 goto out;
1206 }
1207
1208 /* Network route.
1209 Two cases are possible:
1210 1. It is connected route. Action: COW
1211 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1212 */
1213 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001214 nrt = rt6_cow(rt, daddr, saddr, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 if (!nrt->u.dst.error) {
1216 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1217 if (allfrag)
1218 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1219 /* According to RFC 1981, detecting PMTU increase shouldn't be
1220 happened within 5 mins, the recommended timer is 10 mins.
1221 Here this route expiration time is set to ip6_rt_mtu_expires
1222 which is 10 mins. After 10 mins the decreased pmtu is expired
1223 and detecting PMTU increase will be automatically happened.
1224 */
1225 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1226 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1227 }
1228 dst_release(&nrt->u.dst);
1229 } else {
1230 nrt = ip6_rt_copy(rt);
1231 if (nrt == NULL)
1232 goto out;
1233 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1234 nrt->rt6i_dst.plen = 128;
1235 nrt->u.dst.flags |= DST_HOST;
1236 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1237 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1238 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1239 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1240 if (allfrag)
1241 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001242 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243 }
1244
1245out:
1246 dst_release(&rt->u.dst);
1247}
1248
1249/*
1250 * Misc support functions
1251 */
1252
1253static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1254{
1255 struct rt6_info *rt = ip6_dst_alloc();
1256
1257 if (rt) {
1258 rt->u.dst.input = ort->u.dst.input;
1259 rt->u.dst.output = ort->u.dst.output;
1260
1261 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1262 rt->u.dst.dev = ort->u.dst.dev;
1263 if (rt->u.dst.dev)
1264 dev_hold(rt->u.dst.dev);
1265 rt->rt6i_idev = ort->rt6i_idev;
1266 if (rt->rt6i_idev)
1267 in6_dev_hold(rt->rt6i_idev);
1268 rt->u.dst.lastuse = jiffies;
1269 rt->rt6i_expires = 0;
1270
1271 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1272 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1273 rt->rt6i_metric = 0;
1274
1275 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1276#ifdef CONFIG_IPV6_SUBTREES
1277 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1278#endif
1279 }
1280 return rt;
1281}
1282
1283struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1284{
1285 struct rt6_info *rt;
1286 struct fib6_node *fn;
1287
1288 fn = &ip6_routing_table;
1289
1290 write_lock_bh(&rt6_lock);
1291 for (rt = fn->leaf; rt; rt=rt->u.next) {
1292 if (dev == rt->rt6i_dev &&
1293 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1294 break;
1295 }
1296 if (rt)
1297 dst_hold(&rt->u.dst);
1298 write_unlock_bh(&rt6_lock);
1299 return rt;
1300}
1301
1302struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1303 struct net_device *dev)
1304{
1305 struct in6_rtmsg rtmsg;
1306
1307 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1308 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1309 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1310 rtmsg.rtmsg_metric = 1024;
1311 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1312
1313 rtmsg.rtmsg_ifindex = dev->ifindex;
1314
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001315 ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316 return rt6_get_dflt_router(gwaddr, dev);
1317}
1318
1319void rt6_purge_dflt_routers(void)
1320{
1321 struct rt6_info *rt;
1322
1323restart:
1324 read_lock_bh(&rt6_lock);
1325 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1326 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1327 dst_hold(&rt->u.dst);
1328
1329 rt6_reset_dflt_pointer(NULL);
1330
1331 read_unlock_bh(&rt6_lock);
1332
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001333 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334
1335 goto restart;
1336 }
1337 }
1338 read_unlock_bh(&rt6_lock);
1339}
1340
1341int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1342{
1343 struct in6_rtmsg rtmsg;
1344 int err;
1345
1346 switch(cmd) {
1347 case SIOCADDRT: /* Add a route */
1348 case SIOCDELRT: /* Delete a route */
1349 if (!capable(CAP_NET_ADMIN))
1350 return -EPERM;
1351 err = copy_from_user(&rtmsg, arg,
1352 sizeof(struct in6_rtmsg));
1353 if (err)
1354 return -EFAULT;
1355
1356 rtnl_lock();
1357 switch (cmd) {
1358 case SIOCADDRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001359 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 break;
1361 case SIOCDELRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001362 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001363 break;
1364 default:
1365 err = -EINVAL;
1366 }
1367 rtnl_unlock();
1368
1369 return err;
1370 };
1371
1372 return -EINVAL;
1373}
1374
1375/*
1376 * Drop the packet on the floor
1377 */
1378
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001379static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001380{
1381 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1382 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1383 kfree_skb(skb);
1384 return 0;
1385}
1386
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001387static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388{
1389 skb->dev = skb->dst->dev;
1390 return ip6_pkt_discard(skb);
1391}
1392
1393/*
1394 * Allocate a dst for local (unicast / anycast) address.
1395 */
1396
1397struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1398 const struct in6_addr *addr,
1399 int anycast)
1400{
1401 struct rt6_info *rt = ip6_dst_alloc();
1402
1403 if (rt == NULL)
1404 return ERR_PTR(-ENOMEM);
1405
1406 dev_hold(&loopback_dev);
1407 in6_dev_hold(idev);
1408
1409 rt->u.dst.flags = DST_HOST;
1410 rt->u.dst.input = ip6_input;
1411 rt->u.dst.output = ip6_output;
1412 rt->rt6i_dev = &loopback_dev;
1413 rt->rt6i_idev = idev;
1414 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1415 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1416 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1417 rt->u.dst.obsolete = -1;
1418
1419 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001420 if (anycast)
1421 rt->rt6i_flags |= RTF_ANYCAST;
1422 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423 rt->rt6i_flags |= RTF_LOCAL;
1424 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1425 if (rt->rt6i_nexthop == NULL) {
1426 dst_free((struct dst_entry *) rt);
1427 return ERR_PTR(-ENOMEM);
1428 }
1429
1430 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1431 rt->rt6i_dst.plen = 128;
1432
1433 atomic_set(&rt->u.dst.__refcnt, 1);
1434
1435 return rt;
1436}
1437
1438static int fib6_ifdown(struct rt6_info *rt, void *arg)
1439{
1440 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1441 rt != &ip6_null_entry) {
1442 RT6_TRACE("deleted by ifdown %p\n", rt);
1443 return -1;
1444 }
1445 return 0;
1446}
1447
1448void rt6_ifdown(struct net_device *dev)
1449{
1450 write_lock_bh(&rt6_lock);
1451 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1452 write_unlock_bh(&rt6_lock);
1453}
1454
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1460
1461static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1462{
1463 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1464 struct inet6_dev *idev;
1465
1466 /* In IPv6 pmtu discovery is not optional,
1467 so that RTAX_MTU lock cannot disable it.
1468 We still use this lock to block changes
1469 caused by addrconf/ndisc.
1470 */
1471
1472 idev = __in6_dev_get(arg->dev);
1473 if (idev == NULL)
1474 return 0;
1475
1476 /* For administrative MTU increase, there is no way to discover
1477 IPv6 PMTU increase, so PMTU increase should be updated here.
1478 Since RFC 1981 doesn't include administrative MTU increase
1479 update PMTU increase is a MUST. (i.e. jumbo frame)
1480 */
1481 /*
1482 If new MTU is less than route PMTU, this new MTU will be the
1483 lowest MTU in the path, update the route PMTU to reflect PMTU
1484 decreases; if new MTU is greater than route PMTU, and the
1485 old MTU is the lowest MTU in the path, update the route PMTU
1486 to reflect the increase. In this case if the other nodes' MTU
1487 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1488 PMTU discouvery.
1489 */
1490 if (rt->rt6i_dev == arg->dev &&
1491 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1492 (dst_mtu(&rt->u.dst) > arg->mtu ||
1493 (dst_mtu(&rt->u.dst) < arg->mtu &&
1494 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1495 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1496 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1497 return 0;
1498}
1499
1500void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1501{
1502 struct rt6_mtu_change_arg arg;
1503
1504 arg.dev = dev;
1505 arg.mtu = mtu;
1506 read_lock_bh(&rt6_lock);
1507 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1508 read_unlock_bh(&rt6_lock);
1509}
1510
1511static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1512 struct in6_rtmsg *rtmsg)
1513{
1514 memset(rtmsg, 0, sizeof(*rtmsg));
1515
1516 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1517 rtmsg->rtmsg_src_len = r->rtm_src_len;
1518 rtmsg->rtmsg_flags = RTF_UP;
1519 if (r->rtm_type == RTN_UNREACHABLE)
1520 rtmsg->rtmsg_flags |= RTF_REJECT;
1521
1522 if (rta[RTA_GATEWAY-1]) {
1523 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1524 return -EINVAL;
1525 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1526 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1527 }
1528 if (rta[RTA_DST-1]) {
1529 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1530 return -EINVAL;
1531 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1532 }
1533 if (rta[RTA_SRC-1]) {
1534 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1535 return -EINVAL;
1536 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1537 }
1538 if (rta[RTA_OIF-1]) {
1539 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1540 return -EINVAL;
1541 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1542 }
1543 if (rta[RTA_PRIORITY-1]) {
1544 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1545 return -EINVAL;
1546 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1547 }
1548 return 0;
1549}
1550
1551int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1552{
1553 struct rtmsg *r = NLMSG_DATA(nlh);
1554 struct in6_rtmsg rtmsg;
1555
1556 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1557 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001558 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559}
1560
1561int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1562{
1563 struct rtmsg *r = NLMSG_DATA(nlh);
1564 struct in6_rtmsg rtmsg;
1565
1566 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1567 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001568 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569}
1570
/* Per-call context handed to rt6_dump_route() through the tree walker. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer being filled */
	struct netlink_callback *cb;	/* dump request state */
};
1576
1577static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001578 struct in6_addr *dst, struct in6_addr *src,
1579 int iif, int type, u32 pid, u32 seq,
1580 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581{
1582 struct rtmsg *rtm;
1583 struct nlmsghdr *nlh;
1584 unsigned char *b = skb->tail;
1585 struct rta_cacheinfo ci;
1586
1587 if (prefix) { /* user wants prefix routes only */
1588 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1589 /* success since this is not a prefix route */
1590 return 1;
1591 }
1592 }
1593
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001594 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595 rtm = NLMSG_DATA(nlh);
1596 rtm->rtm_family = AF_INET6;
1597 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1598 rtm->rtm_src_len = rt->rt6i_src.plen;
1599 rtm->rtm_tos = 0;
1600 rtm->rtm_table = RT_TABLE_MAIN;
1601 if (rt->rt6i_flags&RTF_REJECT)
1602 rtm->rtm_type = RTN_UNREACHABLE;
1603 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1604 rtm->rtm_type = RTN_LOCAL;
1605 else
1606 rtm->rtm_type = RTN_UNICAST;
1607 rtm->rtm_flags = 0;
1608 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1609 rtm->rtm_protocol = rt->rt6i_protocol;
1610 if (rt->rt6i_flags&RTF_DYNAMIC)
1611 rtm->rtm_protocol = RTPROT_REDIRECT;
1612 else if (rt->rt6i_flags & RTF_ADDRCONF)
1613 rtm->rtm_protocol = RTPROT_KERNEL;
1614 else if (rt->rt6i_flags&RTF_DEFAULT)
1615 rtm->rtm_protocol = RTPROT_RA;
1616
1617 if (rt->rt6i_flags&RTF_CACHE)
1618 rtm->rtm_flags |= RTM_F_CLONED;
1619
1620 if (dst) {
1621 RTA_PUT(skb, RTA_DST, 16, dst);
1622 rtm->rtm_dst_len = 128;
1623 } else if (rtm->rtm_dst_len)
1624 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1625#ifdef CONFIG_IPV6_SUBTREES
1626 if (src) {
1627 RTA_PUT(skb, RTA_SRC, 16, src);
1628 rtm->rtm_src_len = 128;
1629 } else if (rtm->rtm_src_len)
1630 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1631#endif
1632 if (iif)
1633 RTA_PUT(skb, RTA_IIF, 4, &iif);
1634 else if (dst) {
1635 struct in6_addr saddr_buf;
1636 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1637 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1638 }
1639 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1640 goto rtattr_failure;
1641 if (rt->u.dst.neighbour)
1642 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1643 if (rt->u.dst.dev)
1644 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1645 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1646 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1647 if (rt->rt6i_expires)
1648 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1649 else
1650 ci.rta_expires = 0;
1651 ci.rta_used = rt->u.dst.__use;
1652 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1653 ci.rta_error = rt->u.dst.error;
1654 ci.rta_id = 0;
1655 ci.rta_ts = 0;
1656 ci.rta_tsage = 0;
1657 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1658 nlh->nlmsg_len = skb->tail - b;
1659 return skb->len;
1660
1661nlmsg_failure:
1662rtattr_failure:
1663 skb_trim(skb, b - skb->data);
1664 return -1;
1665}
1666
1667static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1668{
1669 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1670 int prefix;
1671
1672 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1673 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1674 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1675 } else
1676 prefix = 0;
1677
1678 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1679 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001680 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681}
1682
1683static int fib6_dump_node(struct fib6_walker_t *w)
1684{
1685 int res;
1686 struct rt6_info *rt;
1687
1688 for (rt = w->leaf; rt; rt = rt->u.next) {
1689 res = rt6_dump_route(rt, w->args);
1690 if (res < 0) {
1691 /* Frame is full, suspend walking */
1692 w->leaf = rt;
1693 return 1;
1694 }
1695 BUG_TRAP(res!=0);
1696 }
1697 w->leaf = NULL;
1698 return 0;
1699}
1700
1701static void fib6_dump_end(struct netlink_callback *cb)
1702{
1703 struct fib6_walker_t *w = (void*)cb->args[0];
1704
1705 if (w) {
1706 cb->args[0] = 0;
1707 fib6_walker_unlink(w);
1708 kfree(w);
1709 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08001710 cb->done = (void*)cb->args[1];
1711 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712}
1713
1714static int fib6_dump_done(struct netlink_callback *cb)
1715{
1716 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01001717 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718}
1719
1720int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1721{
1722 struct rt6_rtnl_dump_arg arg;
1723 struct fib6_walker_t *w;
1724 int res;
1725
1726 arg.skb = skb;
1727 arg.cb = cb;
1728
1729 w = (void*)cb->args[0];
1730 if (w == NULL) {
1731 /* New dump:
1732 *
1733 * 1. hook callback destructor.
1734 */
1735 cb->args[1] = (long)cb->done;
1736 cb->done = fib6_dump_done;
1737
1738 /*
1739 * 2. allocate and initialize walker.
1740 */
David S. Miller9e147a12005-11-17 16:52:51 -08001741 w = kmalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742 if (w == NULL)
1743 return -ENOMEM;
1744 RT6_TRACE("dump<%p", w);
1745 memset(w, 0, sizeof(*w));
1746 w->root = &ip6_routing_table;
1747 w->func = fib6_dump_node;
1748 w->args = &arg;
1749 cb->args[0] = (long)w;
1750 read_lock_bh(&rt6_lock);
1751 res = fib6_walk(w);
1752 read_unlock_bh(&rt6_lock);
1753 } else {
1754 w->args = &arg;
1755 read_lock_bh(&rt6_lock);
1756 res = fib6_walk_continue(w);
1757 read_unlock_bh(&rt6_lock);
1758 }
1759#if RT6_DEBUG >= 3
1760 if (res <= 0 && skb->len == 0)
1761 RT6_TRACE("%p>dump end\n", w);
1762#endif
1763 res = res < 0 ? res : skb->len;
1764 /* res < 0 is an error. (really, impossible)
1765 res == 0 means that dump is complete, but skb still can contain data.
1766 res > 0 dump is not complete, but frame is full.
1767 */
1768 /* Destroy walker, if dump of this table is complete. */
1769 if (res <= 0)
1770 fib6_dump_end(cb);
1771 return res;
1772}
1773
1774int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1775{
1776 struct rtattr **rta = arg;
1777 int iif = 0;
1778 int err = -ENOBUFS;
1779 struct sk_buff *skb;
1780 struct flowi fl;
1781 struct rt6_info *rt;
1782
1783 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1784 if (skb == NULL)
1785 goto out;
1786
1787 /* Reserve room for dummy headers, this skb can pass
1788 through good chunk of routing engine.
1789 */
1790 skb->mac.raw = skb->data;
1791 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1792
1793 memset(&fl, 0, sizeof(fl));
1794 if (rta[RTA_SRC-1])
1795 ipv6_addr_copy(&fl.fl6_src,
1796 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1797 if (rta[RTA_DST-1])
1798 ipv6_addr_copy(&fl.fl6_dst,
1799 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1800
1801 if (rta[RTA_IIF-1])
1802 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1803
1804 if (iif) {
1805 struct net_device *dev;
1806 dev = __dev_get_by_index(iif);
1807 if (!dev) {
1808 err = -ENODEV;
1809 goto out_free;
1810 }
1811 }
1812
1813 fl.oif = 0;
1814 if (rta[RTA_OIF-1])
1815 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1816
1817 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1818
1819 skb->dst = &rt->u.dst;
1820
1821 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1822 err = rt6_fill_node(skb, rt,
1823 &fl.fl6_dst, &fl.fl6_src,
1824 iif,
1825 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001826 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 if (err < 0) {
1828 err = -EMSGSIZE;
1829 goto out_free;
1830 }
1831
1832 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1833 if (err > 0)
1834 err = 0;
1835out:
1836 return err;
1837out_free:
1838 kfree_skb(skb);
1839 goto out;
1840}
1841
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001842void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1843 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001844{
1845 struct sk_buff *skb;
1846 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001847 u32 pid = current->pid;
1848 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001850 if (req)
1851 pid = req->pid;
1852 if (nlh)
1853 seq = nlh->nlmsg_seq;
1854
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855 skb = alloc_skb(size, gfp_any());
1856 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07001857 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858 return;
1859 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001860 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07001862 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863 return;
1864 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07001865 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1866 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867}
1868
1869/*
1870 * /proc
1871 */
1872
1873#ifdef CONFIG_PROC_FS
1874
/*
 * Length of one record as formatted by rt6_info_route(): three 32-digit
 * hex addresses, two 4-character prefix-length fields, and a 46-character
 * tail (four " %08x" fields, " %8s" and the newline).
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State threaded through rt6_info_route() while a read is formatted. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* file offset requested by the reader */
	int length;	/* number of bytes requested */
	int skip;	/* records skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
1885
1886static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1887{
1888 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1889 int i;
1890
1891 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1892 arg->skip++;
1893 return 0;
1894 }
1895
1896 if (arg->len >= arg->length)
1897 return 0;
1898
1899 for (i=0; i<16; i++) {
1900 sprintf(arg->buffer + arg->len, "%02x",
1901 rt->rt6i_dst.addr.s6_addr[i]);
1902 arg->len += 2;
1903 }
1904 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1905 rt->rt6i_dst.plen);
1906
1907#ifdef CONFIG_IPV6_SUBTREES
1908 for (i=0; i<16; i++) {
1909 sprintf(arg->buffer + arg->len, "%02x",
1910 rt->rt6i_src.addr.s6_addr[i]);
1911 arg->len += 2;
1912 }
1913 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1914 rt->rt6i_src.plen);
1915#else
1916 sprintf(arg->buffer + arg->len,
1917 "00000000000000000000000000000000 00 ");
1918 arg->len += 36;
1919#endif
1920
1921 if (rt->rt6i_nexthop) {
1922 for (i=0; i<16; i++) {
1923 sprintf(arg->buffer + arg->len, "%02x",
1924 rt->rt6i_nexthop->primary_key[i]);
1925 arg->len += 2;
1926 }
1927 } else {
1928 sprintf(arg->buffer + arg->len,
1929 "00000000000000000000000000000000");
1930 arg->len += 32;
1931 }
1932 arg->len += sprintf(arg->buffer + arg->len,
1933 " %08x %08x %08x %08x %8s\n",
1934 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1935 rt->u.dst.__use, rt->rt6i_flags,
1936 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1937 return 0;
1938}
1939
1940static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1941{
1942 struct rt6_proc_arg arg;
1943 arg.buffer = buffer;
1944 arg.offset = offset;
1945 arg.length = length;
1946 arg.skip = 0;
1947 arg.len = 0;
1948
1949 read_lock_bh(&rt6_lock);
1950 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1951 read_unlock_bh(&rt6_lock);
1952
1953 *start = buffer;
1954 if (offset)
1955 *start += offset % RT6_INFO_LEN;
1956
1957 arg.len -= offset % RT6_INFO_LEN;
1958
1959 if (arg.len > length)
1960 arg.len = length;
1961 if (arg.len < 0)
1962 arg.len = 0;
1963
1964 return arg.len;
1965}
1966
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1968{
1969 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1970 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1971 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1972 rt6_stats.fib_rt_cache,
1973 atomic_read(&ip6_dst_ops.entries),
1974 rt6_stats.fib_discarded_routes);
1975
1976 return 0;
1977}
1978
1979static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1980{
1981 return single_open(file, rt6_stats_seq_show, NULL);
1982}
1983
1984static struct file_operations rt6_stats_seq_fops = {
1985 .owner = THIS_MODULE,
1986 .open = rt6_stats_seq_open,
1987 .read = seq_read,
1988 .llseek = seq_lseek,
1989 .release = single_release,
1990};
1991#endif /* CONFIG_PROC_FS */
1992
1993#ifdef CONFIG_SYSCTL
1994
1995static int flush_delay;
1996
1997static
1998int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1999 void __user *buffer, size_t *lenp, loff_t *ppos)
2000{
2001 if (write) {
2002 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2003 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2004 return 0;
2005 } else
2006 return -EINVAL;
2007}
2008
2009ctl_table ipv6_route_table[] = {
2010 {
2011 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2012 .procname = "flush",
2013 .data = &flush_delay,
2014 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002015 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016 .proc_handler = &ipv6_sysctl_rtcache_flush
2017 },
2018 {
2019 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2020 .procname = "gc_thresh",
2021 .data = &ip6_dst_ops.gc_thresh,
2022 .maxlen = sizeof(int),
2023 .mode = 0644,
2024 .proc_handler = &proc_dointvec,
2025 },
2026 {
2027 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2028 .procname = "max_size",
2029 .data = &ip6_rt_max_size,
2030 .maxlen = sizeof(int),
2031 .mode = 0644,
2032 .proc_handler = &proc_dointvec,
2033 },
2034 {
2035 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2036 .procname = "gc_min_interval",
2037 .data = &ip6_rt_gc_min_interval,
2038 .maxlen = sizeof(int),
2039 .mode = 0644,
2040 .proc_handler = &proc_dointvec_jiffies,
2041 .strategy = &sysctl_jiffies,
2042 },
2043 {
2044 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2045 .procname = "gc_timeout",
2046 .data = &ip6_rt_gc_timeout,
2047 .maxlen = sizeof(int),
2048 .mode = 0644,
2049 .proc_handler = &proc_dointvec_jiffies,
2050 .strategy = &sysctl_jiffies,
2051 },
2052 {
2053 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2054 .procname = "gc_interval",
2055 .data = &ip6_rt_gc_interval,
2056 .maxlen = sizeof(int),
2057 .mode = 0644,
2058 .proc_handler = &proc_dointvec_jiffies,
2059 .strategy = &sysctl_jiffies,
2060 },
2061 {
2062 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2063 .procname = "gc_elasticity",
2064 .data = &ip6_rt_gc_elasticity,
2065 .maxlen = sizeof(int),
2066 .mode = 0644,
2067 .proc_handler = &proc_dointvec_jiffies,
2068 .strategy = &sysctl_jiffies,
2069 },
2070 {
2071 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2072 .procname = "mtu_expires",
2073 .data = &ip6_rt_mtu_expires,
2074 .maxlen = sizeof(int),
2075 .mode = 0644,
2076 .proc_handler = &proc_dointvec_jiffies,
2077 .strategy = &sysctl_jiffies,
2078 },
2079 {
2080 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2081 .procname = "min_adv_mss",
2082 .data = &ip6_rt_min_advmss,
2083 .maxlen = sizeof(int),
2084 .mode = 0644,
2085 .proc_handler = &proc_dointvec_jiffies,
2086 .strategy = &sysctl_jiffies,
2087 },
2088 {
2089 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2090 .procname = "gc_min_interval_ms",
2091 .data = &ip6_rt_gc_min_interval,
2092 .maxlen = sizeof(int),
2093 .mode = 0644,
2094 .proc_handler = &proc_dointvec_ms_jiffies,
2095 .strategy = &sysctl_ms_jiffies,
2096 },
2097 { .ctl_name = 0 }
2098};
2099
2100#endif
2101
2102void __init ip6_route_init(void)
2103{
2104 struct proc_dir_entry *p;
2105
2106 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2107 sizeof(struct rt6_info),
2108 0, SLAB_HWCACHE_ALIGN,
2109 NULL, NULL);
2110 if (!ip6_dst_ops.kmem_cachep)
2111 panic("cannot create ip6_dst_cache");
2112
2113 fib6_init();
2114#ifdef CONFIG_PROC_FS
2115 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2116 if (p)
2117 p->owner = THIS_MODULE;
2118
2119 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2120#endif
2121#ifdef CONFIG_XFRM
2122 xfrm6_init();
2123#endif
2124}
2125
2126void ip6_route_cleanup(void)
2127{
2128#ifdef CONFIG_PROC_FS
2129 proc_net_remove("ipv6_route");
2130 proc_net_remove("rt6_stats");
2131#endif
2132#ifdef CONFIG_XFRM
2133 xfrm6_fini();
2134#endif
2135 rt6_ifdown(NULL);
2136 fib6_gc_cleanup();
2137 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2138}