blob: 66140f13d1197130c29f5d9fa90c4f10a3bf3819 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
27#include <linux/config.h>
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
38#include <linux/netlink.h>
39#include <linux/if_arp.h>
40
41#ifdef CONFIG_PROC_FS
42#include <linux/proc_fs.h>
43#include <linux/seq_file.h>
44#endif
45
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk calls; below that they expand to
 * nothing / an empty statement.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
73
74
75static int ip6_rt_max_size = 4096;
76static int ip6_rt_gc_min_interval = HZ / 2;
77static int ip6_rt_gc_timeout = 60*HZ;
78int ip6_rt_gc_interval = 30*HZ;
79static int ip6_rt_gc_elasticity = 9;
80static int ip6_rt_mtu_expires = 10*60*HZ;
81static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
89static int ip6_dst_gc(void);
90
91static int ip6_pkt_discard(struct sk_buff *skb);
92static int ip6_pkt_discard_out(struct sk_buff *skb);
93static void ip6_link_failure(struct sk_buff *skb);
94static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95
96static struct dst_ops ip6_dst_ops = {
97 .family = AF_INET6,
98 .protocol = __constant_htons(ETH_P_IPV6),
99 .gc = ip6_dst_gc,
100 .gc_thresh = 1024,
101 .check = ip6_dst_check,
102 .destroy = ip6_dst_destroy,
103 .ifdown = ip6_dst_ifdown,
104 .negative_advice = ip6_negative_advice,
105 .link_failure = ip6_link_failure,
106 .update_pmtu = ip6_rt_update_pmtu,
107 .entry_size = sizeof(struct rt6_info),
108};
109
110struct rt6_info ip6_null_entry = {
111 .u = {
112 .dst = {
113 .__refcnt = ATOMIC_INIT(1),
114 .__use = 1,
115 .dev = &loopback_dev,
116 .obsolete = -1,
117 .error = -ENETUNREACH,
118 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
119 .input = ip6_pkt_discard,
120 .output = ip6_pkt_discard_out,
121 .ops = &ip6_dst_ops,
122 .path = (struct dst_entry*)&ip6_null_entry,
123 }
124 },
125 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
126 .rt6i_metric = ~(u32) 0,
127 .rt6i_ref = ATOMIC_INIT(1),
128};
129
130struct fib6_node ip6_routing_table = {
131 .leaf = &ip6_null_entry,
132 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133};
134
135/* Protects all the ip6 fib */
136
137DEFINE_RWLOCK(rt6_lock);
138
139
140/* allocate dst with ip6_dst_ops */
141static __inline__ struct rt6_info *ip6_dst_alloc(void)
142{
143 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144}
145
146static void ip6_dst_destroy(struct dst_entry *dst)
147{
148 struct rt6_info *rt = (struct rt6_info *)dst;
149 struct inet6_dev *idev = rt->rt6i_idev;
150
151 if (idev != NULL) {
152 rt->rt6i_idev = NULL;
153 in6_dev_put(idev);
154 }
155}
156
157static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158 int how)
159{
160 struct rt6_info *rt = (struct rt6_info *)dst;
161 struct inet6_dev *idev = rt->rt6i_idev;
162
163 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165 if (loopback_idev != NULL) {
166 rt->rt6i_idev = loopback_idev;
167 in6_dev_put(idev);
168 }
169 }
170}
171
172static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173{
174 return (rt->rt6i_flags & RTF_EXPIRES &&
175 time_after(jiffies, rt->rt6i_expires));
176}
177
178/*
179 * Route lookup. Any rt6_lock is implied.
180 */
181
182static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183 int oif,
184 int strict)
185{
186 struct rt6_info *local = NULL;
187 struct rt6_info *sprt;
188
189 if (oif) {
190 for (sprt = rt; sprt; sprt = sprt->u.next) {
191 struct net_device *dev = sprt->rt6i_dev;
192 if (dev->ifindex == oif)
193 return sprt;
194 if (dev->flags & IFF_LOOPBACK) {
195 if (sprt->rt6i_idev == NULL ||
196 sprt->rt6i_idev->dev->ifindex != oif) {
197 if (strict && oif)
198 continue;
199 if (local && (!oif ||
200 local->rt6i_idev->dev->ifindex == oif))
201 continue;
202 }
203 local = sprt;
204 }
205 }
206
207 if (local)
208 return local;
209
210 if (strict)
211 return &ip6_null_entry;
212 }
213 return rt;
214}
215
216/*
217 * pointer to the last default router chosen. BH is disabled locally.
218 */
219static struct rt6_info *rt6_dflt_pointer;
220static DEFINE_SPINLOCK(rt6_dflt_lock);
221
222void rt6_reset_dflt_pointer(struct rt6_info *rt)
223{
224 spin_lock_bh(&rt6_dflt_lock);
225 if (rt == NULL || rt == rt6_dflt_pointer) {
226 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227 rt6_dflt_pointer = NULL;
228 }
229 spin_unlock_bh(&rt6_dflt_lock);
230}
231
232/* Default Router Selection (RFC 2461 6.3.6) */
233static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234{
235 struct rt6_info *match = NULL;
236 struct rt6_info *sprt;
237 int mpri = 0;
238
239 for (sprt = rt; sprt; sprt = sprt->u.next) {
240 struct neighbour *neigh;
241 int m = 0;
242
243 if (!oif ||
244 (sprt->rt6i_dev &&
245 sprt->rt6i_dev->ifindex == oif))
246 m += 8;
247
248 if (rt6_check_expired(sprt))
249 continue;
250
251 if (sprt == rt6_dflt_pointer)
252 m += 4;
253
254 if ((neigh = sprt->rt6i_nexthop) != NULL) {
255 read_lock_bh(&neigh->lock);
256 switch (neigh->nud_state) {
257 case NUD_REACHABLE:
258 m += 3;
259 break;
260
261 case NUD_STALE:
262 case NUD_DELAY:
263 case NUD_PROBE:
264 m += 2;
265 break;
266
267 case NUD_NOARP:
268 case NUD_PERMANENT:
269 m += 1;
270 break;
271
272 case NUD_INCOMPLETE:
273 default:
274 read_unlock_bh(&neigh->lock);
275 continue;
276 }
277 read_unlock_bh(&neigh->lock);
278 } else {
279 continue;
280 }
281
282 if (m > mpri || m >= 12) {
283 match = sprt;
284 mpri = m;
285 if (m >= 12) {
286 /* we choose the last default router if it
287 * is in (probably) reachable state.
288 * If route changed, we should do pmtu
289 * discovery. --yoshfuji
290 */
291 break;
292 }
293 }
294 }
295
296 spin_lock(&rt6_dflt_lock);
297 if (!match) {
298 /*
299 * No default routers are known to be reachable.
300 * SHOULD round robin
301 */
302 if (rt6_dflt_pointer) {
303 for (sprt = rt6_dflt_pointer->u.next;
304 sprt; sprt = sprt->u.next) {
305 if (sprt->u.dst.obsolete <= 0 &&
306 sprt->u.dst.error == 0 &&
307 !rt6_check_expired(sprt)) {
308 match = sprt;
309 break;
310 }
311 }
312 for (sprt = rt;
313 !match && sprt;
314 sprt = sprt->u.next) {
315 if (sprt->u.dst.obsolete <= 0 &&
316 sprt->u.dst.error == 0 &&
317 !rt6_check_expired(sprt)) {
318 match = sprt;
319 break;
320 }
321 if (sprt == rt6_dflt_pointer)
322 break;
323 }
324 }
325 }
326
327 if (match) {
328 if (rt6_dflt_pointer != match)
329 RT6_TRACE("changed default router: %p->%p\n",
330 rt6_dflt_pointer, match);
331 rt6_dflt_pointer = match;
332 }
333 spin_unlock(&rt6_dflt_lock);
334
335 if (!match) {
336 /*
337 * Last Resort: if no default routers found,
338 * use addrconf default route.
339 * We don't record this route.
340 */
341 for (sprt = ip6_routing_table.leaf;
342 sprt; sprt = sprt->u.next) {
343 if (!rt6_check_expired(sprt) &&
344 (sprt->rt6i_flags & RTF_DEFAULT) &&
345 (!oif ||
346 (sprt->rt6i_dev &&
347 sprt->rt6i_dev->ifindex == oif))) {
348 match = sprt;
349 break;
350 }
351 }
352 if (!match) {
353 /* no default route. give up. */
354 match = &ip6_null_entry;
355 }
356 }
357
358 return match;
359}
360
361struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362 int oif, int strict)
363{
364 struct fib6_node *fn;
365 struct rt6_info *rt;
366
367 read_lock_bh(&rt6_lock);
368 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369 rt = rt6_device_match(fn->leaf, oif, strict);
370 dst_hold(&rt->u.dst);
371 rt->u.dst.__use++;
372 read_unlock_bh(&rt6_lock);
373
374 rt->u.dst.lastuse = jiffies;
375 if (rt->u.dst.error == 0)
376 return rt;
377 dst_release(&rt->u.dst);
378 return NULL;
379}
380
381/* ip6_ins_rt is called with FREE rt6_lock.
382 It takes new route entry, the addition fails by any reason the
383 route is freed. In any case, if caller does not hold it, it may
384 be destroyed.
385 */
386
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700387int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
388 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389{
390 int err;
391
392 write_lock_bh(&rt6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700393 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 write_unlock_bh(&rt6_lock);
395
396 return err;
397}
398
399/* No rt6_lock! If COW failed, the function returns dead route entry
400 with dst->error set to errno value.
401 */
402
403static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700404 struct in6_addr *saddr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405{
406 int err;
407 struct rt6_info *rt;
408
409 /*
410 * Clone the route.
411 */
412
413 rt = ip6_rt_copy(ort);
414
415 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900416 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417 if (rt->rt6i_dst.plen != 128 &&
418 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900421 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900423 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 rt->rt6i_dst.plen = 128;
425 rt->rt6i_flags |= RTF_CACHE;
426 rt->u.dst.flags |= DST_HOST;
427
428#ifdef CONFIG_IPV6_SUBTREES
429 if (rt->rt6i_src.plen && saddr) {
430 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431 rt->rt6i_src.plen = 128;
432 }
433#endif
434
435 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
436
437 dst_hold(&rt->u.dst);
438
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700439 err = ip6_ins_rt(rt, NULL, NULL, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440 if (err == 0)
441 return rt;
442
443 rt->u.dst.error = err;
444
445 return rt;
446 }
447 dst_hold(&ip6_null_entry.u.dst);
448 return &ip6_null_entry;
449}
450
/*
 * After a strict lookup ended on ip6_null_entry, climb towards the root.
 * At a node flagged RTN_RTINFO the lookup is retried there
 * (goto restart); reaching a node flagged RTN_ROOT gives up with a
 * reference taken on the current rt (goto out).
 *
 * Expects `rt', `fn' and `strict' variables and `restart' / `out' labels
 * in the enclosing function.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
462
463
464void ip6_route_input(struct sk_buff *skb)
465{
466 struct fib6_node *fn;
467 struct rt6_info *rt;
468 int strict;
469 int attempts = 3;
470
471 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
472
473relookup:
474 read_lock_bh(&rt6_lock);
475
476 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
477 &skb->nh.ipv6h->saddr);
478
479restart:
480 rt = fn->leaf;
481
482 if ((rt->rt6i_flags & RTF_CACHE)) {
483 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
484 BACKTRACK();
485 dst_hold(&rt->u.dst);
486 goto out;
487 }
488
Yan Zheng9d17f212005-10-28 15:12:00 -0700489 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 BACKTRACK();
491
492 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
493 struct rt6_info *nrt;
494 dst_hold(&rt->u.dst);
495 read_unlock_bh(&rt6_lock);
496
497 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700498 &skb->nh.ipv6h->saddr,
499 &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500
501 dst_release(&rt->u.dst);
502 rt = nrt;
503
504 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
505 goto out2;
506
507 /* Race condition! In the gap, when rt6_lock was
508 released someone could insert this route. Relookup.
509 */
510 dst_release(&rt->u.dst);
511 goto relookup;
512 }
513 dst_hold(&rt->u.dst);
514
515out:
516 read_unlock_bh(&rt6_lock);
517out2:
518 rt->u.dst.lastuse = jiffies;
519 rt->u.dst.__use++;
520 skb->dst = (struct dst_entry *) rt;
521}
522
523struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
524{
525 struct fib6_node *fn;
526 struct rt6_info *rt;
527 int strict;
528 int attempts = 3;
529
530 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
531
532relookup:
533 read_lock_bh(&rt6_lock);
534
535 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
536
537restart:
538 rt = fn->leaf;
539
540 if ((rt->rt6i_flags & RTF_CACHE)) {
541 rt = rt6_device_match(rt, fl->oif, strict);
542 BACKTRACK();
543 dst_hold(&rt->u.dst);
544 goto out;
545 }
546 if (rt->rt6i_flags & RTF_DEFAULT) {
547 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
548 rt = rt6_best_dflt(rt, fl->oif);
549 } else {
550 rt = rt6_device_match(rt, fl->oif, strict);
551 BACKTRACK();
552 }
553
554 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
555 struct rt6_info *nrt;
556 dst_hold(&rt->u.dst);
557 read_unlock_bh(&rt6_lock);
558
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700559 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560
561 dst_release(&rt->u.dst);
562 rt = nrt;
563
564 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
565 goto out2;
566
567 /* Race condition! In the gap, when rt6_lock was
568 released someone could insert this route. Relookup.
569 */
570 dst_release(&rt->u.dst);
571 goto relookup;
572 }
573 dst_hold(&rt->u.dst);
574
575out:
576 read_unlock_bh(&rt6_lock);
577out2:
578 rt->u.dst.lastuse = jiffies;
579 rt->u.dst.__use++;
580 return &rt->u.dst;
581}
582
583
584/*
585 * Destination cache support functions
586 */
587
588static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
589{
590 struct rt6_info *rt;
591
592 rt = (struct rt6_info *) dst;
593
594 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
595 return dst;
596
597 return NULL;
598}
599
600static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
601{
602 struct rt6_info *rt = (struct rt6_info *) dst;
603
604 if (rt) {
605 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700606 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607 else
608 dst_release(dst);
609 }
610 return NULL;
611}
612
613static void ip6_link_failure(struct sk_buff *skb)
614{
615 struct rt6_info *rt;
616
617 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
618
619 rt = (struct rt6_info *) skb->dst;
620 if (rt) {
621 if (rt->rt6i_flags&RTF_CACHE) {
622 dst_set_expires(&rt->u.dst, 0);
623 rt->rt6i_flags |= RTF_EXPIRES;
624 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
625 rt->rt6i_node->fn_sernum = -1;
626 }
627}
628
629static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
630{
631 struct rt6_info *rt6 = (struct rt6_info*)dst;
632
633 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
634 rt6->rt6i_flags |= RTF_MODIFIED;
635 if (mtu < IPV6_MIN_MTU) {
636 mtu = IPV6_MIN_MTU;
637 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
638 }
639 dst->metrics[RTAX_MTU-1] = mtu;
640 }
641}
642
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(), reaped by ndisc_dst_gc().  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;

/* Defined further down; needed by the allocation helpers before it. */
static int ipv6_get_mtu(struct net_device *dev);
646
647static inline unsigned int ipv6_advmss(unsigned int mtu)
648{
649 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
650
651 if (mtu < ip6_rt_min_advmss)
652 mtu = ip6_rt_min_advmss;
653
654 /*
655 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
656 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
657 * IPV6_MAXPLEN is also valid and means: "any MSS,
658 * rely only on pmtu discovery"
659 */
660 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
661 mtu = IPV6_MAXPLEN;
662 return mtu;
663}
664
665struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
666 struct neighbour *neigh,
667 struct in6_addr *addr,
668 int (*output)(struct sk_buff *))
669{
670 struct rt6_info *rt;
671 struct inet6_dev *idev = in6_dev_get(dev);
672
673 if (unlikely(idev == NULL))
674 return NULL;
675
676 rt = ip6_dst_alloc();
677 if (unlikely(rt == NULL)) {
678 in6_dev_put(idev);
679 goto out;
680 }
681
682 dev_hold(dev);
683 if (neigh)
684 neigh_hold(neigh);
685 else
686 neigh = ndisc_get_neigh(dev, addr);
687
688 rt->rt6i_dev = dev;
689 rt->rt6i_idev = idev;
690 rt->rt6i_nexthop = neigh;
691 atomic_set(&rt->u.dst.__refcnt, 1);
692 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
693 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
694 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
695 rt->u.dst.output = output;
696
697#if 0 /* there's no chance to use these for ndisc */
698 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
699 ? DST_HOST
700 : 0;
701 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
702 rt->rt6i_dst.plen = 128;
703#endif
704
705 write_lock_bh(&rt6_lock);
706 rt->u.dst.next = ndisc_dst_gc_list;
707 ndisc_dst_gc_list = &rt->u.dst;
708 write_unlock_bh(&rt6_lock);
709
710 fib6_force_start_gc();
711
712out:
713 return (struct dst_entry *)rt;
714}
715
716int ndisc_dst_gc(int *more)
717{
718 struct dst_entry *dst, *next, **pprev;
719 int freed;
720
721 next = NULL;
722 pprev = &ndisc_dst_gc_list;
723 freed = 0;
724 while ((dst = *pprev) != NULL) {
725 if (!atomic_read(&dst->__refcnt)) {
726 *pprev = dst->next;
727 dst_free(dst);
728 freed++;
729 } else {
730 pprev = &dst->next;
731 (*more)++;
732 }
733 }
734
735 return freed;
736}
737
738static int ip6_dst_gc(void)
739{
740 static unsigned expire = 30*HZ;
741 static unsigned long last_gc;
742 unsigned long now = jiffies;
743
744 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
745 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
746 goto out;
747
748 expire++;
749 fib6_run_gc(expire);
750 last_gc = now;
751 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
752 expire = ip6_rt_gc_timeout>>1;
753
754out:
755 expire -= expire>>ip6_rt_gc_elasticity;
756 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
757}
758
759/* Clean host part of a prefix. Not necessary in radix tree,
760 but results in cleaner routing tables.
761
762 Remove it only when all the things will work!
763 */
764
765static int ipv6_get_mtu(struct net_device *dev)
766{
767 int mtu = IPV6_MIN_MTU;
768 struct inet6_dev *idev;
769
770 idev = in6_dev_get(dev);
771 if (idev) {
772 mtu = idev->cnf.mtu6;
773 in6_dev_put(idev);
774 }
775 return mtu;
776}
777
778int ipv6_get_hoplimit(struct net_device *dev)
779{
780 int hoplimit = ipv6_devconf.hop_limit;
781 struct inet6_dev *idev;
782
783 idev = in6_dev_get(dev);
784 if (idev) {
785 hoplimit = idev->cnf.hop_limit;
786 in6_dev_put(idev);
787 }
788 return hoplimit;
789}
790
791/*
792 *
793 */
794
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700795int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
796 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797{
798 int err;
799 struct rtmsg *r;
800 struct rtattr **rta;
801 struct rt6_info *rt = NULL;
802 struct net_device *dev = NULL;
803 struct inet6_dev *idev = NULL;
804 int addr_type;
805
806 rta = (struct rtattr **) _rtattr;
807
808 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
809 return -EINVAL;
810#ifndef CONFIG_IPV6_SUBTREES
811 if (rtmsg->rtmsg_src_len)
812 return -EINVAL;
813#endif
814 if (rtmsg->rtmsg_ifindex) {
815 err = -ENODEV;
816 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
817 if (!dev)
818 goto out;
819 idev = in6_dev_get(dev);
820 if (!idev)
821 goto out;
822 }
823
824 if (rtmsg->rtmsg_metric == 0)
825 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
826
827 rt = ip6_dst_alloc();
828
829 if (rt == NULL) {
830 err = -ENOMEM;
831 goto out;
832 }
833
834 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -0800835 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836 if (nlh && (r = NLMSG_DATA(nlh))) {
837 rt->rt6i_protocol = r->rtm_protocol;
838 } else {
839 rt->rt6i_protocol = RTPROT_BOOT;
840 }
841
842 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
843
844 if (addr_type & IPV6_ADDR_MULTICAST)
845 rt->u.dst.input = ip6_mc_input;
846 else
847 rt->u.dst.input = ip6_forward;
848
849 rt->u.dst.output = ip6_output;
850
851 ipv6_addr_prefix(&rt->rt6i_dst.addr,
852 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
853 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
854 if (rt->rt6i_dst.plen == 128)
855 rt->u.dst.flags = DST_HOST;
856
857#ifdef CONFIG_IPV6_SUBTREES
858 ipv6_addr_prefix(&rt->rt6i_src.addr,
859 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
860 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
861#endif
862
863 rt->rt6i_metric = rtmsg->rtmsg_metric;
864
865 /* We cannot add true routes via loopback here,
866 they would result in kernel looping; promote them to reject routes
867 */
868 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
869 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
870 /* hold loopback dev/idev if we haven't done so. */
871 if (dev != &loopback_dev) {
872 if (dev) {
873 dev_put(dev);
874 in6_dev_put(idev);
875 }
876 dev = &loopback_dev;
877 dev_hold(dev);
878 idev = in6_dev_get(dev);
879 if (!idev) {
880 err = -ENODEV;
881 goto out;
882 }
883 }
884 rt->u.dst.output = ip6_pkt_discard_out;
885 rt->u.dst.input = ip6_pkt_discard;
886 rt->u.dst.error = -ENETUNREACH;
887 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
888 goto install_route;
889 }
890
891 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
892 struct in6_addr *gw_addr;
893 int gwa_type;
894
895 gw_addr = &rtmsg->rtmsg_gateway;
896 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
897 gwa_type = ipv6_addr_type(gw_addr);
898
899 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
900 struct rt6_info *grt;
901
902 /* IPv6 strictly inhibits using not link-local
903 addresses as nexthop address.
904 Otherwise, router will not able to send redirects.
905 It is very good, but in some (rare!) circumstances
906 (SIT, PtP, NBMA NOARP links) it is handy to allow
907 some exceptions. --ANK
908 */
909 err = -EINVAL;
910 if (!(gwa_type&IPV6_ADDR_UNICAST))
911 goto out;
912
913 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
914
915 err = -EHOSTUNREACH;
916 if (grt == NULL)
917 goto out;
918 if (dev) {
919 if (dev != grt->rt6i_dev) {
920 dst_release(&grt->u.dst);
921 goto out;
922 }
923 } else {
924 dev = grt->rt6i_dev;
925 idev = grt->rt6i_idev;
926 dev_hold(dev);
927 in6_dev_hold(grt->rt6i_idev);
928 }
929 if (!(grt->rt6i_flags&RTF_GATEWAY))
930 err = 0;
931 dst_release(&grt->u.dst);
932
933 if (err)
934 goto out;
935 }
936 err = -EINVAL;
937 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
938 goto out;
939 }
940
941 err = -ENODEV;
942 if (dev == NULL)
943 goto out;
944
945 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
946 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
947 if (IS_ERR(rt->rt6i_nexthop)) {
948 err = PTR_ERR(rt->rt6i_nexthop);
949 rt->rt6i_nexthop = NULL;
950 goto out;
951 }
952 }
953
954 rt->rt6i_flags = rtmsg->rtmsg_flags;
955
956install_route:
957 if (rta && rta[RTA_METRICS-1]) {
958 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
959 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
960
961 while (RTA_OK(attr, attrlen)) {
962 unsigned flavor = attr->rta_type;
963 if (flavor) {
964 if (flavor > RTAX_MAX) {
965 err = -EINVAL;
966 goto out;
967 }
968 rt->u.dst.metrics[flavor-1] =
969 *(u32 *)RTA_DATA(attr);
970 }
971 attr = RTA_NEXT(attr, attrlen);
972 }
973 }
974
975 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
976 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
977 if (!rt->u.dst.metrics[RTAX_MTU-1])
978 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
979 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
980 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
981 rt->u.dst.dev = dev;
982 rt->rt6i_idev = idev;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700983 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984
985out:
986 if (dev)
987 dev_put(dev);
988 if (idev)
989 in6_dev_put(idev);
990 if (rt)
991 dst_free((struct dst_entry *) rt);
992 return err;
993}
994
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700995int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996{
997 int err;
998
999 write_lock_bh(&rt6_lock);
1000
1001 rt6_reset_dflt_pointer(NULL);
1002
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001003 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004 dst_release(&rt->u.dst);
1005
1006 write_unlock_bh(&rt6_lock);
1007
1008 return err;
1009}
1010
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001011static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012{
1013 struct fib6_node *fn;
1014 struct rt6_info *rt;
1015 int err = -ESRCH;
1016
1017 read_lock_bh(&rt6_lock);
1018
1019 fn = fib6_locate(&ip6_routing_table,
1020 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1021 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1022
1023 if (fn) {
1024 for (rt = fn->leaf; rt; rt = rt->u.next) {
1025 if (rtmsg->rtmsg_ifindex &&
1026 (rt->rt6i_dev == NULL ||
1027 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1028 continue;
1029 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1030 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1031 continue;
1032 if (rtmsg->rtmsg_metric &&
1033 rtmsg->rtmsg_metric != rt->rt6i_metric)
1034 continue;
1035 dst_hold(&rt->u.dst);
1036 read_unlock_bh(&rt6_lock);
1037
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001038 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 }
1040 }
1041 read_unlock_bh(&rt6_lock);
1042
1043 return err;
1044}
1045
1046/*
1047 * Handle redirects
1048 */
1049void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1050 struct neighbour *neigh, u8 *lladdr, int on_link)
1051{
1052 struct rt6_info *rt, *nrt;
1053
1054 /* Locate old route to this destination. */
1055 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1056
1057 if (rt == NULL)
1058 return;
1059
1060 if (neigh->dev != rt->rt6i_dev)
1061 goto out;
1062
1063 /*
1064 * Current route is on-link; redirect is always invalid.
1065 *
1066 * Seems, previous statement is not true. It could
1067 * be node, which looks for us as on-link (f.e. proxy ndisc)
1068 * But then router serving it might decide, that we should
1069 * know truth 8)8) --ANK (980726).
1070 */
1071 if (!(rt->rt6i_flags&RTF_GATEWAY))
1072 goto out;
1073
1074 /*
1075 * RFC 2461 specifies that redirects should only be
1076 * accepted if they come from the nexthop to the target.
1077 * Due to the way default routers are chosen, this notion
1078 * is a bit fuzzy and one might need to check all default
1079 * routers.
1080 */
1081 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1082 if (rt->rt6i_flags & RTF_DEFAULT) {
1083 struct rt6_info *rt1;
1084
1085 read_lock(&rt6_lock);
1086 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1087 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1088 dst_hold(&rt1->u.dst);
1089 dst_release(&rt->u.dst);
1090 read_unlock(&rt6_lock);
1091 rt = rt1;
1092 goto source_ok;
1093 }
1094 }
1095 read_unlock(&rt6_lock);
1096 }
1097 if (net_ratelimit())
1098 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1099 "for redirect target\n");
1100 goto out;
1101 }
1102
1103source_ok:
1104
1105 /*
1106 * We have finally decided to accept it.
1107 */
1108
1109 neigh_update(neigh, lladdr, NUD_STALE,
1110 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1111 NEIGH_UPDATE_F_OVERRIDE|
1112 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1113 NEIGH_UPDATE_F_ISROUTER))
1114 );
1115
1116 /*
1117 * Redirect received -> path was valid.
1118 * Look, redirects are sent only in response to data packets,
1119 * so that this nexthop apparently is reachable. --ANK
1120 */
1121 dst_confirm(&rt->u.dst);
1122
1123 /* Duplicate redirect: silently ignore. */
1124 if (neigh == rt->u.dst.neighbour)
1125 goto out;
1126
1127 nrt = ip6_rt_copy(rt);
1128 if (nrt == NULL)
1129 goto out;
1130
1131 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1132 if (on_link)
1133 nrt->rt6i_flags &= ~RTF_GATEWAY;
1134
1135 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1136 nrt->rt6i_dst.plen = 128;
1137 nrt->u.dst.flags |= DST_HOST;
1138
1139 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1140 nrt->rt6i_nexthop = neigh_clone(neigh);
1141 /* Reset pmtu, it may be better */
1142 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1143 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1144
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001145 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 goto out;
1147
1148 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001149 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 return;
1151 }
1152
1153out:
1154 dst_release(&rt->u.dst);
1155 return;
1156}
1157
1158/*
1159 * Handle ICMP "packet too big" messages
1160 * i.e. Path MTU discovery
1161 */
1162
1163void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1164 struct net_device *dev, u32 pmtu)
1165{
1166 struct rt6_info *rt, *nrt;
1167 int allfrag = 0;
1168
1169 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1170 if (rt == NULL)
1171 return;
1172
1173 if (pmtu >= dst_mtu(&rt->u.dst))
1174 goto out;
1175
1176 if (pmtu < IPV6_MIN_MTU) {
1177 /*
1178 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1179 * MTU (1280) and a fragment header should always be included
1180 * after a node receiving Too Big message reporting PMTU is
1181 * less than the IPv6 Minimum Link MTU.
1182 */
1183 pmtu = IPV6_MIN_MTU;
1184 allfrag = 1;
1185 }
1186
1187 /* New mtu received -> path was valid.
1188 They are sent only in response to data packets,
1189 so that this nexthop apparently is reachable. --ANK
1190 */
1191 dst_confirm(&rt->u.dst);
1192
1193 /* Host route. If it is static, it would be better
1194 not to override it, but add new one, so that
1195 when cache entry will expire old pmtu
1196 would return automatically.
1197 */
1198 if (rt->rt6i_flags & RTF_CACHE) {
1199 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1200 if (allfrag)
1201 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1202 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1203 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1204 goto out;
1205 }
1206
1207 /* Network route.
1208 Two cases are possible:
1209 1. It is connected route. Action: COW
1210 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1211 */
1212 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001213 nrt = rt6_cow(rt, daddr, saddr, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 if (!nrt->u.dst.error) {
1215 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1216 if (allfrag)
1217 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1218 /* According to RFC 1981, detecting PMTU increase shouldn't be
1219 happened within 5 mins, the recommended timer is 10 mins.
1220 Here this route expiration time is set to ip6_rt_mtu_expires
1221 which is 10 mins. After 10 mins the decreased pmtu is expired
1222 and detecting PMTU increase will be automatically happened.
1223 */
1224 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1225 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1226 }
1227 dst_release(&nrt->u.dst);
1228 } else {
1229 nrt = ip6_rt_copy(rt);
1230 if (nrt == NULL)
1231 goto out;
1232 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1233 nrt->rt6i_dst.plen = 128;
1234 nrt->u.dst.flags |= DST_HOST;
1235 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1236 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1237 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1238 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1239 if (allfrag)
1240 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001241 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 }
1243
1244out:
1245 dst_release(&rt->u.dst);
1246}
1247
1248/*
1249 * Misc support functions
1250 */
1251
1252static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1253{
1254 struct rt6_info *rt = ip6_dst_alloc();
1255
1256 if (rt) {
1257 rt->u.dst.input = ort->u.dst.input;
1258 rt->u.dst.output = ort->u.dst.output;
1259
1260 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1261 rt->u.dst.dev = ort->u.dst.dev;
1262 if (rt->u.dst.dev)
1263 dev_hold(rt->u.dst.dev);
1264 rt->rt6i_idev = ort->rt6i_idev;
1265 if (rt->rt6i_idev)
1266 in6_dev_hold(rt->rt6i_idev);
1267 rt->u.dst.lastuse = jiffies;
1268 rt->rt6i_expires = 0;
1269
1270 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1271 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1272 rt->rt6i_metric = 0;
1273
1274 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1275#ifdef CONFIG_IPV6_SUBTREES
1276 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1277#endif
1278 }
1279 return rt;
1280}
1281
1282struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1283{
1284 struct rt6_info *rt;
1285 struct fib6_node *fn;
1286
1287 fn = &ip6_routing_table;
1288
1289 write_lock_bh(&rt6_lock);
1290 for (rt = fn->leaf; rt; rt=rt->u.next) {
1291 if (dev == rt->rt6i_dev &&
1292 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1293 break;
1294 }
1295 if (rt)
1296 dst_hold(&rt->u.dst);
1297 write_unlock_bh(&rt6_lock);
1298 return rt;
1299}
1300
1301struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1302 struct net_device *dev)
1303{
1304 struct in6_rtmsg rtmsg;
1305
1306 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1307 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1308 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1309 rtmsg.rtmsg_metric = 1024;
1310 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1311
1312 rtmsg.rtmsg_ifindex = dev->ifindex;
1313
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001314 ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315 return rt6_get_dflt_router(gwaddr, dev);
1316}
1317
1318void rt6_purge_dflt_routers(void)
1319{
1320 struct rt6_info *rt;
1321
1322restart:
1323 read_lock_bh(&rt6_lock);
1324 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1325 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1326 dst_hold(&rt->u.dst);
1327
1328 rt6_reset_dflt_pointer(NULL);
1329
1330 read_unlock_bh(&rt6_lock);
1331
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001332 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333
1334 goto restart;
1335 }
1336 }
1337 read_unlock_bh(&rt6_lock);
1338}
1339
1340int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1341{
1342 struct in6_rtmsg rtmsg;
1343 int err;
1344
1345 switch(cmd) {
1346 case SIOCADDRT: /* Add a route */
1347 case SIOCDELRT: /* Delete a route */
1348 if (!capable(CAP_NET_ADMIN))
1349 return -EPERM;
1350 err = copy_from_user(&rtmsg, arg,
1351 sizeof(struct in6_rtmsg));
1352 if (err)
1353 return -EFAULT;
1354
1355 rtnl_lock();
1356 switch (cmd) {
1357 case SIOCADDRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001358 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 break;
1360 case SIOCDELRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001361 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362 break;
1363 default:
1364 err = -EINVAL;
1365 }
1366 rtnl_unlock();
1367
1368 return err;
1369 };
1370
1371 return -EINVAL;
1372}
1373
1374/*
1375 * Drop the packet on the floor
1376 */
1377
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001378static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379{
1380 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1381 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1382 kfree_skb(skb);
1383 return 0;
1384}
1385
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001386static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387{
1388 skb->dev = skb->dst->dev;
1389 return ip6_pkt_discard(skb);
1390}
1391
1392/*
1393 * Allocate a dst for local (unicast / anycast) address.
1394 */
1395
1396struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1397 const struct in6_addr *addr,
1398 int anycast)
1399{
1400 struct rt6_info *rt = ip6_dst_alloc();
1401
1402 if (rt == NULL)
1403 return ERR_PTR(-ENOMEM);
1404
1405 dev_hold(&loopback_dev);
1406 in6_dev_hold(idev);
1407
1408 rt->u.dst.flags = DST_HOST;
1409 rt->u.dst.input = ip6_input;
1410 rt->u.dst.output = ip6_output;
1411 rt->rt6i_dev = &loopback_dev;
1412 rt->rt6i_idev = idev;
1413 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1414 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1415 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1416 rt->u.dst.obsolete = -1;
1417
1418 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001419 if (anycast)
1420 rt->rt6i_flags |= RTF_ANYCAST;
1421 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 rt->rt6i_flags |= RTF_LOCAL;
1423 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1424 if (rt->rt6i_nexthop == NULL) {
1425 dst_free((struct dst_entry *) rt);
1426 return ERR_PTR(-ENOMEM);
1427 }
1428
1429 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1430 rt->rt6i_dst.plen = 128;
1431
1432 atomic_set(&rt->u.dst.__refcnt, 1);
1433
1434 return rt;
1435}
1436
1437static int fib6_ifdown(struct rt6_info *rt, void *arg)
1438{
1439 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1440 rt != &ip6_null_entry) {
1441 RT6_TRACE("deleted by ifdown %p\n", rt);
1442 return -1;
1443 }
1444 return 0;
1445}
1446
1447void rt6_ifdown(struct net_device *dev)
1448{
1449 write_lock_bh(&rt6_lock);
1450 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1451 write_unlock_bh(&rt6_lock);
1452}
1453
/* Argument handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* its new MTU */
};
1459
1460static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1461{
1462 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1463 struct inet6_dev *idev;
1464
1465 /* In IPv6 pmtu discovery is not optional,
1466 so that RTAX_MTU lock cannot disable it.
1467 We still use this lock to block changes
1468 caused by addrconf/ndisc.
1469 */
1470
1471 idev = __in6_dev_get(arg->dev);
1472 if (idev == NULL)
1473 return 0;
1474
1475 /* For administrative MTU increase, there is no way to discover
1476 IPv6 PMTU increase, so PMTU increase should be updated here.
1477 Since RFC 1981 doesn't include administrative MTU increase
1478 update PMTU increase is a MUST. (i.e. jumbo frame)
1479 */
1480 /*
1481 If new MTU is less than route PMTU, this new MTU will be the
1482 lowest MTU in the path, update the route PMTU to reflect PMTU
1483 decreases; if new MTU is greater than route PMTU, and the
1484 old MTU is the lowest MTU in the path, update the route PMTU
1485 to reflect the increase. In this case if the other nodes' MTU
1486 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1487 PMTU discouvery.
1488 */
1489 if (rt->rt6i_dev == arg->dev &&
1490 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1491 (dst_mtu(&rt->u.dst) > arg->mtu ||
1492 (dst_mtu(&rt->u.dst) < arg->mtu &&
1493 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1494 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1495 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1496 return 0;
1497}
1498
1499void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1500{
1501 struct rt6_mtu_change_arg arg;
1502
1503 arg.dev = dev;
1504 arg.mtu = mtu;
1505 read_lock_bh(&rt6_lock);
1506 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1507 read_unlock_bh(&rt6_lock);
1508}
1509
1510static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1511 struct in6_rtmsg *rtmsg)
1512{
1513 memset(rtmsg, 0, sizeof(*rtmsg));
1514
1515 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1516 rtmsg->rtmsg_src_len = r->rtm_src_len;
1517 rtmsg->rtmsg_flags = RTF_UP;
1518 if (r->rtm_type == RTN_UNREACHABLE)
1519 rtmsg->rtmsg_flags |= RTF_REJECT;
1520
1521 if (rta[RTA_GATEWAY-1]) {
1522 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1523 return -EINVAL;
1524 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1525 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1526 }
1527 if (rta[RTA_DST-1]) {
1528 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1529 return -EINVAL;
1530 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1531 }
1532 if (rta[RTA_SRC-1]) {
1533 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1534 return -EINVAL;
1535 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1536 }
1537 if (rta[RTA_OIF-1]) {
1538 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1539 return -EINVAL;
1540 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1541 }
1542 if (rta[RTA_PRIORITY-1]) {
1543 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1544 return -EINVAL;
1545 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1546 }
1547 return 0;
1548}
1549
1550int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1551{
1552 struct rtmsg *r = NLMSG_DATA(nlh);
1553 struct in6_rtmsg rtmsg;
1554
1555 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1556 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001557 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558}
1559
1560int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1561{
1562 struct rtmsg *r = NLMSG_DATA(nlh);
1563 struct in6_rtmsg rtmsg;
1564
1565 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1566 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001567 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568}
1569
/* Per-call state handed to the tree walker while dumping routes. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1575
1576static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001577 struct in6_addr *dst, struct in6_addr *src,
1578 int iif, int type, u32 pid, u32 seq,
1579 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580{
1581 struct rtmsg *rtm;
1582 struct nlmsghdr *nlh;
1583 unsigned char *b = skb->tail;
1584 struct rta_cacheinfo ci;
1585
1586 if (prefix) { /* user wants prefix routes only */
1587 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1588 /* success since this is not a prefix route */
1589 return 1;
1590 }
1591 }
1592
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001593 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 rtm = NLMSG_DATA(nlh);
1595 rtm->rtm_family = AF_INET6;
1596 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1597 rtm->rtm_src_len = rt->rt6i_src.plen;
1598 rtm->rtm_tos = 0;
1599 rtm->rtm_table = RT_TABLE_MAIN;
1600 if (rt->rt6i_flags&RTF_REJECT)
1601 rtm->rtm_type = RTN_UNREACHABLE;
1602 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1603 rtm->rtm_type = RTN_LOCAL;
1604 else
1605 rtm->rtm_type = RTN_UNICAST;
1606 rtm->rtm_flags = 0;
1607 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1608 rtm->rtm_protocol = rt->rt6i_protocol;
1609 if (rt->rt6i_flags&RTF_DYNAMIC)
1610 rtm->rtm_protocol = RTPROT_REDIRECT;
1611 else if (rt->rt6i_flags & RTF_ADDRCONF)
1612 rtm->rtm_protocol = RTPROT_KERNEL;
1613 else if (rt->rt6i_flags&RTF_DEFAULT)
1614 rtm->rtm_protocol = RTPROT_RA;
1615
1616 if (rt->rt6i_flags&RTF_CACHE)
1617 rtm->rtm_flags |= RTM_F_CLONED;
1618
1619 if (dst) {
1620 RTA_PUT(skb, RTA_DST, 16, dst);
1621 rtm->rtm_dst_len = 128;
1622 } else if (rtm->rtm_dst_len)
1623 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1624#ifdef CONFIG_IPV6_SUBTREES
1625 if (src) {
1626 RTA_PUT(skb, RTA_SRC, 16, src);
1627 rtm->rtm_src_len = 128;
1628 } else if (rtm->rtm_src_len)
1629 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1630#endif
1631 if (iif)
1632 RTA_PUT(skb, RTA_IIF, 4, &iif);
1633 else if (dst) {
1634 struct in6_addr saddr_buf;
1635 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1636 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1637 }
1638 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1639 goto rtattr_failure;
1640 if (rt->u.dst.neighbour)
1641 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1642 if (rt->u.dst.dev)
1643 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1644 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1645 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1646 if (rt->rt6i_expires)
1647 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1648 else
1649 ci.rta_expires = 0;
1650 ci.rta_used = rt->u.dst.__use;
1651 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1652 ci.rta_error = rt->u.dst.error;
1653 ci.rta_id = 0;
1654 ci.rta_ts = 0;
1655 ci.rta_tsage = 0;
1656 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1657 nlh->nlmsg_len = skb->tail - b;
1658 return skb->len;
1659
1660nlmsg_failure:
1661rtattr_failure:
1662 skb_trim(skb, b - skb->data);
1663 return -1;
1664}
1665
1666static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1667{
1668 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1669 int prefix;
1670
1671 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1672 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1673 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1674 } else
1675 prefix = 0;
1676
1677 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1678 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001679 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680}
1681
1682static int fib6_dump_node(struct fib6_walker_t *w)
1683{
1684 int res;
1685 struct rt6_info *rt;
1686
1687 for (rt = w->leaf; rt; rt = rt->u.next) {
1688 res = rt6_dump_route(rt, w->args);
1689 if (res < 0) {
1690 /* Frame is full, suspend walking */
1691 w->leaf = rt;
1692 return 1;
1693 }
1694 BUG_TRAP(res!=0);
1695 }
1696 w->leaf = NULL;
1697 return 0;
1698}
1699
1700static void fib6_dump_end(struct netlink_callback *cb)
1701{
1702 struct fib6_walker_t *w = (void*)cb->args[0];
1703
1704 if (w) {
1705 cb->args[0] = 0;
1706 fib6_walker_unlink(w);
1707 kfree(w);
1708 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08001709 cb->done = (void*)cb->args[1];
1710 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711}
1712
1713static int fib6_dump_done(struct netlink_callback *cb)
1714{
1715 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01001716 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717}
1718
1719int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1720{
1721 struct rt6_rtnl_dump_arg arg;
1722 struct fib6_walker_t *w;
1723 int res;
1724
1725 arg.skb = skb;
1726 arg.cb = cb;
1727
1728 w = (void*)cb->args[0];
1729 if (w == NULL) {
1730 /* New dump:
1731 *
1732 * 1. hook callback destructor.
1733 */
1734 cb->args[1] = (long)cb->done;
1735 cb->done = fib6_dump_done;
1736
1737 /*
1738 * 2. allocate and initialize walker.
1739 */
David S. Miller9e147a12005-11-17 16:52:51 -08001740 w = kmalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741 if (w == NULL)
1742 return -ENOMEM;
1743 RT6_TRACE("dump<%p", w);
1744 memset(w, 0, sizeof(*w));
1745 w->root = &ip6_routing_table;
1746 w->func = fib6_dump_node;
1747 w->args = &arg;
1748 cb->args[0] = (long)w;
1749 read_lock_bh(&rt6_lock);
1750 res = fib6_walk(w);
1751 read_unlock_bh(&rt6_lock);
1752 } else {
1753 w->args = &arg;
1754 read_lock_bh(&rt6_lock);
1755 res = fib6_walk_continue(w);
1756 read_unlock_bh(&rt6_lock);
1757 }
1758#if RT6_DEBUG >= 3
1759 if (res <= 0 && skb->len == 0)
1760 RT6_TRACE("%p>dump end\n", w);
1761#endif
1762 res = res < 0 ? res : skb->len;
1763 /* res < 0 is an error. (really, impossible)
1764 res == 0 means that dump is complete, but skb still can contain data.
1765 res > 0 dump is not complete, but frame is full.
1766 */
1767 /* Destroy walker, if dump of this table is complete. */
1768 if (res <= 0)
1769 fib6_dump_end(cb);
1770 return res;
1771}
1772
1773int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1774{
1775 struct rtattr **rta = arg;
1776 int iif = 0;
1777 int err = -ENOBUFS;
1778 struct sk_buff *skb;
1779 struct flowi fl;
1780 struct rt6_info *rt;
1781
1782 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1783 if (skb == NULL)
1784 goto out;
1785
1786 /* Reserve room for dummy headers, this skb can pass
1787 through good chunk of routing engine.
1788 */
1789 skb->mac.raw = skb->data;
1790 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1791
1792 memset(&fl, 0, sizeof(fl));
1793 if (rta[RTA_SRC-1])
1794 ipv6_addr_copy(&fl.fl6_src,
1795 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1796 if (rta[RTA_DST-1])
1797 ipv6_addr_copy(&fl.fl6_dst,
1798 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1799
1800 if (rta[RTA_IIF-1])
1801 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1802
1803 if (iif) {
1804 struct net_device *dev;
1805 dev = __dev_get_by_index(iif);
1806 if (!dev) {
1807 err = -ENODEV;
1808 goto out_free;
1809 }
1810 }
1811
1812 fl.oif = 0;
1813 if (rta[RTA_OIF-1])
1814 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1815
1816 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1817
1818 skb->dst = &rt->u.dst;
1819
1820 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1821 err = rt6_fill_node(skb, rt,
1822 &fl.fl6_dst, &fl.fl6_src,
1823 iif,
1824 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001825 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826 if (err < 0) {
1827 err = -EMSGSIZE;
1828 goto out_free;
1829 }
1830
1831 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1832 if (err > 0)
1833 err = 0;
1834out:
1835 return err;
1836out_free:
1837 kfree_skb(skb);
1838 goto out;
1839}
1840
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001841void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1842 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843{
1844 struct sk_buff *skb;
1845 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001846 u32 pid = current->pid;
1847 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001849 if (req)
1850 pid = req->pid;
1851 if (nlh)
1852 seq = nlh->nlmsg_seq;
1853
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854 skb = alloc_skb(size, gfp_any());
1855 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07001856 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857 return;
1858 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001859 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07001861 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862 return;
1863 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07001864 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1865 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866}
1867
1868/*
1869 * /proc
1870 */
1871
1872#ifdef CONFIG_PROC_FS
1873
/*
 * Width in bytes assumed for one line of the ipv6_route proc file; used
 * to turn a read offset into a number of records to skip.
 * Per line: 32 hex digits + 4 (" xx ") for the destination and its prefix
 * length, the same again for the source, 32 hex digits for the next hop,
 * and 46 for the trailing " %08x %08x %08x %08x %8s\n" fields.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 46)
1875
/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
1884
1885static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1886{
1887 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1888 int i;
1889
1890 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1891 arg->skip++;
1892 return 0;
1893 }
1894
1895 if (arg->len >= arg->length)
1896 return 0;
1897
1898 for (i=0; i<16; i++) {
1899 sprintf(arg->buffer + arg->len, "%02x",
1900 rt->rt6i_dst.addr.s6_addr[i]);
1901 arg->len += 2;
1902 }
1903 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1904 rt->rt6i_dst.plen);
1905
1906#ifdef CONFIG_IPV6_SUBTREES
1907 for (i=0; i<16; i++) {
1908 sprintf(arg->buffer + arg->len, "%02x",
1909 rt->rt6i_src.addr.s6_addr[i]);
1910 arg->len += 2;
1911 }
1912 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1913 rt->rt6i_src.plen);
1914#else
1915 sprintf(arg->buffer + arg->len,
1916 "00000000000000000000000000000000 00 ");
1917 arg->len += 36;
1918#endif
1919
1920 if (rt->rt6i_nexthop) {
1921 for (i=0; i<16; i++) {
1922 sprintf(arg->buffer + arg->len, "%02x",
1923 rt->rt6i_nexthop->primary_key[i]);
1924 arg->len += 2;
1925 }
1926 } else {
1927 sprintf(arg->buffer + arg->len,
1928 "00000000000000000000000000000000");
1929 arg->len += 32;
1930 }
1931 arg->len += sprintf(arg->buffer + arg->len,
1932 " %08x %08x %08x %08x %8s\n",
1933 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1934 rt->u.dst.__use, rt->rt6i_flags,
1935 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1936 return 0;
1937}
1938
1939static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1940{
1941 struct rt6_proc_arg arg;
1942 arg.buffer = buffer;
1943 arg.offset = offset;
1944 arg.length = length;
1945 arg.skip = 0;
1946 arg.len = 0;
1947
1948 read_lock_bh(&rt6_lock);
1949 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1950 read_unlock_bh(&rt6_lock);
1951
1952 *start = buffer;
1953 if (offset)
1954 *start += offset % RT6_INFO_LEN;
1955
1956 arg.len -= offset % RT6_INFO_LEN;
1957
1958 if (arg.len > length)
1959 arg.len = length;
1960 if (arg.len < 0)
1961 arg.len = 0;
1962
1963 return arg.len;
1964}
1965
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1967{
1968 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1969 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1970 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1971 rt6_stats.fib_rt_cache,
1972 atomic_read(&ip6_dst_ops.entries),
1973 rt6_stats.fib_discarded_routes);
1974
1975 return 0;
1976}
1977
1978static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1979{
1980 return single_open(file, rt6_stats_seq_show, NULL);
1981}
1982
1983static struct file_operations rt6_stats_seq_fops = {
1984 .owner = THIS_MODULE,
1985 .open = rt6_stats_seq_open,
1986 .read = seq_read,
1987 .llseek = seq_lseek,
1988 .release = single_release,
1989};
1990#endif /* CONFIG_PROC_FS */
1991
1992#ifdef CONFIG_SYSCTL
1993
1994static int flush_delay;
1995
1996static
1997int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1998 void __user *buffer, size_t *lenp, loff_t *ppos)
1999{
2000 if (write) {
2001 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2002 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2003 return 0;
2004 } else
2005 return -EINVAL;
2006}
2007
2008ctl_table ipv6_route_table[] = {
2009 {
2010 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2011 .procname = "flush",
2012 .data = &flush_delay,
2013 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002014 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015 .proc_handler = &ipv6_sysctl_rtcache_flush
2016 },
2017 {
2018 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2019 .procname = "gc_thresh",
2020 .data = &ip6_dst_ops.gc_thresh,
2021 .maxlen = sizeof(int),
2022 .mode = 0644,
2023 .proc_handler = &proc_dointvec,
2024 },
2025 {
2026 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2027 .procname = "max_size",
2028 .data = &ip6_rt_max_size,
2029 .maxlen = sizeof(int),
2030 .mode = 0644,
2031 .proc_handler = &proc_dointvec,
2032 },
2033 {
2034 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2035 .procname = "gc_min_interval",
2036 .data = &ip6_rt_gc_min_interval,
2037 .maxlen = sizeof(int),
2038 .mode = 0644,
2039 .proc_handler = &proc_dointvec_jiffies,
2040 .strategy = &sysctl_jiffies,
2041 },
2042 {
2043 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2044 .procname = "gc_timeout",
2045 .data = &ip6_rt_gc_timeout,
2046 .maxlen = sizeof(int),
2047 .mode = 0644,
2048 .proc_handler = &proc_dointvec_jiffies,
2049 .strategy = &sysctl_jiffies,
2050 },
2051 {
2052 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2053 .procname = "gc_interval",
2054 .data = &ip6_rt_gc_interval,
2055 .maxlen = sizeof(int),
2056 .mode = 0644,
2057 .proc_handler = &proc_dointvec_jiffies,
2058 .strategy = &sysctl_jiffies,
2059 },
2060 {
2061 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2062 .procname = "gc_elasticity",
2063 .data = &ip6_rt_gc_elasticity,
2064 .maxlen = sizeof(int),
2065 .mode = 0644,
2066 .proc_handler = &proc_dointvec_jiffies,
2067 .strategy = &sysctl_jiffies,
2068 },
2069 {
2070 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2071 .procname = "mtu_expires",
2072 .data = &ip6_rt_mtu_expires,
2073 .maxlen = sizeof(int),
2074 .mode = 0644,
2075 .proc_handler = &proc_dointvec_jiffies,
2076 .strategy = &sysctl_jiffies,
2077 },
2078 {
2079 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2080 .procname = "min_adv_mss",
2081 .data = &ip6_rt_min_advmss,
2082 .maxlen = sizeof(int),
2083 .mode = 0644,
2084 .proc_handler = &proc_dointvec_jiffies,
2085 .strategy = &sysctl_jiffies,
2086 },
2087 {
2088 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2089 .procname = "gc_min_interval_ms",
2090 .data = &ip6_rt_gc_min_interval,
2091 .maxlen = sizeof(int),
2092 .mode = 0644,
2093 .proc_handler = &proc_dointvec_ms_jiffies,
2094 .strategy = &sysctl_ms_jiffies,
2095 },
2096 { .ctl_name = 0 }
2097};
2098
2099#endif
2100
2101void __init ip6_route_init(void)
2102{
2103 struct proc_dir_entry *p;
2104
2105 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2106 sizeof(struct rt6_info),
2107 0, SLAB_HWCACHE_ALIGN,
2108 NULL, NULL);
2109 if (!ip6_dst_ops.kmem_cachep)
2110 panic("cannot create ip6_dst_cache");
2111
2112 fib6_init();
2113#ifdef CONFIG_PROC_FS
2114 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2115 if (p)
2116 p->owner = THIS_MODULE;
2117
2118 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2119#endif
2120#ifdef CONFIG_XFRM
2121 xfrm6_init();
2122#endif
2123}
2124
2125void ip6_route_cleanup(void)
2126{
2127#ifdef CONFIG_PROC_FS
2128 proc_net_remove("ipv6_route");
2129 proc_net_remove("rt6_stats");
2130#endif
2131#ifdef CONFIG_XFRM
2132 xfrm6_fini();
2133#endif
2134 rt6_ifdown(NULL);
2135 fib6_gc_cleanup();
2136 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2137}