blob: 5b58a516bb162dbe89a5ea83e593444502b7a63b [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug verbosity for this file; raise to 3 to get tracing. */
#define RT6_DEBUG 2

/*
 * RDBG(x) takes a complete, parenthesised printk() argument list and
 * RT6_TRACE(fmt, ...) logs at KERN_DEBUG.  Both compile away unless
 * RT6_DEBUG is at least 3.
 */
#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
75
76static int ip6_rt_max_size = 4096;
77static int ip6_rt_gc_min_interval = HZ / 2;
78static int ip6_rt_gc_timeout = 60*HZ;
79int ip6_rt_gc_interval = 30*HZ;
80static int ip6_rt_gc_elasticity = 9;
81static int ip6_rt_mtu_expires = 10*60*HZ;
82static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
84static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87static void ip6_dst_destroy(struct dst_entry *);
88static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
90static int ip6_dst_gc(void);
91
92static int ip6_pkt_discard(struct sk_buff *skb);
93static int ip6_pkt_discard_out(struct sk_buff *skb);
94static void ip6_link_failure(struct sk_buff *skb);
95static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
97static struct dst_ops ip6_dst_ops = {
98 .family = AF_INET6,
99 .protocol = __constant_htons(ETH_P_IPV6),
100 .gc = ip6_dst_gc,
101 .gc_thresh = 1024,
102 .check = ip6_dst_check,
103 .destroy = ip6_dst_destroy,
104 .ifdown = ip6_dst_ifdown,
105 .negative_advice = ip6_negative_advice,
106 .link_failure = ip6_link_failure,
107 .update_pmtu = ip6_rt_update_pmtu,
108 .entry_size = sizeof(struct rt6_info),
109};
110
111struct rt6_info ip6_null_entry = {
112 .u = {
113 .dst = {
114 .__refcnt = ATOMIC_INIT(1),
115 .__use = 1,
116 .dev = &loopback_dev,
117 .obsolete = -1,
118 .error = -ENETUNREACH,
119 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
120 .input = ip6_pkt_discard,
121 .output = ip6_pkt_discard_out,
122 .ops = &ip6_dst_ops,
123 .path = (struct dst_entry*)&ip6_null_entry,
124 }
125 },
126 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
127 .rt6i_metric = ~(u32) 0,
128 .rt6i_ref = ATOMIC_INIT(1),
129};
130
131struct fib6_node ip6_routing_table = {
132 .leaf = &ip6_null_entry,
133 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134};
135
136/* Protects all the ip6 fib */
137
138DEFINE_RWLOCK(rt6_lock);
139
140
141/* allocate dst with ip6_dst_ops */
142static __inline__ struct rt6_info *ip6_dst_alloc(void)
143{
144 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
145}
146
147static void ip6_dst_destroy(struct dst_entry *dst)
148{
149 struct rt6_info *rt = (struct rt6_info *)dst;
150 struct inet6_dev *idev = rt->rt6i_idev;
151
152 if (idev != NULL) {
153 rt->rt6i_idev = NULL;
154 in6_dev_put(idev);
155 }
156}
157
158static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
159 int how)
160{
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
163
164 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166 if (loopback_idev != NULL) {
167 rt->rt6i_idev = loopback_idev;
168 in6_dev_put(idev);
169 }
170 }
171}
172
173static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174{
175 return (rt->rt6i_flags & RTF_EXPIRES &&
176 time_after(jiffies, rt->rt6i_expires));
177}
178
179/*
180 * Route lookup. Any rt6_lock is implied.
181 */
182
183static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
184 int oif,
185 int strict)
186{
187 struct rt6_info *local = NULL;
188 struct rt6_info *sprt;
189
190 if (oif) {
191 for (sprt = rt; sprt; sprt = sprt->u.next) {
192 struct net_device *dev = sprt->rt6i_dev;
193 if (dev->ifindex == oif)
194 return sprt;
195 if (dev->flags & IFF_LOOPBACK) {
196 if (sprt->rt6i_idev == NULL ||
197 sprt->rt6i_idev->dev->ifindex != oif) {
198 if (strict && oif)
199 continue;
200 if (local && (!oif ||
201 local->rt6i_idev->dev->ifindex == oif))
202 continue;
203 }
204 local = sprt;
205 }
206 }
207
208 if (local)
209 return local;
210
211 if (strict)
212 return &ip6_null_entry;
213 }
214 return rt;
215}
216
217/*
218 * pointer to the last default router chosen. BH is disabled locally.
219 */
220static struct rt6_info *rt6_dflt_pointer;
221static DEFINE_SPINLOCK(rt6_dflt_lock);
222
223void rt6_reset_dflt_pointer(struct rt6_info *rt)
224{
225 spin_lock_bh(&rt6_dflt_lock);
226 if (rt == NULL || rt == rt6_dflt_pointer) {
227 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228 rt6_dflt_pointer = NULL;
229 }
230 spin_unlock_bh(&rt6_dflt_lock);
231}
232
233/* Default Router Selection (RFC 2461 6.3.6) */
234static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
235{
236 struct rt6_info *match = NULL;
237 struct rt6_info *sprt;
238 int mpri = 0;
239
240 for (sprt = rt; sprt; sprt = sprt->u.next) {
241 struct neighbour *neigh;
242 int m = 0;
243
244 if (!oif ||
245 (sprt->rt6i_dev &&
246 sprt->rt6i_dev->ifindex == oif))
247 m += 8;
248
249 if (rt6_check_expired(sprt))
250 continue;
251
252 if (sprt == rt6_dflt_pointer)
253 m += 4;
254
255 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256 read_lock_bh(&neigh->lock);
257 switch (neigh->nud_state) {
258 case NUD_REACHABLE:
259 m += 3;
260 break;
261
262 case NUD_STALE:
263 case NUD_DELAY:
264 case NUD_PROBE:
265 m += 2;
266 break;
267
268 case NUD_NOARP:
269 case NUD_PERMANENT:
270 m += 1;
271 break;
272
273 case NUD_INCOMPLETE:
274 default:
275 read_unlock_bh(&neigh->lock);
276 continue;
277 }
278 read_unlock_bh(&neigh->lock);
279 } else {
280 continue;
281 }
282
283 if (m > mpri || m >= 12) {
284 match = sprt;
285 mpri = m;
286 if (m >= 12) {
287 /* we choose the last default router if it
288 * is in (probably) reachable state.
289 * If route changed, we should do pmtu
290 * discovery. --yoshfuji
291 */
292 break;
293 }
294 }
295 }
296
297 spin_lock(&rt6_dflt_lock);
298 if (!match) {
299 /*
300 * No default routers are known to be reachable.
301 * SHOULD round robin
302 */
303 if (rt6_dflt_pointer) {
304 for (sprt = rt6_dflt_pointer->u.next;
305 sprt; sprt = sprt->u.next) {
306 if (sprt->u.dst.obsolete <= 0 &&
307 sprt->u.dst.error == 0 &&
308 !rt6_check_expired(sprt)) {
309 match = sprt;
310 break;
311 }
312 }
313 for (sprt = rt;
314 !match && sprt;
315 sprt = sprt->u.next) {
316 if (sprt->u.dst.obsolete <= 0 &&
317 sprt->u.dst.error == 0 &&
318 !rt6_check_expired(sprt)) {
319 match = sprt;
320 break;
321 }
322 if (sprt == rt6_dflt_pointer)
323 break;
324 }
325 }
326 }
327
328 if (match) {
329 if (rt6_dflt_pointer != match)
330 RT6_TRACE("changed default router: %p->%p\n",
331 rt6_dflt_pointer, match);
332 rt6_dflt_pointer = match;
333 }
334 spin_unlock(&rt6_dflt_lock);
335
336 if (!match) {
337 /*
338 * Last Resort: if no default routers found,
339 * use addrconf default route.
340 * We don't record this route.
341 */
342 for (sprt = ip6_routing_table.leaf;
343 sprt; sprt = sprt->u.next) {
344 if (!rt6_check_expired(sprt) &&
345 (sprt->rt6i_flags & RTF_DEFAULT) &&
346 (!oif ||
347 (sprt->rt6i_dev &&
348 sprt->rt6i_dev->ifindex == oif))) {
349 match = sprt;
350 break;
351 }
352 }
353 if (!match) {
354 /* no default route. give up. */
355 match = &ip6_null_entry;
356 }
357 }
358
359 return match;
360}
361
362struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
363 int oif, int strict)
364{
365 struct fib6_node *fn;
366 struct rt6_info *rt;
367
368 read_lock_bh(&rt6_lock);
369 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370 rt = rt6_device_match(fn->leaf, oif, strict);
371 dst_hold(&rt->u.dst);
372 rt->u.dst.__use++;
373 read_unlock_bh(&rt6_lock);
374
375 rt->u.dst.lastuse = jiffies;
376 if (rt->u.dst.error == 0)
377 return rt;
378 dst_release(&rt->u.dst);
379 return NULL;
380}
381
382/* ip6_ins_rt is called with FREE rt6_lock.
383 It takes new route entry, the addition fails by any reason the
384 route is freed. In any case, if caller does not hold it, it may
385 be destroyed.
386 */
387
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700388int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390{
391 int err;
392
393 write_lock_bh(&rt6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700394 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 write_unlock_bh(&rt6_lock);
396
397 return err;
398}
399
400/* No rt6_lock! If COW failed, the function returns dead route entry
401 with dst->error set to errno value.
402 */
403
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800404static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
405 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 struct rt6_info *rt;
408
409 /*
410 * Clone the route.
411 */
412
413 rt = ip6_rt_copy(ort);
414
415 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900416 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417 if (rt->rt6i_dst.plen != 128 &&
418 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900421 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900423 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 rt->rt6i_dst.plen = 128;
425 rt->rt6i_flags |= RTF_CACHE;
426 rt->u.dst.flags |= DST_HOST;
427
428#ifdef CONFIG_IPV6_SUBTREES
429 if (rt->rt6i_src.plen && saddr) {
430 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431 rt->rt6i_src.plen = 128;
432 }
433#endif
434
435 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
436
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800437 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800439 return rt;
440}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800442static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
443 struct in6_addr *saddr, struct netlink_skb_parms *req)
444{
445 struct rt6_info *rt = rt6_alloc_cow(ort, daddr, saddr);
446 int err;
447
448 if (!rt) {
449 dst_hold(&ip6_null_entry.u.dst);
450 return &ip6_null_entry;
451 }
452
453 dst_hold(&rt->u.dst);
454
455 err = ip6_ins_rt(rt, NULL, NULL, req);
456 if (err)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457 rt->u.dst.error = err;
458
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800459 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460}
461
/*
 * BACKTRACK() - retry a strict lookup further up the fib tree.
 *
 * Used inside ip6_route_input()/ip6_route_output(); relies on the caller's
 * locals `rt`, `fn` and `strict` and on its labels `restart` and `out`.
 * When the device match produced &ip6_null_entry under strict matching,
 * climb towards the root: the first ancestor carrying route info restarts
 * the match there; reaching a root node takes a reference on the null
 * entry and jumps to `out`.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is one statement and
 * cannot capture a following `else`.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
473
474
475void ip6_route_input(struct sk_buff *skb)
476{
477 struct fib6_node *fn;
478 struct rt6_info *rt;
479 int strict;
480 int attempts = 3;
481
482 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
483
484relookup:
485 read_lock_bh(&rt6_lock);
486
487 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
488 &skb->nh.ipv6h->saddr);
489
490restart:
491 rt = fn->leaf;
492
493 if ((rt->rt6i_flags & RTF_CACHE)) {
494 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
495 BACKTRACK();
496 dst_hold(&rt->u.dst);
497 goto out;
498 }
499
Yan Zheng9d17f212005-10-28 15:12:00 -0700500 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501 BACKTRACK();
502
503 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
504 struct rt6_info *nrt;
505 dst_hold(&rt->u.dst);
506 read_unlock_bh(&rt6_lock);
507
508 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700509 &skb->nh.ipv6h->saddr,
510 &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511
512 dst_release(&rt->u.dst);
513 rt = nrt;
514
515 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
516 goto out2;
517
518 /* Race condition! In the gap, when rt6_lock was
519 released someone could insert this route. Relookup.
520 */
521 dst_release(&rt->u.dst);
522 goto relookup;
523 }
524 dst_hold(&rt->u.dst);
525
526out:
527 read_unlock_bh(&rt6_lock);
528out2:
529 rt->u.dst.lastuse = jiffies;
530 rt->u.dst.__use++;
531 skb->dst = (struct dst_entry *) rt;
532}
533
534struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
535{
536 struct fib6_node *fn;
537 struct rt6_info *rt;
538 int strict;
539 int attempts = 3;
540
541 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
542
543relookup:
544 read_lock_bh(&rt6_lock);
545
546 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
547
548restart:
549 rt = fn->leaf;
550
551 if ((rt->rt6i_flags & RTF_CACHE)) {
552 rt = rt6_device_match(rt, fl->oif, strict);
553 BACKTRACK();
554 dst_hold(&rt->u.dst);
555 goto out;
556 }
557 if (rt->rt6i_flags & RTF_DEFAULT) {
558 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
559 rt = rt6_best_dflt(rt, fl->oif);
560 } else {
561 rt = rt6_device_match(rt, fl->oif, strict);
562 BACKTRACK();
563 }
564
565 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
566 struct rt6_info *nrt;
567 dst_hold(&rt->u.dst);
568 read_unlock_bh(&rt6_lock);
569
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700570 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571
572 dst_release(&rt->u.dst);
573 rt = nrt;
574
575 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
576 goto out2;
577
578 /* Race condition! In the gap, when rt6_lock was
579 released someone could insert this route. Relookup.
580 */
581 dst_release(&rt->u.dst);
582 goto relookup;
583 }
584 dst_hold(&rt->u.dst);
585
586out:
587 read_unlock_bh(&rt6_lock);
588out2:
589 rt->u.dst.lastuse = jiffies;
590 rt->u.dst.__use++;
591 return &rt->u.dst;
592}
593
594
595/*
596 * Destination cache support functions
597 */
598
599static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
600{
601 struct rt6_info *rt;
602
603 rt = (struct rt6_info *) dst;
604
605 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
606 return dst;
607
608 return NULL;
609}
610
611static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
612{
613 struct rt6_info *rt = (struct rt6_info *) dst;
614
615 if (rt) {
616 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700617 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618 else
619 dst_release(dst);
620 }
621 return NULL;
622}
623
624static void ip6_link_failure(struct sk_buff *skb)
625{
626 struct rt6_info *rt;
627
628 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
629
630 rt = (struct rt6_info *) skb->dst;
631 if (rt) {
632 if (rt->rt6i_flags&RTF_CACHE) {
633 dst_set_expires(&rt->u.dst, 0);
634 rt->rt6i_flags |= RTF_EXPIRES;
635 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
636 rt->rt6i_node->fn_sernum = -1;
637 }
638}
639
640static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
641{
642 struct rt6_info *rt6 = (struct rt6_info*)dst;
643
644 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
645 rt6->rt6i_flags |= RTF_MODIFIED;
646 if (mtu < IPV6_MIN_MTU) {
647 mtu = IPV6_MIN_MTU;
648 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
649 }
650 dst->metrics[RTAX_MTU-1] = mtu;
651 }
652}
653
/*
 * Singly linked list (via dst->next) of entries created by
 * ndisc_dst_alloc().  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;

static int ipv6_get_mtu(struct net_device *dev);
657
658static inline unsigned int ipv6_advmss(unsigned int mtu)
659{
660 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
661
662 if (mtu < ip6_rt_min_advmss)
663 mtu = ip6_rt_min_advmss;
664
665 /*
666 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
667 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
668 * IPV6_MAXPLEN is also valid and means: "any MSS,
669 * rely only on pmtu discovery"
670 */
671 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
672 mtu = IPV6_MAXPLEN;
673 return mtu;
674}
675
676struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
677 struct neighbour *neigh,
678 struct in6_addr *addr,
679 int (*output)(struct sk_buff *))
680{
681 struct rt6_info *rt;
682 struct inet6_dev *idev = in6_dev_get(dev);
683
684 if (unlikely(idev == NULL))
685 return NULL;
686
687 rt = ip6_dst_alloc();
688 if (unlikely(rt == NULL)) {
689 in6_dev_put(idev);
690 goto out;
691 }
692
693 dev_hold(dev);
694 if (neigh)
695 neigh_hold(neigh);
696 else
697 neigh = ndisc_get_neigh(dev, addr);
698
699 rt->rt6i_dev = dev;
700 rt->rt6i_idev = idev;
701 rt->rt6i_nexthop = neigh;
702 atomic_set(&rt->u.dst.__refcnt, 1);
703 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
704 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
705 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
706 rt->u.dst.output = output;
707
708#if 0 /* there's no chance to use these for ndisc */
709 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
710 ? DST_HOST
711 : 0;
712 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
713 rt->rt6i_dst.plen = 128;
714#endif
715
716 write_lock_bh(&rt6_lock);
717 rt->u.dst.next = ndisc_dst_gc_list;
718 ndisc_dst_gc_list = &rt->u.dst;
719 write_unlock_bh(&rt6_lock);
720
721 fib6_force_start_gc();
722
723out:
724 return (struct dst_entry *)rt;
725}
726
727int ndisc_dst_gc(int *more)
728{
729 struct dst_entry *dst, *next, **pprev;
730 int freed;
731
732 next = NULL;
733 pprev = &ndisc_dst_gc_list;
734 freed = 0;
735 while ((dst = *pprev) != NULL) {
736 if (!atomic_read(&dst->__refcnt)) {
737 *pprev = dst->next;
738 dst_free(dst);
739 freed++;
740 } else {
741 pprev = &dst->next;
742 (*more)++;
743 }
744 }
745
746 return freed;
747}
748
749static int ip6_dst_gc(void)
750{
751 static unsigned expire = 30*HZ;
752 static unsigned long last_gc;
753 unsigned long now = jiffies;
754
755 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
756 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
757 goto out;
758
759 expire++;
760 fib6_run_gc(expire);
761 last_gc = now;
762 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
763 expire = ip6_rt_gc_timeout>>1;
764
765out:
766 expire -= expire>>ip6_rt_gc_elasticity;
767 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
768}
769
770/* Clean host part of a prefix. Not necessary in radix tree,
771 but results in cleaner routing tables.
772
773 Remove it only when all the things will work!
774 */
775
776static int ipv6_get_mtu(struct net_device *dev)
777{
778 int mtu = IPV6_MIN_MTU;
779 struct inet6_dev *idev;
780
781 idev = in6_dev_get(dev);
782 if (idev) {
783 mtu = idev->cnf.mtu6;
784 in6_dev_put(idev);
785 }
786 return mtu;
787}
788
789int ipv6_get_hoplimit(struct net_device *dev)
790{
791 int hoplimit = ipv6_devconf.hop_limit;
792 struct inet6_dev *idev;
793
794 idev = in6_dev_get(dev);
795 if (idev) {
796 hoplimit = idev->cnf.hop_limit;
797 in6_dev_put(idev);
798 }
799 return hoplimit;
800}
801
802/*
803 *
804 */
805
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700806int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
807 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808{
809 int err;
810 struct rtmsg *r;
811 struct rtattr **rta;
812 struct rt6_info *rt = NULL;
813 struct net_device *dev = NULL;
814 struct inet6_dev *idev = NULL;
815 int addr_type;
816
817 rta = (struct rtattr **) _rtattr;
818
819 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
820 return -EINVAL;
821#ifndef CONFIG_IPV6_SUBTREES
822 if (rtmsg->rtmsg_src_len)
823 return -EINVAL;
824#endif
825 if (rtmsg->rtmsg_ifindex) {
826 err = -ENODEV;
827 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
828 if (!dev)
829 goto out;
830 idev = in6_dev_get(dev);
831 if (!idev)
832 goto out;
833 }
834
835 if (rtmsg->rtmsg_metric == 0)
836 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
837
838 rt = ip6_dst_alloc();
839
840 if (rt == NULL) {
841 err = -ENOMEM;
842 goto out;
843 }
844
845 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -0800846 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 if (nlh && (r = NLMSG_DATA(nlh))) {
848 rt->rt6i_protocol = r->rtm_protocol;
849 } else {
850 rt->rt6i_protocol = RTPROT_BOOT;
851 }
852
853 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
854
855 if (addr_type & IPV6_ADDR_MULTICAST)
856 rt->u.dst.input = ip6_mc_input;
857 else
858 rt->u.dst.input = ip6_forward;
859
860 rt->u.dst.output = ip6_output;
861
862 ipv6_addr_prefix(&rt->rt6i_dst.addr,
863 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
864 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
865 if (rt->rt6i_dst.plen == 128)
866 rt->u.dst.flags = DST_HOST;
867
868#ifdef CONFIG_IPV6_SUBTREES
869 ipv6_addr_prefix(&rt->rt6i_src.addr,
870 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
871 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
872#endif
873
874 rt->rt6i_metric = rtmsg->rtmsg_metric;
875
876 /* We cannot add true routes via loopback here,
877 they would result in kernel looping; promote them to reject routes
878 */
879 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
880 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
881 /* hold loopback dev/idev if we haven't done so. */
882 if (dev != &loopback_dev) {
883 if (dev) {
884 dev_put(dev);
885 in6_dev_put(idev);
886 }
887 dev = &loopback_dev;
888 dev_hold(dev);
889 idev = in6_dev_get(dev);
890 if (!idev) {
891 err = -ENODEV;
892 goto out;
893 }
894 }
895 rt->u.dst.output = ip6_pkt_discard_out;
896 rt->u.dst.input = ip6_pkt_discard;
897 rt->u.dst.error = -ENETUNREACH;
898 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
899 goto install_route;
900 }
901
902 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
903 struct in6_addr *gw_addr;
904 int gwa_type;
905
906 gw_addr = &rtmsg->rtmsg_gateway;
907 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
908 gwa_type = ipv6_addr_type(gw_addr);
909
910 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
911 struct rt6_info *grt;
912
913 /* IPv6 strictly inhibits using not link-local
914 addresses as nexthop address.
915 Otherwise, router will not able to send redirects.
916 It is very good, but in some (rare!) circumstances
917 (SIT, PtP, NBMA NOARP links) it is handy to allow
918 some exceptions. --ANK
919 */
920 err = -EINVAL;
921 if (!(gwa_type&IPV6_ADDR_UNICAST))
922 goto out;
923
924 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
925
926 err = -EHOSTUNREACH;
927 if (grt == NULL)
928 goto out;
929 if (dev) {
930 if (dev != grt->rt6i_dev) {
931 dst_release(&grt->u.dst);
932 goto out;
933 }
934 } else {
935 dev = grt->rt6i_dev;
936 idev = grt->rt6i_idev;
937 dev_hold(dev);
938 in6_dev_hold(grt->rt6i_idev);
939 }
940 if (!(grt->rt6i_flags&RTF_GATEWAY))
941 err = 0;
942 dst_release(&grt->u.dst);
943
944 if (err)
945 goto out;
946 }
947 err = -EINVAL;
948 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
949 goto out;
950 }
951
952 err = -ENODEV;
953 if (dev == NULL)
954 goto out;
955
956 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
957 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
958 if (IS_ERR(rt->rt6i_nexthop)) {
959 err = PTR_ERR(rt->rt6i_nexthop);
960 rt->rt6i_nexthop = NULL;
961 goto out;
962 }
963 }
964
965 rt->rt6i_flags = rtmsg->rtmsg_flags;
966
967install_route:
968 if (rta && rta[RTA_METRICS-1]) {
969 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
970 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
971
972 while (RTA_OK(attr, attrlen)) {
973 unsigned flavor = attr->rta_type;
974 if (flavor) {
975 if (flavor > RTAX_MAX) {
976 err = -EINVAL;
977 goto out;
978 }
979 rt->u.dst.metrics[flavor-1] =
980 *(u32 *)RTA_DATA(attr);
981 }
982 attr = RTA_NEXT(attr, attrlen);
983 }
984 }
985
986 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
987 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
988 if (!rt->u.dst.metrics[RTAX_MTU-1])
989 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
990 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
991 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
992 rt->u.dst.dev = dev;
993 rt->rt6i_idev = idev;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700994 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995
996out:
997 if (dev)
998 dev_put(dev);
999 if (idev)
1000 in6_dev_put(idev);
1001 if (rt)
1002 dst_free((struct dst_entry *) rt);
1003 return err;
1004}
1005
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001006int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007{
1008 int err;
1009
1010 write_lock_bh(&rt6_lock);
1011
1012 rt6_reset_dflt_pointer(NULL);
1013
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001014 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015 dst_release(&rt->u.dst);
1016
1017 write_unlock_bh(&rt6_lock);
1018
1019 return err;
1020}
1021
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001022static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023{
1024 struct fib6_node *fn;
1025 struct rt6_info *rt;
1026 int err = -ESRCH;
1027
1028 read_lock_bh(&rt6_lock);
1029
1030 fn = fib6_locate(&ip6_routing_table,
1031 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1032 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1033
1034 if (fn) {
1035 for (rt = fn->leaf; rt; rt = rt->u.next) {
1036 if (rtmsg->rtmsg_ifindex &&
1037 (rt->rt6i_dev == NULL ||
1038 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1039 continue;
1040 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1041 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1042 continue;
1043 if (rtmsg->rtmsg_metric &&
1044 rtmsg->rtmsg_metric != rt->rt6i_metric)
1045 continue;
1046 dst_hold(&rt->u.dst);
1047 read_unlock_bh(&rt6_lock);
1048
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001049 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050 }
1051 }
1052 read_unlock_bh(&rt6_lock);
1053
1054 return err;
1055}
1056
1057/*
1058 * Handle redirects
1059 */
1060void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1061 struct neighbour *neigh, u8 *lladdr, int on_link)
1062{
1063 struct rt6_info *rt, *nrt;
1064
1065 /* Locate old route to this destination. */
1066 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1067
1068 if (rt == NULL)
1069 return;
1070
1071 if (neigh->dev != rt->rt6i_dev)
1072 goto out;
1073
1074 /*
1075 * Current route is on-link; redirect is always invalid.
1076 *
1077 * Seems, previous statement is not true. It could
1078 * be node, which looks for us as on-link (f.e. proxy ndisc)
1079 * But then router serving it might decide, that we should
1080 * know truth 8)8) --ANK (980726).
1081 */
1082 if (!(rt->rt6i_flags&RTF_GATEWAY))
1083 goto out;
1084
1085 /*
1086 * RFC 2461 specifies that redirects should only be
1087 * accepted if they come from the nexthop to the target.
1088 * Due to the way default routers are chosen, this notion
1089 * is a bit fuzzy and one might need to check all default
1090 * routers.
1091 */
1092 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1093 if (rt->rt6i_flags & RTF_DEFAULT) {
1094 struct rt6_info *rt1;
1095
1096 read_lock(&rt6_lock);
1097 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1098 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1099 dst_hold(&rt1->u.dst);
1100 dst_release(&rt->u.dst);
1101 read_unlock(&rt6_lock);
1102 rt = rt1;
1103 goto source_ok;
1104 }
1105 }
1106 read_unlock(&rt6_lock);
1107 }
1108 if (net_ratelimit())
1109 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1110 "for redirect target\n");
1111 goto out;
1112 }
1113
1114source_ok:
1115
1116 /*
1117 * We have finally decided to accept it.
1118 */
1119
1120 neigh_update(neigh, lladdr, NUD_STALE,
1121 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1122 NEIGH_UPDATE_F_OVERRIDE|
1123 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1124 NEIGH_UPDATE_F_ISROUTER))
1125 );
1126
1127 /*
1128 * Redirect received -> path was valid.
1129 * Look, redirects are sent only in response to data packets,
1130 * so that this nexthop apparently is reachable. --ANK
1131 */
1132 dst_confirm(&rt->u.dst);
1133
1134 /* Duplicate redirect: silently ignore. */
1135 if (neigh == rt->u.dst.neighbour)
1136 goto out;
1137
1138 nrt = ip6_rt_copy(rt);
1139 if (nrt == NULL)
1140 goto out;
1141
1142 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1143 if (on_link)
1144 nrt->rt6i_flags &= ~RTF_GATEWAY;
1145
1146 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1147 nrt->rt6i_dst.plen = 128;
1148 nrt->u.dst.flags |= DST_HOST;
1149
1150 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1151 nrt->rt6i_nexthop = neigh_clone(neigh);
1152 /* Reset pmtu, it may be better */
1153 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1154 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1155
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001156 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157 goto out;
1158
1159 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001160 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161 return;
1162 }
1163
1164out:
1165 dst_release(&rt->u.dst);
1166 return;
1167}
1168
1169/*
1170 * Handle ICMP "packet too big" messages
1171 * i.e. Path MTU discovery
1172 */
1173
1174void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1175 struct net_device *dev, u32 pmtu)
1176{
1177 struct rt6_info *rt, *nrt;
1178 int allfrag = 0;
1179
1180 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1181 if (rt == NULL)
1182 return;
1183
1184 if (pmtu >= dst_mtu(&rt->u.dst))
1185 goto out;
1186
1187 if (pmtu < IPV6_MIN_MTU) {
1188 /*
1189 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1190 * MTU (1280) and a fragment header should always be included
1191 * after a node receiving Too Big message reporting PMTU is
1192 * less than the IPv6 Minimum Link MTU.
1193 */
1194 pmtu = IPV6_MIN_MTU;
1195 allfrag = 1;
1196 }
1197
1198 /* New mtu received -> path was valid.
1199 They are sent only in response to data packets,
1200 so that this nexthop apparently is reachable. --ANK
1201 */
1202 dst_confirm(&rt->u.dst);
1203
1204 /* Host route. If it is static, it would be better
1205 not to override it, but add new one, so that
1206 when cache entry will expire old pmtu
1207 would return automatically.
1208 */
1209 if (rt->rt6i_flags & RTF_CACHE) {
1210 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1211 if (allfrag)
1212 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1213 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1214 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1215 goto out;
1216 }
1217
1218 /* Network route.
1219 Two cases are possible:
1220 1. It is connected route. Action: COW
1221 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1222 */
1223 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001224 nrt = rt6_alloc_cow(rt, daddr, saddr);
1225 if (!nrt)
1226 goto out;
1227
1228 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1229 if (allfrag)
1230 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1231
1232 /* According to RFC 1981, detecting PMTU increase shouldn't be
1233 * happened within 5 mins, the recommended timer is 10 mins.
1234 * Here this route expiration time is set to ip6_rt_mtu_expires
1235 * which is 10 mins. After 10 mins the decreased pmtu is expired
1236 * and detecting PMTU increase will be automatically happened.
1237 */
1238 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1239 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1240
1241 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 } else {
1243 nrt = ip6_rt_copy(rt);
1244 if (nrt == NULL)
1245 goto out;
1246 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1247 nrt->rt6i_dst.plen = 128;
1248 nrt->u.dst.flags |= DST_HOST;
1249 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1250 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1251 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1252 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1253 if (allfrag)
1254 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001255 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256 }
1257
1258out:
1259 dst_release(&rt->u.dst);
1260}
1261
1262/*
1263 * Misc support functions
1264 */
1265
1266static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1267{
1268 struct rt6_info *rt = ip6_dst_alloc();
1269
1270 if (rt) {
1271 rt->u.dst.input = ort->u.dst.input;
1272 rt->u.dst.output = ort->u.dst.output;
1273
1274 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1275 rt->u.dst.dev = ort->u.dst.dev;
1276 if (rt->u.dst.dev)
1277 dev_hold(rt->u.dst.dev);
1278 rt->rt6i_idev = ort->rt6i_idev;
1279 if (rt->rt6i_idev)
1280 in6_dev_hold(rt->rt6i_idev);
1281 rt->u.dst.lastuse = jiffies;
1282 rt->rt6i_expires = 0;
1283
1284 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1285 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1286 rt->rt6i_metric = 0;
1287
1288 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1289#ifdef CONFIG_IPV6_SUBTREES
1290 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1291#endif
1292 }
1293 return rt;
1294}
1295
1296struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1297{
1298 struct rt6_info *rt;
1299 struct fib6_node *fn;
1300
1301 fn = &ip6_routing_table;
1302
1303 write_lock_bh(&rt6_lock);
1304 for (rt = fn->leaf; rt; rt=rt->u.next) {
1305 if (dev == rt->rt6i_dev &&
1306 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1307 break;
1308 }
1309 if (rt)
1310 dst_hold(&rt->u.dst);
1311 write_unlock_bh(&rt6_lock);
1312 return rt;
1313}
1314
1315struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1316 struct net_device *dev)
1317{
1318 struct in6_rtmsg rtmsg;
1319
1320 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1321 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1322 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1323 rtmsg.rtmsg_metric = 1024;
1324 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1325
1326 rtmsg.rtmsg_ifindex = dev->ifindex;
1327
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001328 ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329 return rt6_get_dflt_router(gwaddr, dev);
1330}
1331
1332void rt6_purge_dflt_routers(void)
1333{
1334 struct rt6_info *rt;
1335
1336restart:
1337 read_lock_bh(&rt6_lock);
1338 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1339 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1340 dst_hold(&rt->u.dst);
1341
1342 rt6_reset_dflt_pointer(NULL);
1343
1344 read_unlock_bh(&rt6_lock);
1345
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001346 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347
1348 goto restart;
1349 }
1350 }
1351 read_unlock_bh(&rt6_lock);
1352}
1353
1354int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1355{
1356 struct in6_rtmsg rtmsg;
1357 int err;
1358
1359 switch(cmd) {
1360 case SIOCADDRT: /* Add a route */
1361 case SIOCDELRT: /* Delete a route */
1362 if (!capable(CAP_NET_ADMIN))
1363 return -EPERM;
1364 err = copy_from_user(&rtmsg, arg,
1365 sizeof(struct in6_rtmsg));
1366 if (err)
1367 return -EFAULT;
1368
1369 rtnl_lock();
1370 switch (cmd) {
1371 case SIOCADDRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001372 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 break;
1374 case SIOCDELRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001375 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376 break;
1377 default:
1378 err = -EINVAL;
1379 }
1380 rtnl_unlock();
1381
1382 return err;
1383 };
1384
1385 return -EINVAL;
1386}
1387
1388/*
1389 * Drop the packet on the floor
1390 */
1391
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001392static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393{
1394 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1395 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1396 kfree_skb(skb);
1397 return 0;
1398}
1399
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001400static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401{
1402 skb->dev = skb->dst->dev;
1403 return ip6_pkt_discard(skb);
1404}
1405
1406/*
1407 * Allocate a dst for local (unicast / anycast) address.
1408 */
1409
1410struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1411 const struct in6_addr *addr,
1412 int anycast)
1413{
1414 struct rt6_info *rt = ip6_dst_alloc();
1415
1416 if (rt == NULL)
1417 return ERR_PTR(-ENOMEM);
1418
1419 dev_hold(&loopback_dev);
1420 in6_dev_hold(idev);
1421
1422 rt->u.dst.flags = DST_HOST;
1423 rt->u.dst.input = ip6_input;
1424 rt->u.dst.output = ip6_output;
1425 rt->rt6i_dev = &loopback_dev;
1426 rt->rt6i_idev = idev;
1427 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1428 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1429 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1430 rt->u.dst.obsolete = -1;
1431
1432 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001433 if (anycast)
1434 rt->rt6i_flags |= RTF_ANYCAST;
1435 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436 rt->rt6i_flags |= RTF_LOCAL;
1437 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1438 if (rt->rt6i_nexthop == NULL) {
1439 dst_free((struct dst_entry *) rt);
1440 return ERR_PTR(-ENOMEM);
1441 }
1442
1443 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1444 rt->rt6i_dst.plen = 128;
1445
1446 atomic_set(&rt->u.dst.__refcnt, 1);
1447
1448 return rt;
1449}
1450
1451static int fib6_ifdown(struct rt6_info *rt, void *arg)
1452{
1453 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1454 rt != &ip6_null_entry) {
1455 RT6_TRACE("deleted by ifdown %p\n", rt);
1456 return -1;
1457 }
1458 return 0;
1459}
1460
1461void rt6_ifdown(struct net_device *dev)
1462{
1463 write_lock_bh(&rt6_lock);
1464 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1465 write_unlock_bh(&rt6_lock);
1466}
1467
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1473
1474static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1475{
1476 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1477 struct inet6_dev *idev;
1478
1479 /* In IPv6 pmtu discovery is not optional,
1480 so that RTAX_MTU lock cannot disable it.
1481 We still use this lock to block changes
1482 caused by addrconf/ndisc.
1483 */
1484
1485 idev = __in6_dev_get(arg->dev);
1486 if (idev == NULL)
1487 return 0;
1488
1489 /* For administrative MTU increase, there is no way to discover
1490 IPv6 PMTU increase, so PMTU increase should be updated here.
1491 Since RFC 1981 doesn't include administrative MTU increase
1492 update PMTU increase is a MUST. (i.e. jumbo frame)
1493 */
1494 /*
1495 If new MTU is less than route PMTU, this new MTU will be the
1496 lowest MTU in the path, update the route PMTU to reflect PMTU
1497 decreases; if new MTU is greater than route PMTU, and the
1498 old MTU is the lowest MTU in the path, update the route PMTU
1499 to reflect the increase. In this case if the other nodes' MTU
1500 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1501 PMTU discouvery.
1502 */
1503 if (rt->rt6i_dev == arg->dev &&
1504 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1505 (dst_mtu(&rt->u.dst) > arg->mtu ||
1506 (dst_mtu(&rt->u.dst) < arg->mtu &&
1507 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1508 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1509 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1510 return 0;
1511}
1512
1513void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1514{
1515 struct rt6_mtu_change_arg arg;
1516
1517 arg.dev = dev;
1518 arg.mtu = mtu;
1519 read_lock_bh(&rt6_lock);
1520 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1521 read_unlock_bh(&rt6_lock);
1522}
1523
1524static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1525 struct in6_rtmsg *rtmsg)
1526{
1527 memset(rtmsg, 0, sizeof(*rtmsg));
1528
1529 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1530 rtmsg->rtmsg_src_len = r->rtm_src_len;
1531 rtmsg->rtmsg_flags = RTF_UP;
1532 if (r->rtm_type == RTN_UNREACHABLE)
1533 rtmsg->rtmsg_flags |= RTF_REJECT;
1534
1535 if (rta[RTA_GATEWAY-1]) {
1536 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1537 return -EINVAL;
1538 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1539 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1540 }
1541 if (rta[RTA_DST-1]) {
1542 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1543 return -EINVAL;
1544 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1545 }
1546 if (rta[RTA_SRC-1]) {
1547 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1548 return -EINVAL;
1549 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1550 }
1551 if (rta[RTA_OIF-1]) {
1552 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1553 return -EINVAL;
1554 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1555 }
1556 if (rta[RTA_PRIORITY-1]) {
1557 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1558 return -EINVAL;
1559 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1560 }
1561 return 0;
1562}
1563
1564int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1565{
1566 struct rtmsg *r = NLMSG_DATA(nlh);
1567 struct in6_rtmsg rtmsg;
1568
1569 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1570 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001571 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572}
1573
1574int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1575{
1576 struct rtmsg *r = NLMSG_DATA(nlh);
1577 struct in6_rtmsg rtmsg;
1578
1579 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1580 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001581 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582}
1583
/* Per-call context that inet6_dump_fib() hands to rt6_dump_route(). */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* dump state of the request */
};
1589
1590static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001591 struct in6_addr *dst, struct in6_addr *src,
1592 int iif, int type, u32 pid, u32 seq,
1593 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594{
1595 struct rtmsg *rtm;
1596 struct nlmsghdr *nlh;
1597 unsigned char *b = skb->tail;
1598 struct rta_cacheinfo ci;
1599
1600 if (prefix) { /* user wants prefix routes only */
1601 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1602 /* success since this is not a prefix route */
1603 return 1;
1604 }
1605 }
1606
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001607 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608 rtm = NLMSG_DATA(nlh);
1609 rtm->rtm_family = AF_INET6;
1610 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1611 rtm->rtm_src_len = rt->rt6i_src.plen;
1612 rtm->rtm_tos = 0;
1613 rtm->rtm_table = RT_TABLE_MAIN;
1614 if (rt->rt6i_flags&RTF_REJECT)
1615 rtm->rtm_type = RTN_UNREACHABLE;
1616 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1617 rtm->rtm_type = RTN_LOCAL;
1618 else
1619 rtm->rtm_type = RTN_UNICAST;
1620 rtm->rtm_flags = 0;
1621 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1622 rtm->rtm_protocol = rt->rt6i_protocol;
1623 if (rt->rt6i_flags&RTF_DYNAMIC)
1624 rtm->rtm_protocol = RTPROT_REDIRECT;
1625 else if (rt->rt6i_flags & RTF_ADDRCONF)
1626 rtm->rtm_protocol = RTPROT_KERNEL;
1627 else if (rt->rt6i_flags&RTF_DEFAULT)
1628 rtm->rtm_protocol = RTPROT_RA;
1629
1630 if (rt->rt6i_flags&RTF_CACHE)
1631 rtm->rtm_flags |= RTM_F_CLONED;
1632
1633 if (dst) {
1634 RTA_PUT(skb, RTA_DST, 16, dst);
1635 rtm->rtm_dst_len = 128;
1636 } else if (rtm->rtm_dst_len)
1637 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1638#ifdef CONFIG_IPV6_SUBTREES
1639 if (src) {
1640 RTA_PUT(skb, RTA_SRC, 16, src);
1641 rtm->rtm_src_len = 128;
1642 } else if (rtm->rtm_src_len)
1643 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1644#endif
1645 if (iif)
1646 RTA_PUT(skb, RTA_IIF, 4, &iif);
1647 else if (dst) {
1648 struct in6_addr saddr_buf;
1649 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1650 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1651 }
1652 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1653 goto rtattr_failure;
1654 if (rt->u.dst.neighbour)
1655 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1656 if (rt->u.dst.dev)
1657 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1658 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1659 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1660 if (rt->rt6i_expires)
1661 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1662 else
1663 ci.rta_expires = 0;
1664 ci.rta_used = rt->u.dst.__use;
1665 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1666 ci.rta_error = rt->u.dst.error;
1667 ci.rta_id = 0;
1668 ci.rta_ts = 0;
1669 ci.rta_tsage = 0;
1670 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1671 nlh->nlmsg_len = skb->tail - b;
1672 return skb->len;
1673
1674nlmsg_failure:
1675rtattr_failure:
1676 skb_trim(skb, b - skb->data);
1677 return -1;
1678}
1679
1680static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1681{
1682 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1683 int prefix;
1684
1685 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1686 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1687 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1688 } else
1689 prefix = 0;
1690
1691 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1692 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001693 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694}
1695
1696static int fib6_dump_node(struct fib6_walker_t *w)
1697{
1698 int res;
1699 struct rt6_info *rt;
1700
1701 for (rt = w->leaf; rt; rt = rt->u.next) {
1702 res = rt6_dump_route(rt, w->args);
1703 if (res < 0) {
1704 /* Frame is full, suspend walking */
1705 w->leaf = rt;
1706 return 1;
1707 }
1708 BUG_TRAP(res!=0);
1709 }
1710 w->leaf = NULL;
1711 return 0;
1712}
1713
1714static void fib6_dump_end(struct netlink_callback *cb)
1715{
1716 struct fib6_walker_t *w = (void*)cb->args[0];
1717
1718 if (w) {
1719 cb->args[0] = 0;
1720 fib6_walker_unlink(w);
1721 kfree(w);
1722 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08001723 cb->done = (void*)cb->args[1];
1724 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725}
1726
1727static int fib6_dump_done(struct netlink_callback *cb)
1728{
1729 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01001730 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731}
1732
1733int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1734{
1735 struct rt6_rtnl_dump_arg arg;
1736 struct fib6_walker_t *w;
1737 int res;
1738
1739 arg.skb = skb;
1740 arg.cb = cb;
1741
1742 w = (void*)cb->args[0];
1743 if (w == NULL) {
1744 /* New dump:
1745 *
1746 * 1. hook callback destructor.
1747 */
1748 cb->args[1] = (long)cb->done;
1749 cb->done = fib6_dump_done;
1750
1751 /*
1752 * 2. allocate and initialize walker.
1753 */
David S. Miller9e147a12005-11-17 16:52:51 -08001754 w = kmalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755 if (w == NULL)
1756 return -ENOMEM;
1757 RT6_TRACE("dump<%p", w);
1758 memset(w, 0, sizeof(*w));
1759 w->root = &ip6_routing_table;
1760 w->func = fib6_dump_node;
1761 w->args = &arg;
1762 cb->args[0] = (long)w;
1763 read_lock_bh(&rt6_lock);
1764 res = fib6_walk(w);
1765 read_unlock_bh(&rt6_lock);
1766 } else {
1767 w->args = &arg;
1768 read_lock_bh(&rt6_lock);
1769 res = fib6_walk_continue(w);
1770 read_unlock_bh(&rt6_lock);
1771 }
1772#if RT6_DEBUG >= 3
1773 if (res <= 0 && skb->len == 0)
1774 RT6_TRACE("%p>dump end\n", w);
1775#endif
1776 res = res < 0 ? res : skb->len;
1777 /* res < 0 is an error. (really, impossible)
1778 res == 0 means that dump is complete, but skb still can contain data.
1779 res > 0 dump is not complete, but frame is full.
1780 */
1781 /* Destroy walker, if dump of this table is complete. */
1782 if (res <= 0)
1783 fib6_dump_end(cb);
1784 return res;
1785}
1786
1787int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1788{
1789 struct rtattr **rta = arg;
1790 int iif = 0;
1791 int err = -ENOBUFS;
1792 struct sk_buff *skb;
1793 struct flowi fl;
1794 struct rt6_info *rt;
1795
1796 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1797 if (skb == NULL)
1798 goto out;
1799
1800 /* Reserve room for dummy headers, this skb can pass
1801 through good chunk of routing engine.
1802 */
1803 skb->mac.raw = skb->data;
1804 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1805
1806 memset(&fl, 0, sizeof(fl));
1807 if (rta[RTA_SRC-1])
1808 ipv6_addr_copy(&fl.fl6_src,
1809 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1810 if (rta[RTA_DST-1])
1811 ipv6_addr_copy(&fl.fl6_dst,
1812 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1813
1814 if (rta[RTA_IIF-1])
1815 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1816
1817 if (iif) {
1818 struct net_device *dev;
1819 dev = __dev_get_by_index(iif);
1820 if (!dev) {
1821 err = -ENODEV;
1822 goto out_free;
1823 }
1824 }
1825
1826 fl.oif = 0;
1827 if (rta[RTA_OIF-1])
1828 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1829
1830 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1831
1832 skb->dst = &rt->u.dst;
1833
1834 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1835 err = rt6_fill_node(skb, rt,
1836 &fl.fl6_dst, &fl.fl6_src,
1837 iif,
1838 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001839 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840 if (err < 0) {
1841 err = -EMSGSIZE;
1842 goto out_free;
1843 }
1844
1845 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1846 if (err > 0)
1847 err = 0;
1848out:
1849 return err;
1850out_free:
1851 kfree_skb(skb);
1852 goto out;
1853}
1854
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001855void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1856 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857{
1858 struct sk_buff *skb;
1859 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001860 u32 pid = current->pid;
1861 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001863 if (req)
1864 pid = req->pid;
1865 if (nlh)
1866 seq = nlh->nlmsg_seq;
1867
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868 skb = alloc_skb(size, gfp_any());
1869 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07001870 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871 return;
1872 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001873 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001874 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07001875 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876 return;
1877 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07001878 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1879 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07001880}
1881
1882/*
1883 * /proc
1884 */
1885
1886#ifdef CONFIG_PROC_FS
1887
/*
 * Length in bytes of one line of the ipv6_route proc file: three
 * 32-digit hex addresses, two 4-character prefix-length fields, 40
 * characters of numeric columns and a 5+1 character tail.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* file offset requested by the reader */
	int length;	/* number of bytes requested */
	int skip;	/* routes skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
1898
1899static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1900{
1901 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1902 int i;
1903
1904 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1905 arg->skip++;
1906 return 0;
1907 }
1908
1909 if (arg->len >= arg->length)
1910 return 0;
1911
1912 for (i=0; i<16; i++) {
1913 sprintf(arg->buffer + arg->len, "%02x",
1914 rt->rt6i_dst.addr.s6_addr[i]);
1915 arg->len += 2;
1916 }
1917 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1918 rt->rt6i_dst.plen);
1919
1920#ifdef CONFIG_IPV6_SUBTREES
1921 for (i=0; i<16; i++) {
1922 sprintf(arg->buffer + arg->len, "%02x",
1923 rt->rt6i_src.addr.s6_addr[i]);
1924 arg->len += 2;
1925 }
1926 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1927 rt->rt6i_src.plen);
1928#else
1929 sprintf(arg->buffer + arg->len,
1930 "00000000000000000000000000000000 00 ");
1931 arg->len += 36;
1932#endif
1933
1934 if (rt->rt6i_nexthop) {
1935 for (i=0; i<16; i++) {
1936 sprintf(arg->buffer + arg->len, "%02x",
1937 rt->rt6i_nexthop->primary_key[i]);
1938 arg->len += 2;
1939 }
1940 } else {
1941 sprintf(arg->buffer + arg->len,
1942 "00000000000000000000000000000000");
1943 arg->len += 32;
1944 }
1945 arg->len += sprintf(arg->buffer + arg->len,
1946 " %08x %08x %08x %08x %8s\n",
1947 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1948 rt->u.dst.__use, rt->rt6i_flags,
1949 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1950 return 0;
1951}
1952
1953static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1954{
1955 struct rt6_proc_arg arg;
1956 arg.buffer = buffer;
1957 arg.offset = offset;
1958 arg.length = length;
1959 arg.skip = 0;
1960 arg.len = 0;
1961
1962 read_lock_bh(&rt6_lock);
1963 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1964 read_unlock_bh(&rt6_lock);
1965
1966 *start = buffer;
1967 if (offset)
1968 *start += offset % RT6_INFO_LEN;
1969
1970 arg.len -= offset % RT6_INFO_LEN;
1971
1972 if (arg.len > length)
1973 arg.len = length;
1974 if (arg.len < 0)
1975 arg.len = 0;
1976
1977 return arg.len;
1978}
1979
Linus Torvalds1da177e2005-04-16 15:20:36 -07001980static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1981{
1982 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1983 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1984 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1985 rt6_stats.fib_rt_cache,
1986 atomic_read(&ip6_dst_ops.entries),
1987 rt6_stats.fib_discarded_routes);
1988
1989 return 0;
1990}
1991
1992static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1993{
1994 return single_open(file, rt6_stats_seq_show, NULL);
1995}
1996
1997static struct file_operations rt6_stats_seq_fops = {
1998 .owner = THIS_MODULE,
1999 .open = rt6_stats_seq_open,
2000 .read = seq_read,
2001 .llseek = seq_lseek,
2002 .release = single_release,
2003};
2004#endif /* CONFIG_PROC_FS */
2005
2006#ifdef CONFIG_SYSCTL
2007
2008static int flush_delay;
2009
2010static
2011int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2012 void __user *buffer, size_t *lenp, loff_t *ppos)
2013{
2014 if (write) {
2015 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2016 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2017 return 0;
2018 } else
2019 return -EINVAL;
2020}
2021
2022ctl_table ipv6_route_table[] = {
2023 {
2024 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2025 .procname = "flush",
2026 .data = &flush_delay,
2027 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002028 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002029 .proc_handler = &ipv6_sysctl_rtcache_flush
2030 },
2031 {
2032 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2033 .procname = "gc_thresh",
2034 .data = &ip6_dst_ops.gc_thresh,
2035 .maxlen = sizeof(int),
2036 .mode = 0644,
2037 .proc_handler = &proc_dointvec,
2038 },
2039 {
2040 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2041 .procname = "max_size",
2042 .data = &ip6_rt_max_size,
2043 .maxlen = sizeof(int),
2044 .mode = 0644,
2045 .proc_handler = &proc_dointvec,
2046 },
2047 {
2048 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2049 .procname = "gc_min_interval",
2050 .data = &ip6_rt_gc_min_interval,
2051 .maxlen = sizeof(int),
2052 .mode = 0644,
2053 .proc_handler = &proc_dointvec_jiffies,
2054 .strategy = &sysctl_jiffies,
2055 },
2056 {
2057 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2058 .procname = "gc_timeout",
2059 .data = &ip6_rt_gc_timeout,
2060 .maxlen = sizeof(int),
2061 .mode = 0644,
2062 .proc_handler = &proc_dointvec_jiffies,
2063 .strategy = &sysctl_jiffies,
2064 },
2065 {
2066 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2067 .procname = "gc_interval",
2068 .data = &ip6_rt_gc_interval,
2069 .maxlen = sizeof(int),
2070 .mode = 0644,
2071 .proc_handler = &proc_dointvec_jiffies,
2072 .strategy = &sysctl_jiffies,
2073 },
2074 {
2075 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2076 .procname = "gc_elasticity",
2077 .data = &ip6_rt_gc_elasticity,
2078 .maxlen = sizeof(int),
2079 .mode = 0644,
2080 .proc_handler = &proc_dointvec_jiffies,
2081 .strategy = &sysctl_jiffies,
2082 },
2083 {
2084 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2085 .procname = "mtu_expires",
2086 .data = &ip6_rt_mtu_expires,
2087 .maxlen = sizeof(int),
2088 .mode = 0644,
2089 .proc_handler = &proc_dointvec_jiffies,
2090 .strategy = &sysctl_jiffies,
2091 },
2092 {
2093 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2094 .procname = "min_adv_mss",
2095 .data = &ip6_rt_min_advmss,
2096 .maxlen = sizeof(int),
2097 .mode = 0644,
2098 .proc_handler = &proc_dointvec_jiffies,
2099 .strategy = &sysctl_jiffies,
2100 },
2101 {
2102 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2103 .procname = "gc_min_interval_ms",
2104 .data = &ip6_rt_gc_min_interval,
2105 .maxlen = sizeof(int),
2106 .mode = 0644,
2107 .proc_handler = &proc_dointvec_ms_jiffies,
2108 .strategy = &sysctl_ms_jiffies,
2109 },
2110 { .ctl_name = 0 }
2111};
2112
2113#endif
2114
2115void __init ip6_route_init(void)
2116{
2117 struct proc_dir_entry *p;
2118
2119 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2120 sizeof(struct rt6_info),
2121 0, SLAB_HWCACHE_ALIGN,
2122 NULL, NULL);
2123 if (!ip6_dst_ops.kmem_cachep)
2124 panic("cannot create ip6_dst_cache");
2125
2126 fib6_init();
2127#ifdef CONFIG_PROC_FS
2128 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2129 if (p)
2130 p->owner = THIS_MODULE;
2131
2132 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2133#endif
2134#ifdef CONFIG_XFRM
2135 xfrm6_init();
2136#endif
2137}
2138
2139void ip6_route_cleanup(void)
2140{
2141#ifdef CONFIG_PROC_FS
2142 proc_net_remove("ipv6_route");
2143 proc_net_remove("rt6_stats");
2144#endif
2145#ifdef CONFIG_XFRM
2146 xfrm6_fini();
2147#endif
2148 rt6_ifdown(NULL);
2149 fib6_gc_cleanup();
2150 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2151}