/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 */
26
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/netlink.h>
#include <linux/if_arp.h>

#ifdef 	CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif

#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
62
/*
 * Debug verbosity for this file.  Set to 3 to get tracing: only at that
 * level do RDBG() and RT6_TRACE() expand to printk() calls, otherwise
 * they compile to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* RDBG() takes an already parenthesised printk argument list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
73
74
75static int ip6_rt_max_size = 4096;
76static int ip6_rt_gc_min_interval = HZ / 2;
77static int ip6_rt_gc_timeout = 60*HZ;
78int ip6_rt_gc_interval = 30*HZ;
79static int ip6_rt_gc_elasticity = 9;
80static int ip6_rt_mtu_expires = 10*60*HZ;
81static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
89static int ip6_dst_gc(void);
90
91static int ip6_pkt_discard(struct sk_buff *skb);
92static int ip6_pkt_discard_out(struct sk_buff *skb);
93static void ip6_link_failure(struct sk_buff *skb);
94static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95
96static struct dst_ops ip6_dst_ops = {
97 .family = AF_INET6,
98 .protocol = __constant_htons(ETH_P_IPV6),
99 .gc = ip6_dst_gc,
100 .gc_thresh = 1024,
101 .check = ip6_dst_check,
102 .destroy = ip6_dst_destroy,
103 .ifdown = ip6_dst_ifdown,
104 .negative_advice = ip6_negative_advice,
105 .link_failure = ip6_link_failure,
106 .update_pmtu = ip6_rt_update_pmtu,
107 .entry_size = sizeof(struct rt6_info),
108};
109
110struct rt6_info ip6_null_entry = {
111 .u = {
112 .dst = {
113 .__refcnt = ATOMIC_INIT(1),
114 .__use = 1,
115 .dev = &loopback_dev,
116 .obsolete = -1,
117 .error = -ENETUNREACH,
118 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
119 .input = ip6_pkt_discard,
120 .output = ip6_pkt_discard_out,
121 .ops = &ip6_dst_ops,
122 .path = (struct dst_entry*)&ip6_null_entry,
123 }
124 },
125 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
126 .rt6i_metric = ~(u32) 0,
127 .rt6i_ref = ATOMIC_INIT(1),
128};
129
130struct fib6_node ip6_routing_table = {
131 .leaf = &ip6_null_entry,
132 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133};
134
/* Protects all the ip6 fib */

DEFINE_RWLOCK(rt6_lock);
138
139
140/* allocate dst with ip6_dst_ops */
141static __inline__ struct rt6_info *ip6_dst_alloc(void)
142{
143 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144}
145
146static void ip6_dst_destroy(struct dst_entry *dst)
147{
148 struct rt6_info *rt = (struct rt6_info *)dst;
149 struct inet6_dev *idev = rt->rt6i_idev;
150
151 if (idev != NULL) {
152 rt->rt6i_idev = NULL;
153 in6_dev_put(idev);
154 }
155}
156
157static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158 int how)
159{
160 struct rt6_info *rt = (struct rt6_info *)dst;
161 struct inet6_dev *idev = rt->rt6i_idev;
162
163 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165 if (loopback_idev != NULL) {
166 rt->rt6i_idev = loopback_idev;
167 in6_dev_put(idev);
168 }
169 }
170}
171
172static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173{
174 return (rt->rt6i_flags & RTF_EXPIRES &&
175 time_after(jiffies, rt->rt6i_expires));
176}
177
178/*
179 * Route lookup. Any rt6_lock is implied.
180 */
181
182static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183 int oif,
184 int strict)
185{
186 struct rt6_info *local = NULL;
187 struct rt6_info *sprt;
188
189 if (oif) {
190 for (sprt = rt; sprt; sprt = sprt->u.next) {
191 struct net_device *dev = sprt->rt6i_dev;
192 if (dev->ifindex == oif)
193 return sprt;
194 if (dev->flags & IFF_LOOPBACK) {
195 if (sprt->rt6i_idev == NULL ||
196 sprt->rt6i_idev->dev->ifindex != oif) {
197 if (strict && oif)
198 continue;
199 if (local && (!oif ||
200 local->rt6i_idev->dev->ifindex == oif))
201 continue;
202 }
203 local = sprt;
204 }
205 }
206
207 if (local)
208 return local;
209
210 if (strict)
211 return &ip6_null_entry;
212 }
213 return rt;
214}
215
/*
 *	pointer to the last default router chosen. BH is disabled locally.
 */
static struct rt6_info *rt6_dflt_pointer;
static DEFINE_SPINLOCK(rt6_dflt_lock);
221
222void rt6_reset_dflt_pointer(struct rt6_info *rt)
223{
224 spin_lock_bh(&rt6_dflt_lock);
225 if (rt == NULL || rt == rt6_dflt_pointer) {
226 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227 rt6_dflt_pointer = NULL;
228 }
229 spin_unlock_bh(&rt6_dflt_lock);
230}
231
232/* Default Router Selection (RFC 2461 6.3.6) */
233static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234{
235 struct rt6_info *match = NULL;
236 struct rt6_info *sprt;
237 int mpri = 0;
238
239 for (sprt = rt; sprt; sprt = sprt->u.next) {
240 struct neighbour *neigh;
241 int m = 0;
242
243 if (!oif ||
244 (sprt->rt6i_dev &&
245 sprt->rt6i_dev->ifindex == oif))
246 m += 8;
247
248 if (rt6_check_expired(sprt))
249 continue;
250
251 if (sprt == rt6_dflt_pointer)
252 m += 4;
253
254 if ((neigh = sprt->rt6i_nexthop) != NULL) {
255 read_lock_bh(&neigh->lock);
256 switch (neigh->nud_state) {
257 case NUD_REACHABLE:
258 m += 3;
259 break;
260
261 case NUD_STALE:
262 case NUD_DELAY:
263 case NUD_PROBE:
264 m += 2;
265 break;
266
267 case NUD_NOARP:
268 case NUD_PERMANENT:
269 m += 1;
270 break;
271
272 case NUD_INCOMPLETE:
273 default:
274 read_unlock_bh(&neigh->lock);
275 continue;
276 }
277 read_unlock_bh(&neigh->lock);
278 } else {
279 continue;
280 }
281
282 if (m > mpri || m >= 12) {
283 match = sprt;
284 mpri = m;
285 if (m >= 12) {
286 /* we choose the last default router if it
287 * is in (probably) reachable state.
288 * If route changed, we should do pmtu
289 * discovery. --yoshfuji
290 */
291 break;
292 }
293 }
294 }
295
296 spin_lock(&rt6_dflt_lock);
297 if (!match) {
298 /*
299 * No default routers are known to be reachable.
300 * SHOULD round robin
301 */
302 if (rt6_dflt_pointer) {
303 for (sprt = rt6_dflt_pointer->u.next;
304 sprt; sprt = sprt->u.next) {
305 if (sprt->u.dst.obsolete <= 0 &&
306 sprt->u.dst.error == 0 &&
307 !rt6_check_expired(sprt)) {
308 match = sprt;
309 break;
310 }
311 }
312 for (sprt = rt;
313 !match && sprt;
314 sprt = sprt->u.next) {
315 if (sprt->u.dst.obsolete <= 0 &&
316 sprt->u.dst.error == 0 &&
317 !rt6_check_expired(sprt)) {
318 match = sprt;
319 break;
320 }
321 if (sprt == rt6_dflt_pointer)
322 break;
323 }
324 }
325 }
326
327 if (match) {
328 if (rt6_dflt_pointer != match)
329 RT6_TRACE("changed default router: %p->%p\n",
330 rt6_dflt_pointer, match);
331 rt6_dflt_pointer = match;
332 }
333 spin_unlock(&rt6_dflt_lock);
334
335 if (!match) {
336 /*
337 * Last Resort: if no default routers found,
338 * use addrconf default route.
339 * We don't record this route.
340 */
341 for (sprt = ip6_routing_table.leaf;
342 sprt; sprt = sprt->u.next) {
343 if (!rt6_check_expired(sprt) &&
344 (sprt->rt6i_flags & RTF_DEFAULT) &&
345 (!oif ||
346 (sprt->rt6i_dev &&
347 sprt->rt6i_dev->ifindex == oif))) {
348 match = sprt;
349 break;
350 }
351 }
352 if (!match) {
353 /* no default route. give up. */
354 match = &ip6_null_entry;
355 }
356 }
357
358 return match;
359}
360
361struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362 int oif, int strict)
363{
364 struct fib6_node *fn;
365 struct rt6_info *rt;
366
367 read_lock_bh(&rt6_lock);
368 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369 rt = rt6_device_match(fn->leaf, oif, strict);
370 dst_hold(&rt->u.dst);
371 rt->u.dst.__use++;
372 read_unlock_bh(&rt6_lock);
373
374 rt->u.dst.lastuse = jiffies;
375 if (rt->u.dst.error == 0)
376 return rt;
377 dst_release(&rt->u.dst);
378 return NULL;
379}
380
381/* ip6_ins_rt is called with FREE rt6_lock.
382 It takes new route entry, the addition fails by any reason the
383 route is freed. In any case, if caller does not hold it, it may
384 be destroyed.
385 */
386
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700387int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
388 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389{
390 int err;
391
392 write_lock_bh(&rt6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700393 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 write_unlock_bh(&rt6_lock);
395
396 return err;
397}
398
399/* No rt6_lock! If COW failed, the function returns dead route entry
400 with dst->error set to errno value.
401 */
402
403static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700404 struct in6_addr *saddr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405{
406 int err;
407 struct rt6_info *rt;
408
409 /*
410 * Clone the route.
411 */
412
413 rt = ip6_rt_copy(ort);
414
415 if (rt) {
416 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
417
418 if (!(rt->rt6i_flags&RTF_GATEWAY))
419 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
420
421 rt->rt6i_dst.plen = 128;
422 rt->rt6i_flags |= RTF_CACHE;
423 rt->u.dst.flags |= DST_HOST;
424
425#ifdef CONFIG_IPV6_SUBTREES
426 if (rt->rt6i_src.plen && saddr) {
427 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
428 rt->rt6i_src.plen = 128;
429 }
430#endif
431
432 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
433
434 dst_hold(&rt->u.dst);
435
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700436 err = ip6_ins_rt(rt, NULL, NULL, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 if (err == 0)
438 return rt;
439
440 rt->u.dst.error = err;
441
442 return rt;
443 }
444 dst_hold(&ip6_null_entry.u.dst);
445 return &ip6_null_entry;
446}
447
/*
 * Used by the lookup functions after rt6_device_match(): when a strict
 * lookup ended on the null entry, climb towards the root.  At the first
 * ancestor flagged RTN_RTINFO jump to the caller's "restart" label; on
 * reaching a node flagged RTN_ROOT, take a reference on rt and jump to the
 * caller's "out" label.
 *
 * Relies on the caller's locals rt, fn and strict and on its labels out
 * and restart.  Wrapped in do { } while (0) so that "BACKTRACK();" is a
 * single statement.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
459
460
461void ip6_route_input(struct sk_buff *skb)
462{
463 struct fib6_node *fn;
464 struct rt6_info *rt;
465 int strict;
466 int attempts = 3;
467
468 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
469
470relookup:
471 read_lock_bh(&rt6_lock);
472
473 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
474 &skb->nh.ipv6h->saddr);
475
476restart:
477 rt = fn->leaf;
478
479 if ((rt->rt6i_flags & RTF_CACHE)) {
480 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
481 BACKTRACK();
482 dst_hold(&rt->u.dst);
483 goto out;
484 }
485
486 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
487 BACKTRACK();
488
489 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
490 struct rt6_info *nrt;
491 dst_hold(&rt->u.dst);
492 read_unlock_bh(&rt6_lock);
493
494 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700495 &skb->nh.ipv6h->saddr,
496 &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700497
498 dst_release(&rt->u.dst);
499 rt = nrt;
500
501 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
502 goto out2;
503
504 /* Race condition! In the gap, when rt6_lock was
505 released someone could insert this route. Relookup.
506 */
507 dst_release(&rt->u.dst);
508 goto relookup;
509 }
510 dst_hold(&rt->u.dst);
511
512out:
513 read_unlock_bh(&rt6_lock);
514out2:
515 rt->u.dst.lastuse = jiffies;
516 rt->u.dst.__use++;
517 skb->dst = (struct dst_entry *) rt;
518}
519
520struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
521{
522 struct fib6_node *fn;
523 struct rt6_info *rt;
524 int strict;
525 int attempts = 3;
526
527 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
528
529relookup:
530 read_lock_bh(&rt6_lock);
531
532 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
533
534restart:
535 rt = fn->leaf;
536
537 if ((rt->rt6i_flags & RTF_CACHE)) {
538 rt = rt6_device_match(rt, fl->oif, strict);
539 BACKTRACK();
540 dst_hold(&rt->u.dst);
541 goto out;
542 }
543 if (rt->rt6i_flags & RTF_DEFAULT) {
544 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
545 rt = rt6_best_dflt(rt, fl->oif);
546 } else {
547 rt = rt6_device_match(rt, fl->oif, strict);
548 BACKTRACK();
549 }
550
551 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
552 struct rt6_info *nrt;
553 dst_hold(&rt->u.dst);
554 read_unlock_bh(&rt6_lock);
555
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700556 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557
558 dst_release(&rt->u.dst);
559 rt = nrt;
560
561 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
562 goto out2;
563
564 /* Race condition! In the gap, when rt6_lock was
565 released someone could insert this route. Relookup.
566 */
567 dst_release(&rt->u.dst);
568 goto relookup;
569 }
570 dst_hold(&rt->u.dst);
571
572out:
573 read_unlock_bh(&rt6_lock);
574out2:
575 rt->u.dst.lastuse = jiffies;
576 rt->u.dst.__use++;
577 return &rt->u.dst;
578}
579
580
/*
 *	Destination cache support functions
 */
584
585static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
586{
587 struct rt6_info *rt;
588
589 rt = (struct rt6_info *) dst;
590
591 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
592 return dst;
593
594 return NULL;
595}
596
597static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
598{
599 struct rt6_info *rt = (struct rt6_info *) dst;
600
601 if (rt) {
602 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700603 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 else
605 dst_release(dst);
606 }
607 return NULL;
608}
609
610static void ip6_link_failure(struct sk_buff *skb)
611{
612 struct rt6_info *rt;
613
614 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
615
616 rt = (struct rt6_info *) skb->dst;
617 if (rt) {
618 if (rt->rt6i_flags&RTF_CACHE) {
619 dst_set_expires(&rt->u.dst, 0);
620 rt->rt6i_flags |= RTF_EXPIRES;
621 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
622 rt->rt6i_node->fn_sernum = -1;
623 }
624}
625
626static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
627{
628 struct rt6_info *rt6 = (struct rt6_info*)dst;
629
630 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
631 rt6->rt6i_flags |= RTF_MODIFIED;
632 if (mtu < IPV6_MIN_MTU) {
633 mtu = IPV6_MIN_MTU;
634 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
635 }
636 dst->metrics[RTAX_MTU-1] = mtu;
637 }
638}
639
/*
 * List of dst entries created by ndisc_dst_alloc(), linked via dst->next
 * and reaped by ndisc_dst_gc().  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;
static int ipv6_get_mtu(struct net_device *dev);
643
644static inline unsigned int ipv6_advmss(unsigned int mtu)
645{
646 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
647
648 if (mtu < ip6_rt_min_advmss)
649 mtu = ip6_rt_min_advmss;
650
651 /*
652 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
653 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
654 * IPV6_MAXPLEN is also valid and means: "any MSS,
655 * rely only on pmtu discovery"
656 */
657 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
658 mtu = IPV6_MAXPLEN;
659 return mtu;
660}
661
662struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
663 struct neighbour *neigh,
664 struct in6_addr *addr,
665 int (*output)(struct sk_buff *))
666{
667 struct rt6_info *rt;
668 struct inet6_dev *idev = in6_dev_get(dev);
669
670 if (unlikely(idev == NULL))
671 return NULL;
672
673 rt = ip6_dst_alloc();
674 if (unlikely(rt == NULL)) {
675 in6_dev_put(idev);
676 goto out;
677 }
678
679 dev_hold(dev);
680 if (neigh)
681 neigh_hold(neigh);
682 else
683 neigh = ndisc_get_neigh(dev, addr);
684
685 rt->rt6i_dev = dev;
686 rt->rt6i_idev = idev;
687 rt->rt6i_nexthop = neigh;
688 atomic_set(&rt->u.dst.__refcnt, 1);
689 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
690 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
691 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
692 rt->u.dst.output = output;
693
694#if 0 /* there's no chance to use these for ndisc */
695 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
696 ? DST_HOST
697 : 0;
698 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
699 rt->rt6i_dst.plen = 128;
700#endif
701
702 write_lock_bh(&rt6_lock);
703 rt->u.dst.next = ndisc_dst_gc_list;
704 ndisc_dst_gc_list = &rt->u.dst;
705 write_unlock_bh(&rt6_lock);
706
707 fib6_force_start_gc();
708
709out:
710 return (struct dst_entry *)rt;
711}
712
713int ndisc_dst_gc(int *more)
714{
715 struct dst_entry *dst, *next, **pprev;
716 int freed;
717
718 next = NULL;
719 pprev = &ndisc_dst_gc_list;
720 freed = 0;
721 while ((dst = *pprev) != NULL) {
722 if (!atomic_read(&dst->__refcnt)) {
723 *pprev = dst->next;
724 dst_free(dst);
725 freed++;
726 } else {
727 pprev = &dst->next;
728 (*more)++;
729 }
730 }
731
732 return freed;
733}
734
735static int ip6_dst_gc(void)
736{
737 static unsigned expire = 30*HZ;
738 static unsigned long last_gc;
739 unsigned long now = jiffies;
740
741 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
742 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
743 goto out;
744
745 expire++;
746 fib6_run_gc(expire);
747 last_gc = now;
748 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
749 expire = ip6_rt_gc_timeout>>1;
750
751out:
752 expire -= expire>>ip6_rt_gc_elasticity;
753 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
754}
755
/* Clean host part of a prefix. Not necessary in radix tree,
   but results in cleaner routing tables.

   Remove it only when all the things will work!
 */
761
762static int ipv6_get_mtu(struct net_device *dev)
763{
764 int mtu = IPV6_MIN_MTU;
765 struct inet6_dev *idev;
766
767 idev = in6_dev_get(dev);
768 if (idev) {
769 mtu = idev->cnf.mtu6;
770 in6_dev_put(idev);
771 }
772 return mtu;
773}
774
775int ipv6_get_hoplimit(struct net_device *dev)
776{
777 int hoplimit = ipv6_devconf.hop_limit;
778 struct inet6_dev *idev;
779
780 idev = in6_dev_get(dev);
781 if (idev) {
782 hoplimit = idev->cnf.hop_limit;
783 in6_dev_put(idev);
784 }
785 return hoplimit;
786}
787
/*
 *	Route addition and removal
 */
791
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700792int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
793 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794{
795 int err;
796 struct rtmsg *r;
797 struct rtattr **rta;
798 struct rt6_info *rt = NULL;
799 struct net_device *dev = NULL;
800 struct inet6_dev *idev = NULL;
801 int addr_type;
802
803 rta = (struct rtattr **) _rtattr;
804
805 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
806 return -EINVAL;
807#ifndef CONFIG_IPV6_SUBTREES
808 if (rtmsg->rtmsg_src_len)
809 return -EINVAL;
810#endif
811 if (rtmsg->rtmsg_ifindex) {
812 err = -ENODEV;
813 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
814 if (!dev)
815 goto out;
816 idev = in6_dev_get(dev);
817 if (!idev)
818 goto out;
819 }
820
821 if (rtmsg->rtmsg_metric == 0)
822 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
823
824 rt = ip6_dst_alloc();
825
826 if (rt == NULL) {
827 err = -ENOMEM;
828 goto out;
829 }
830
831 rt->u.dst.obsolete = -1;
832 rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
833 if (nlh && (r = NLMSG_DATA(nlh))) {
834 rt->rt6i_protocol = r->rtm_protocol;
835 } else {
836 rt->rt6i_protocol = RTPROT_BOOT;
837 }
838
839 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
840
841 if (addr_type & IPV6_ADDR_MULTICAST)
842 rt->u.dst.input = ip6_mc_input;
843 else
844 rt->u.dst.input = ip6_forward;
845
846 rt->u.dst.output = ip6_output;
847
848 ipv6_addr_prefix(&rt->rt6i_dst.addr,
849 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
850 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
851 if (rt->rt6i_dst.plen == 128)
852 rt->u.dst.flags = DST_HOST;
853
854#ifdef CONFIG_IPV6_SUBTREES
855 ipv6_addr_prefix(&rt->rt6i_src.addr,
856 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
857 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
858#endif
859
860 rt->rt6i_metric = rtmsg->rtmsg_metric;
861
862 /* We cannot add true routes via loopback here,
863 they would result in kernel looping; promote them to reject routes
864 */
865 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
866 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
867 /* hold loopback dev/idev if we haven't done so. */
868 if (dev != &loopback_dev) {
869 if (dev) {
870 dev_put(dev);
871 in6_dev_put(idev);
872 }
873 dev = &loopback_dev;
874 dev_hold(dev);
875 idev = in6_dev_get(dev);
876 if (!idev) {
877 err = -ENODEV;
878 goto out;
879 }
880 }
881 rt->u.dst.output = ip6_pkt_discard_out;
882 rt->u.dst.input = ip6_pkt_discard;
883 rt->u.dst.error = -ENETUNREACH;
884 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
885 goto install_route;
886 }
887
888 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
889 struct in6_addr *gw_addr;
890 int gwa_type;
891
892 gw_addr = &rtmsg->rtmsg_gateway;
893 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
894 gwa_type = ipv6_addr_type(gw_addr);
895
896 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
897 struct rt6_info *grt;
898
899 /* IPv6 strictly inhibits using not link-local
900 addresses as nexthop address.
901 Otherwise, router will not able to send redirects.
902 It is very good, but in some (rare!) circumstances
903 (SIT, PtP, NBMA NOARP links) it is handy to allow
904 some exceptions. --ANK
905 */
906 err = -EINVAL;
907 if (!(gwa_type&IPV6_ADDR_UNICAST))
908 goto out;
909
910 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
911
912 err = -EHOSTUNREACH;
913 if (grt == NULL)
914 goto out;
915 if (dev) {
916 if (dev != grt->rt6i_dev) {
917 dst_release(&grt->u.dst);
918 goto out;
919 }
920 } else {
921 dev = grt->rt6i_dev;
922 idev = grt->rt6i_idev;
923 dev_hold(dev);
924 in6_dev_hold(grt->rt6i_idev);
925 }
926 if (!(grt->rt6i_flags&RTF_GATEWAY))
927 err = 0;
928 dst_release(&grt->u.dst);
929
930 if (err)
931 goto out;
932 }
933 err = -EINVAL;
934 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
935 goto out;
936 }
937
938 err = -ENODEV;
939 if (dev == NULL)
940 goto out;
941
942 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
943 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
944 if (IS_ERR(rt->rt6i_nexthop)) {
945 err = PTR_ERR(rt->rt6i_nexthop);
946 rt->rt6i_nexthop = NULL;
947 goto out;
948 }
949 }
950
951 rt->rt6i_flags = rtmsg->rtmsg_flags;
952
953install_route:
954 if (rta && rta[RTA_METRICS-1]) {
955 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
956 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
957
958 while (RTA_OK(attr, attrlen)) {
959 unsigned flavor = attr->rta_type;
960 if (flavor) {
961 if (flavor > RTAX_MAX) {
962 err = -EINVAL;
963 goto out;
964 }
965 rt->u.dst.metrics[flavor-1] =
966 *(u32 *)RTA_DATA(attr);
967 }
968 attr = RTA_NEXT(attr, attrlen);
969 }
970 }
971
972 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
973 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
974 if (!rt->u.dst.metrics[RTAX_MTU-1])
975 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
976 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
977 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
978 rt->u.dst.dev = dev;
979 rt->rt6i_idev = idev;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700980 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981
982out:
983 if (dev)
984 dev_put(dev);
985 if (idev)
986 in6_dev_put(idev);
987 if (rt)
988 dst_free((struct dst_entry *) rt);
989 return err;
990}
991
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700992int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993{
994 int err;
995
996 write_lock_bh(&rt6_lock);
997
998 rt6_reset_dflt_pointer(NULL);
999
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001000 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001001 dst_release(&rt->u.dst);
1002
1003 write_unlock_bh(&rt6_lock);
1004
1005 return err;
1006}
1007
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001008static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009{
1010 struct fib6_node *fn;
1011 struct rt6_info *rt;
1012 int err = -ESRCH;
1013
1014 read_lock_bh(&rt6_lock);
1015
1016 fn = fib6_locate(&ip6_routing_table,
1017 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1018 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1019
1020 if (fn) {
1021 for (rt = fn->leaf; rt; rt = rt->u.next) {
1022 if (rtmsg->rtmsg_ifindex &&
1023 (rt->rt6i_dev == NULL ||
1024 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1025 continue;
1026 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1027 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1028 continue;
1029 if (rtmsg->rtmsg_metric &&
1030 rtmsg->rtmsg_metric != rt->rt6i_metric)
1031 continue;
1032 dst_hold(&rt->u.dst);
1033 read_unlock_bh(&rt6_lock);
1034
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001035 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 }
1037 }
1038 read_unlock_bh(&rt6_lock);
1039
1040 return err;
1041}
1042
1043/*
1044 * Handle redirects
1045 */
1046void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1047 struct neighbour *neigh, u8 *lladdr, int on_link)
1048{
1049 struct rt6_info *rt, *nrt;
1050
1051 /* Locate old route to this destination. */
1052 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1053
1054 if (rt == NULL)
1055 return;
1056
1057 if (neigh->dev != rt->rt6i_dev)
1058 goto out;
1059
1060 /*
1061 * Current route is on-link; redirect is always invalid.
1062 *
1063 * Seems, previous statement is not true. It could
1064 * be node, which looks for us as on-link (f.e. proxy ndisc)
1065 * But then router serving it might decide, that we should
1066 * know truth 8)8) --ANK (980726).
1067 */
1068 if (!(rt->rt6i_flags&RTF_GATEWAY))
1069 goto out;
1070
1071 /*
1072 * RFC 2461 specifies that redirects should only be
1073 * accepted if they come from the nexthop to the target.
1074 * Due to the way default routers are chosen, this notion
1075 * is a bit fuzzy and one might need to check all default
1076 * routers.
1077 */
1078 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1079 if (rt->rt6i_flags & RTF_DEFAULT) {
1080 struct rt6_info *rt1;
1081
1082 read_lock(&rt6_lock);
1083 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1084 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1085 dst_hold(&rt1->u.dst);
1086 dst_release(&rt->u.dst);
1087 read_unlock(&rt6_lock);
1088 rt = rt1;
1089 goto source_ok;
1090 }
1091 }
1092 read_unlock(&rt6_lock);
1093 }
1094 if (net_ratelimit())
1095 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1096 "for redirect target\n");
1097 goto out;
1098 }
1099
1100source_ok:
1101
1102 /*
1103 * We have finally decided to accept it.
1104 */
1105
1106 neigh_update(neigh, lladdr, NUD_STALE,
1107 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1108 NEIGH_UPDATE_F_OVERRIDE|
1109 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1110 NEIGH_UPDATE_F_ISROUTER))
1111 );
1112
1113 /*
1114 * Redirect received -> path was valid.
1115 * Look, redirects are sent only in response to data packets,
1116 * so that this nexthop apparently is reachable. --ANK
1117 */
1118 dst_confirm(&rt->u.dst);
1119
1120 /* Duplicate redirect: silently ignore. */
1121 if (neigh == rt->u.dst.neighbour)
1122 goto out;
1123
1124 nrt = ip6_rt_copy(rt);
1125 if (nrt == NULL)
1126 goto out;
1127
1128 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1129 if (on_link)
1130 nrt->rt6i_flags &= ~RTF_GATEWAY;
1131
1132 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1133 nrt->rt6i_dst.plen = 128;
1134 nrt->u.dst.flags |= DST_HOST;
1135
1136 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1137 nrt->rt6i_nexthop = neigh_clone(neigh);
1138 /* Reset pmtu, it may be better */
1139 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1140 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1141
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001142 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 goto out;
1144
1145 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001146 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 return;
1148 }
1149
1150out:
1151 dst_release(&rt->u.dst);
1152 return;
1153}
1154
1155/*
1156 * Handle ICMP "packet too big" messages
1157 * i.e. Path MTU discovery
1158 */
1159
1160void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1161 struct net_device *dev, u32 pmtu)
1162{
1163 struct rt6_info *rt, *nrt;
1164 int allfrag = 0;
1165
1166 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1167 if (rt == NULL)
1168 return;
1169
1170 if (pmtu >= dst_mtu(&rt->u.dst))
1171 goto out;
1172
1173 if (pmtu < IPV6_MIN_MTU) {
1174 /*
1175 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1176 * MTU (1280) and a fragment header should always be included
1177 * after a node receiving Too Big message reporting PMTU is
1178 * less than the IPv6 Minimum Link MTU.
1179 */
1180 pmtu = IPV6_MIN_MTU;
1181 allfrag = 1;
1182 }
1183
1184 /* New mtu received -> path was valid.
1185 They are sent only in response to data packets,
1186 so that this nexthop apparently is reachable. --ANK
1187 */
1188 dst_confirm(&rt->u.dst);
1189
1190 /* Host route. If it is static, it would be better
1191 not to override it, but add new one, so that
1192 when cache entry will expire old pmtu
1193 would return automatically.
1194 */
1195 if (rt->rt6i_flags & RTF_CACHE) {
1196 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1197 if (allfrag)
1198 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1199 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1200 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1201 goto out;
1202 }
1203
1204 /* Network route.
1205 Two cases are possible:
1206 1. It is connected route. Action: COW
1207 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1208 */
1209 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001210 nrt = rt6_cow(rt, daddr, saddr, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211 if (!nrt->u.dst.error) {
1212 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1213 if (allfrag)
1214 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1215 /* According to RFC 1981, detecting PMTU increase shouldn't be
1216 happened within 5 mins, the recommended timer is 10 mins.
1217 Here this route expiration time is set to ip6_rt_mtu_expires
1218 which is 10 mins. After 10 mins the decreased pmtu is expired
1219 and detecting PMTU increase will be automatically happened.
1220 */
1221 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1222 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1223 }
1224 dst_release(&nrt->u.dst);
1225 } else {
1226 nrt = ip6_rt_copy(rt);
1227 if (nrt == NULL)
1228 goto out;
1229 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1230 nrt->rt6i_dst.plen = 128;
1231 nrt->u.dst.flags |= DST_HOST;
1232 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1233 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1234 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1235 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1236 if (allfrag)
1237 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001238 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 }
1240
1241out:
1242 dst_release(&rt->u.dst);
1243}
1244
1245/*
1246 * Misc support functions
1247 */
1248
1249static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1250{
1251 struct rt6_info *rt = ip6_dst_alloc();
1252
1253 if (rt) {
1254 rt->u.dst.input = ort->u.dst.input;
1255 rt->u.dst.output = ort->u.dst.output;
1256
1257 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1258 rt->u.dst.dev = ort->u.dst.dev;
1259 if (rt->u.dst.dev)
1260 dev_hold(rt->u.dst.dev);
1261 rt->rt6i_idev = ort->rt6i_idev;
1262 if (rt->rt6i_idev)
1263 in6_dev_hold(rt->rt6i_idev);
1264 rt->u.dst.lastuse = jiffies;
1265 rt->rt6i_expires = 0;
1266
1267 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1268 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1269 rt->rt6i_metric = 0;
1270
1271 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1272#ifdef CONFIG_IPV6_SUBTREES
1273 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1274#endif
1275 }
1276 return rt;
1277}
1278
1279struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1280{
1281 struct rt6_info *rt;
1282 struct fib6_node *fn;
1283
1284 fn = &ip6_routing_table;
1285
1286 write_lock_bh(&rt6_lock);
1287 for (rt = fn->leaf; rt; rt=rt->u.next) {
1288 if (dev == rt->rt6i_dev &&
1289 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1290 break;
1291 }
1292 if (rt)
1293 dst_hold(&rt->u.dst);
1294 write_unlock_bh(&rt6_lock);
1295 return rt;
1296}
1297
1298struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1299 struct net_device *dev)
1300{
1301 struct in6_rtmsg rtmsg;
1302
1303 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1304 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1305 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1306 rtmsg.rtmsg_metric = 1024;
1307 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1308
1309 rtmsg.rtmsg_ifindex = dev->ifindex;
1310
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001311 ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312 return rt6_get_dflt_router(gwaddr, dev);
1313}
1314
1315void rt6_purge_dflt_routers(void)
1316{
1317 struct rt6_info *rt;
1318
1319restart:
1320 read_lock_bh(&rt6_lock);
1321 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1322 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1323 dst_hold(&rt->u.dst);
1324
1325 rt6_reset_dflt_pointer(NULL);
1326
1327 read_unlock_bh(&rt6_lock);
1328
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001329 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
1331 goto restart;
1332 }
1333 }
1334 read_unlock_bh(&rt6_lock);
1335}
1336
1337int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1338{
1339 struct in6_rtmsg rtmsg;
1340 int err;
1341
1342 switch(cmd) {
1343 case SIOCADDRT: /* Add a route */
1344 case SIOCDELRT: /* Delete a route */
1345 if (!capable(CAP_NET_ADMIN))
1346 return -EPERM;
1347 err = copy_from_user(&rtmsg, arg,
1348 sizeof(struct in6_rtmsg));
1349 if (err)
1350 return -EFAULT;
1351
1352 rtnl_lock();
1353 switch (cmd) {
1354 case SIOCADDRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001355 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 break;
1357 case SIOCDELRT:
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001358 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 break;
1360 default:
1361 err = -EINVAL;
1362 }
1363 rtnl_unlock();
1364
1365 return err;
1366 };
1367
1368 return -EINVAL;
1369}
1370
1371/*
1372 * Drop the packet on the floor
1373 */
1374
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001375static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001376{
1377 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1378 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1379 kfree_skb(skb);
1380 return 0;
1381}
1382
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001383static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384{
1385 skb->dev = skb->dst->dev;
1386 return ip6_pkt_discard(skb);
1387}
1388
1389/*
1390 * Allocate a dst for local (unicast / anycast) address.
1391 */
1392
1393struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1394 const struct in6_addr *addr,
1395 int anycast)
1396{
1397 struct rt6_info *rt = ip6_dst_alloc();
1398
1399 if (rt == NULL)
1400 return ERR_PTR(-ENOMEM);
1401
1402 dev_hold(&loopback_dev);
1403 in6_dev_hold(idev);
1404
1405 rt->u.dst.flags = DST_HOST;
1406 rt->u.dst.input = ip6_input;
1407 rt->u.dst.output = ip6_output;
1408 rt->rt6i_dev = &loopback_dev;
1409 rt->rt6i_idev = idev;
1410 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1411 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1412 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1413 rt->u.dst.obsolete = -1;
1414
1415 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1416 if (!anycast)
1417 rt->rt6i_flags |= RTF_LOCAL;
1418 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1419 if (rt->rt6i_nexthop == NULL) {
1420 dst_free((struct dst_entry *) rt);
1421 return ERR_PTR(-ENOMEM);
1422 }
1423
1424 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1425 rt->rt6i_dst.plen = 128;
1426
1427 atomic_set(&rt->u.dst.__refcnt, 1);
1428
1429 return rt;
1430}
1431
1432static int fib6_ifdown(struct rt6_info *rt, void *arg)
1433{
1434 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1435 rt != &ip6_null_entry) {
1436 RT6_TRACE("deleted by ifdown %p\n", rt);
1437 return -1;
1438 }
1439 return 0;
1440}
1441
1442void rt6_ifdown(struct net_device *dev)
1443{
1444 write_lock_bh(&rt6_lock);
1445 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1446 write_unlock_bh(&rt6_lock);
1447}
1448
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
1454
1455static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1456{
1457 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1458 struct inet6_dev *idev;
1459
1460 /* In IPv6 pmtu discovery is not optional,
1461 so that RTAX_MTU lock cannot disable it.
1462 We still use this lock to block changes
1463 caused by addrconf/ndisc.
1464 */
1465
1466 idev = __in6_dev_get(arg->dev);
1467 if (idev == NULL)
1468 return 0;
1469
1470 /* For administrative MTU increase, there is no way to discover
1471 IPv6 PMTU increase, so PMTU increase should be updated here.
1472 Since RFC 1981 doesn't include administrative MTU increase
1473 update PMTU increase is a MUST. (i.e. jumbo frame)
1474 */
1475 /*
1476 If new MTU is less than route PMTU, this new MTU will be the
1477 lowest MTU in the path, update the route PMTU to reflect PMTU
1478 decreases; if new MTU is greater than route PMTU, and the
1479 old MTU is the lowest MTU in the path, update the route PMTU
1480 to reflect the increase. In this case if the other nodes' MTU
1481 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1482 PMTU discouvery.
1483 */
1484 if (rt->rt6i_dev == arg->dev &&
1485 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1486 (dst_mtu(&rt->u.dst) > arg->mtu ||
1487 (dst_mtu(&rt->u.dst) < arg->mtu &&
1488 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1489 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1490 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1491 return 0;
1492}
1493
1494void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1495{
1496 struct rt6_mtu_change_arg arg;
1497
1498 arg.dev = dev;
1499 arg.mtu = mtu;
1500 read_lock_bh(&rt6_lock);
1501 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1502 read_unlock_bh(&rt6_lock);
1503}
1504
1505static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1506 struct in6_rtmsg *rtmsg)
1507{
1508 memset(rtmsg, 0, sizeof(*rtmsg));
1509
1510 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1511 rtmsg->rtmsg_src_len = r->rtm_src_len;
1512 rtmsg->rtmsg_flags = RTF_UP;
1513 if (r->rtm_type == RTN_UNREACHABLE)
1514 rtmsg->rtmsg_flags |= RTF_REJECT;
1515
1516 if (rta[RTA_GATEWAY-1]) {
1517 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1518 return -EINVAL;
1519 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1520 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1521 }
1522 if (rta[RTA_DST-1]) {
1523 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1524 return -EINVAL;
1525 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1526 }
1527 if (rta[RTA_SRC-1]) {
1528 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1529 return -EINVAL;
1530 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1531 }
1532 if (rta[RTA_OIF-1]) {
1533 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1534 return -EINVAL;
1535 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1536 }
1537 if (rta[RTA_PRIORITY-1]) {
1538 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1539 return -EINVAL;
1540 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1541 }
1542 return 0;
1543}
1544
1545int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1546{
1547 struct rtmsg *r = NLMSG_DATA(nlh);
1548 struct in6_rtmsg rtmsg;
1549
1550 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1551 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001552 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553}
1554
1555int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1556{
1557 struct rtmsg *r = NLMSG_DATA(nlh);
1558 struct in6_rtmsg rtmsg;
1559
1560 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1561 return -EINVAL;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001562 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563}
1564
/* Per-call context that inet6_dump_fib() hands to the tree walker. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump state */
};
1570
1571static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001572 struct in6_addr *dst, struct in6_addr *src,
1573 int iif, int type, u32 pid, u32 seq,
1574 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575{
1576 struct rtmsg *rtm;
1577 struct nlmsghdr *nlh;
1578 unsigned char *b = skb->tail;
1579 struct rta_cacheinfo ci;
1580
1581 if (prefix) { /* user wants prefix routes only */
1582 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1583 /* success since this is not a prefix route */
1584 return 1;
1585 }
1586 }
1587
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001588 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589 rtm = NLMSG_DATA(nlh);
1590 rtm->rtm_family = AF_INET6;
1591 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1592 rtm->rtm_src_len = rt->rt6i_src.plen;
1593 rtm->rtm_tos = 0;
1594 rtm->rtm_table = RT_TABLE_MAIN;
1595 if (rt->rt6i_flags&RTF_REJECT)
1596 rtm->rtm_type = RTN_UNREACHABLE;
1597 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1598 rtm->rtm_type = RTN_LOCAL;
1599 else
1600 rtm->rtm_type = RTN_UNICAST;
1601 rtm->rtm_flags = 0;
1602 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1603 rtm->rtm_protocol = rt->rt6i_protocol;
1604 if (rt->rt6i_flags&RTF_DYNAMIC)
1605 rtm->rtm_protocol = RTPROT_REDIRECT;
1606 else if (rt->rt6i_flags & RTF_ADDRCONF)
1607 rtm->rtm_protocol = RTPROT_KERNEL;
1608 else if (rt->rt6i_flags&RTF_DEFAULT)
1609 rtm->rtm_protocol = RTPROT_RA;
1610
1611 if (rt->rt6i_flags&RTF_CACHE)
1612 rtm->rtm_flags |= RTM_F_CLONED;
1613
1614 if (dst) {
1615 RTA_PUT(skb, RTA_DST, 16, dst);
1616 rtm->rtm_dst_len = 128;
1617 } else if (rtm->rtm_dst_len)
1618 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1619#ifdef CONFIG_IPV6_SUBTREES
1620 if (src) {
1621 RTA_PUT(skb, RTA_SRC, 16, src);
1622 rtm->rtm_src_len = 128;
1623 } else if (rtm->rtm_src_len)
1624 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1625#endif
1626 if (iif)
1627 RTA_PUT(skb, RTA_IIF, 4, &iif);
1628 else if (dst) {
1629 struct in6_addr saddr_buf;
1630 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1631 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1632 }
1633 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1634 goto rtattr_failure;
1635 if (rt->u.dst.neighbour)
1636 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1637 if (rt->u.dst.dev)
1638 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1639 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1640 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1641 if (rt->rt6i_expires)
1642 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1643 else
1644 ci.rta_expires = 0;
1645 ci.rta_used = rt->u.dst.__use;
1646 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1647 ci.rta_error = rt->u.dst.error;
1648 ci.rta_id = 0;
1649 ci.rta_ts = 0;
1650 ci.rta_tsage = 0;
1651 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1652 nlh->nlmsg_len = skb->tail - b;
1653 return skb->len;
1654
1655nlmsg_failure:
1656rtattr_failure:
1657 skb_trim(skb, b - skb->data);
1658 return -1;
1659}
1660
1661static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1662{
1663 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1664 int prefix;
1665
1666 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1667 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1668 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1669 } else
1670 prefix = 0;
1671
1672 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1673 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001674 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675}
1676
1677static int fib6_dump_node(struct fib6_walker_t *w)
1678{
1679 int res;
1680 struct rt6_info *rt;
1681
1682 for (rt = w->leaf; rt; rt = rt->u.next) {
1683 res = rt6_dump_route(rt, w->args);
1684 if (res < 0) {
1685 /* Frame is full, suspend walking */
1686 w->leaf = rt;
1687 return 1;
1688 }
1689 BUG_TRAP(res!=0);
1690 }
1691 w->leaf = NULL;
1692 return 0;
1693}
1694
1695static void fib6_dump_end(struct netlink_callback *cb)
1696{
1697 struct fib6_walker_t *w = (void*)cb->args[0];
1698
1699 if (w) {
1700 cb->args[0] = 0;
1701 fib6_walker_unlink(w);
1702 kfree(w);
1703 }
1704 if (cb->args[1]) {
1705 cb->done = (void*)cb->args[1];
1706 cb->args[1] = 0;
1707 }
1708}
1709
1710static int fib6_dump_done(struct netlink_callback *cb)
1711{
1712 fib6_dump_end(cb);
1713 return cb->done(cb);
1714}
1715
1716int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1717{
1718 struct rt6_rtnl_dump_arg arg;
1719 struct fib6_walker_t *w;
1720 int res;
1721
1722 arg.skb = skb;
1723 arg.cb = cb;
1724
1725 w = (void*)cb->args[0];
1726 if (w == NULL) {
1727 /* New dump:
1728 *
1729 * 1. hook callback destructor.
1730 */
1731 cb->args[1] = (long)cb->done;
1732 cb->done = fib6_dump_done;
1733
1734 /*
1735 * 2. allocate and initialize walker.
1736 */
1737 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1738 if (w == NULL)
1739 return -ENOMEM;
1740 RT6_TRACE("dump<%p", w);
1741 memset(w, 0, sizeof(*w));
1742 w->root = &ip6_routing_table;
1743 w->func = fib6_dump_node;
1744 w->args = &arg;
1745 cb->args[0] = (long)w;
1746 read_lock_bh(&rt6_lock);
1747 res = fib6_walk(w);
1748 read_unlock_bh(&rt6_lock);
1749 } else {
1750 w->args = &arg;
1751 read_lock_bh(&rt6_lock);
1752 res = fib6_walk_continue(w);
1753 read_unlock_bh(&rt6_lock);
1754 }
1755#if RT6_DEBUG >= 3
1756 if (res <= 0 && skb->len == 0)
1757 RT6_TRACE("%p>dump end\n", w);
1758#endif
1759 res = res < 0 ? res : skb->len;
1760 /* res < 0 is an error. (really, impossible)
1761 res == 0 means that dump is complete, but skb still can contain data.
1762 res > 0 dump is not complete, but frame is full.
1763 */
1764 /* Destroy walker, if dump of this table is complete. */
1765 if (res <= 0)
1766 fib6_dump_end(cb);
1767 return res;
1768}
1769
1770int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1771{
1772 struct rtattr **rta = arg;
1773 int iif = 0;
1774 int err = -ENOBUFS;
1775 struct sk_buff *skb;
1776 struct flowi fl;
1777 struct rt6_info *rt;
1778
1779 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1780 if (skb == NULL)
1781 goto out;
1782
1783 /* Reserve room for dummy headers, this skb can pass
1784 through good chunk of routing engine.
1785 */
1786 skb->mac.raw = skb->data;
1787 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1788
1789 memset(&fl, 0, sizeof(fl));
1790 if (rta[RTA_SRC-1])
1791 ipv6_addr_copy(&fl.fl6_src,
1792 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1793 if (rta[RTA_DST-1])
1794 ipv6_addr_copy(&fl.fl6_dst,
1795 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1796
1797 if (rta[RTA_IIF-1])
1798 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1799
1800 if (iif) {
1801 struct net_device *dev;
1802 dev = __dev_get_by_index(iif);
1803 if (!dev) {
1804 err = -ENODEV;
1805 goto out_free;
1806 }
1807 }
1808
1809 fl.oif = 0;
1810 if (rta[RTA_OIF-1])
1811 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1812
1813 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1814
1815 skb->dst = &rt->u.dst;
1816
1817 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1818 err = rt6_fill_node(skb, rt,
1819 &fl.fl6_dst, &fl.fl6_src,
1820 iif,
1821 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001822 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823 if (err < 0) {
1824 err = -EMSGSIZE;
1825 goto out_free;
1826 }
1827
1828 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1829 if (err > 0)
1830 err = 0;
1831out:
1832 return err;
1833out_free:
1834 kfree_skb(skb);
1835 goto out;
1836}
1837
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001838void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1839 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840{
1841 struct sk_buff *skb;
1842 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001843 u32 pid = current->pid;
1844 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001845
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001846 if (req)
1847 pid = req->pid;
1848 if (nlh)
1849 seq = nlh->nlmsg_seq;
1850
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851 skb = alloc_skb(size, gfp_any());
1852 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07001853 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854 return;
1855 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001856 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07001858 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859 return;
1860 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07001861 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1862 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863}
1864
1865/*
1866 * /proc
1867 */
1868
1869#ifdef CONFIG_PROC_FS
1870
1871#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1872
/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* routes skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
1881
1882static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1883{
1884 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1885 int i;
1886
1887 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1888 arg->skip++;
1889 return 0;
1890 }
1891
1892 if (arg->len >= arg->length)
1893 return 0;
1894
1895 for (i=0; i<16; i++) {
1896 sprintf(arg->buffer + arg->len, "%02x",
1897 rt->rt6i_dst.addr.s6_addr[i]);
1898 arg->len += 2;
1899 }
1900 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1901 rt->rt6i_dst.plen);
1902
1903#ifdef CONFIG_IPV6_SUBTREES
1904 for (i=0; i<16; i++) {
1905 sprintf(arg->buffer + arg->len, "%02x",
1906 rt->rt6i_src.addr.s6_addr[i]);
1907 arg->len += 2;
1908 }
1909 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1910 rt->rt6i_src.plen);
1911#else
1912 sprintf(arg->buffer + arg->len,
1913 "00000000000000000000000000000000 00 ");
1914 arg->len += 36;
1915#endif
1916
1917 if (rt->rt6i_nexthop) {
1918 for (i=0; i<16; i++) {
1919 sprintf(arg->buffer + arg->len, "%02x",
1920 rt->rt6i_nexthop->primary_key[i]);
1921 arg->len += 2;
1922 }
1923 } else {
1924 sprintf(arg->buffer + arg->len,
1925 "00000000000000000000000000000000");
1926 arg->len += 32;
1927 }
1928 arg->len += sprintf(arg->buffer + arg->len,
1929 " %08x %08x %08x %08x %8s\n",
1930 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1931 rt->u.dst.__use, rt->rt6i_flags,
1932 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1933 return 0;
1934}
1935
1936static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1937{
1938 struct rt6_proc_arg arg;
1939 arg.buffer = buffer;
1940 arg.offset = offset;
1941 arg.length = length;
1942 arg.skip = 0;
1943 arg.len = 0;
1944
1945 read_lock_bh(&rt6_lock);
1946 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1947 read_unlock_bh(&rt6_lock);
1948
1949 *start = buffer;
1950 if (offset)
1951 *start += offset % RT6_INFO_LEN;
1952
1953 arg.len -= offset % RT6_INFO_LEN;
1954
1955 if (arg.len > length)
1956 arg.len = length;
1957 if (arg.len < 0)
1958 arg.len = 0;
1959
1960 return arg.len;
1961}
1962
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1964{
1965 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1966 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1967 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1968 rt6_stats.fib_rt_cache,
1969 atomic_read(&ip6_dst_ops.entries),
1970 rt6_stats.fib_discarded_routes);
1971
1972 return 0;
1973}
1974
1975static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1976{
1977 return single_open(file, rt6_stats_seq_show, NULL);
1978}
1979
1980static struct file_operations rt6_stats_seq_fops = {
1981 .owner = THIS_MODULE,
1982 .open = rt6_stats_seq_open,
1983 .read = seq_read,
1984 .llseek = seq_lseek,
1985 .release = single_release,
1986};
1987#endif /* CONFIG_PROC_FS */
1988
1989#ifdef CONFIG_SYSCTL
1990
1991static int flush_delay;
1992
1993static
1994int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1995 void __user *buffer, size_t *lenp, loff_t *ppos)
1996{
1997 if (write) {
1998 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1999 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2000 return 0;
2001 } else
2002 return -EINVAL;
2003}
2004
2005ctl_table ipv6_route_table[] = {
2006 {
2007 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2008 .procname = "flush",
2009 .data = &flush_delay,
2010 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002011 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002012 .proc_handler = &ipv6_sysctl_rtcache_flush
2013 },
2014 {
2015 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2016 .procname = "gc_thresh",
2017 .data = &ip6_dst_ops.gc_thresh,
2018 .maxlen = sizeof(int),
2019 .mode = 0644,
2020 .proc_handler = &proc_dointvec,
2021 },
2022 {
2023 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2024 .procname = "max_size",
2025 .data = &ip6_rt_max_size,
2026 .maxlen = sizeof(int),
2027 .mode = 0644,
2028 .proc_handler = &proc_dointvec,
2029 },
2030 {
2031 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2032 .procname = "gc_min_interval",
2033 .data = &ip6_rt_gc_min_interval,
2034 .maxlen = sizeof(int),
2035 .mode = 0644,
2036 .proc_handler = &proc_dointvec_jiffies,
2037 .strategy = &sysctl_jiffies,
2038 },
2039 {
2040 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2041 .procname = "gc_timeout",
2042 .data = &ip6_rt_gc_timeout,
2043 .maxlen = sizeof(int),
2044 .mode = 0644,
2045 .proc_handler = &proc_dointvec_jiffies,
2046 .strategy = &sysctl_jiffies,
2047 },
2048 {
2049 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2050 .procname = "gc_interval",
2051 .data = &ip6_rt_gc_interval,
2052 .maxlen = sizeof(int),
2053 .mode = 0644,
2054 .proc_handler = &proc_dointvec_jiffies,
2055 .strategy = &sysctl_jiffies,
2056 },
2057 {
2058 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2059 .procname = "gc_elasticity",
2060 .data = &ip6_rt_gc_elasticity,
2061 .maxlen = sizeof(int),
2062 .mode = 0644,
2063 .proc_handler = &proc_dointvec_jiffies,
2064 .strategy = &sysctl_jiffies,
2065 },
2066 {
2067 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2068 .procname = "mtu_expires",
2069 .data = &ip6_rt_mtu_expires,
2070 .maxlen = sizeof(int),
2071 .mode = 0644,
2072 .proc_handler = &proc_dointvec_jiffies,
2073 .strategy = &sysctl_jiffies,
2074 },
2075 {
2076 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2077 .procname = "min_adv_mss",
2078 .data = &ip6_rt_min_advmss,
2079 .maxlen = sizeof(int),
2080 .mode = 0644,
2081 .proc_handler = &proc_dointvec_jiffies,
2082 .strategy = &sysctl_jiffies,
2083 },
2084 {
2085 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2086 .procname = "gc_min_interval_ms",
2087 .data = &ip6_rt_gc_min_interval,
2088 .maxlen = sizeof(int),
2089 .mode = 0644,
2090 .proc_handler = &proc_dointvec_ms_jiffies,
2091 .strategy = &sysctl_ms_jiffies,
2092 },
2093 { .ctl_name = 0 }
2094};
2095
2096#endif
2097
2098void __init ip6_route_init(void)
2099{
2100 struct proc_dir_entry *p;
2101
2102 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2103 sizeof(struct rt6_info),
2104 0, SLAB_HWCACHE_ALIGN,
2105 NULL, NULL);
2106 if (!ip6_dst_ops.kmem_cachep)
2107 panic("cannot create ip6_dst_cache");
2108
2109 fib6_init();
2110#ifdef CONFIG_PROC_FS
2111 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2112 if (p)
2113 p->owner = THIS_MODULE;
2114
2115 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2116#endif
2117#ifdef CONFIG_XFRM
2118 xfrm6_init();
2119#endif
2120}
2121
2122void ip6_route_cleanup(void)
2123{
2124#ifdef CONFIG_PROC_FS
2125 proc_net_remove("ipv6_route");
2126 proc_net_remove("rt6_stats");
2127#endif
2128#ifdef CONFIG_XFRM
2129 xfrm6_fini();
2130#endif
2131 rt6_ifdown(NULL);
2132 fib6_gc_cleanup();
2133 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2134}