blob: a1b0f075462e0e64fd76a91cd6696765e0f8b6e4 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070025 * Ville Nuorvala
26 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070027 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070057#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070058#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070059
60#include <asm/uaccess.h>
61
62#ifdef CONFIG_SYSCTL
63#include <linux/sysctl.h>
64#endif
65
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk() calls; at lower levels they expand
 * to nothing, so their arguments are never evaluated.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * Compile-time switch read by the policy-route lookups in this file:
 * when non-zero, routes that already have a nexthop (or are NONEXTHOP)
 * are cloned via rt6_alloc_clone(); when zero they are returned as-is.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
79static int ip6_rt_max_size = 4096;
80static int ip6_rt_gc_min_interval = HZ / 2;
81static int ip6_rt_gc_timeout = 60*HZ;
82int ip6_rt_gc_interval = 30*HZ;
83static int ip6_rt_gc_elasticity = 9;
84static int ip6_rt_mtu_expires = 10*60*HZ;
85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90static void ip6_dst_destroy(struct dst_entry *);
91static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93static int ip6_dst_gc(void);
94
95static int ip6_pkt_discard(struct sk_buff *skb);
96static int ip6_pkt_discard_out(struct sk_buff *skb);
97static void ip6_link_failure(struct sk_buff *skb);
98static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800100#ifdef CONFIG_IPV6_ROUTE_INFO
101static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106#endif
107
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
120};
121
122struct rt6_info ip6_null_entry = {
123 .u = {
124 .dst = {
125 .__refcnt = ATOMIC_INIT(1),
126 .__use = 1,
127 .dev = &loopback_dev,
128 .obsolete = -1,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
133 .ops = &ip6_dst_ops,
134 .path = (struct dst_entry*)&ip6_null_entry,
135 }
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
Thomas Graf101367c2006-08-04 03:39:02 -0700142#ifdef CONFIG_IPV6_MULTIPLE_TABLES
143
144struct rt6_info ip6_prohibit_entry = {
145 .u = {
146 .dst = {
147 .__refcnt = ATOMIC_INIT(1),
148 .__use = 1,
149 .dev = &loopback_dev,
150 .obsolete = -1,
151 .error = -EACCES,
152 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
153 .input = ip6_pkt_discard,
154 .output = ip6_pkt_discard_out,
155 .ops = &ip6_dst_ops,
156 .path = (struct dst_entry*)&ip6_prohibit_entry,
157 }
158 },
159 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
160 .rt6i_metric = ~(u32) 0,
161 .rt6i_ref = ATOMIC_INIT(1),
162};
163
164struct rt6_info ip6_blk_hole_entry = {
165 .u = {
166 .dst = {
167 .__refcnt = ATOMIC_INIT(1),
168 .__use = 1,
169 .dev = &loopback_dev,
170 .obsolete = -1,
171 .error = -EINVAL,
172 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
173 .input = ip6_pkt_discard,
174 .output = ip6_pkt_discard_out,
175 .ops = &ip6_dst_ops,
176 .path = (struct dst_entry*)&ip6_blk_hole_entry,
177 }
178 },
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
180 .rt6i_metric = ~(u32) 0,
181 .rt6i_ref = ATOMIC_INIT(1),
182};
183
184#endif
185
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186/* allocate dst with ip6_dst_ops */
187static __inline__ struct rt6_info *ip6_dst_alloc(void)
188{
189 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
190}
191
192static void ip6_dst_destroy(struct dst_entry *dst)
193{
194 struct rt6_info *rt = (struct rt6_info *)dst;
195 struct inet6_dev *idev = rt->rt6i_idev;
196
197 if (idev != NULL) {
198 rt->rt6i_idev = NULL;
199 in6_dev_put(idev);
200 }
201}
202
203static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
204 int how)
205{
206 struct rt6_info *rt = (struct rt6_info *)dst;
207 struct inet6_dev *idev = rt->rt6i_idev;
208
209 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
210 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
211 if (loopback_idev != NULL) {
212 rt->rt6i_idev = loopback_idev;
213 in6_dev_put(idev);
214 }
215 }
216}
217
218static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219{
220 return (rt->rt6i_flags & RTF_EXPIRES &&
221 time_after(jiffies, rt->rt6i_expires));
222}
223
Thomas Grafc71099a2006-08-04 23:20:06 -0700224static inline int rt6_need_strict(struct in6_addr *daddr)
225{
226 return (ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
228}
229
/*
 *	Route lookup. The caller is expected to hold the relevant
 *	table->tb6_lock.
 */
233
234static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
235 int oif,
236 int strict)
237{
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
240
241 if (oif) {
242 for (sprt = rt; sprt; sprt = sprt->u.next) {
243 struct net_device *dev = sprt->rt6i_dev;
244 if (dev->ifindex == oif)
245 return sprt;
246 if (dev->flags & IFF_LOOPBACK) {
247 if (sprt->rt6i_idev == NULL ||
248 sprt->rt6i_idev->dev->ifindex != oif) {
249 if (strict && oif)
250 continue;
251 if (local && (!oif ||
252 local->rt6i_idev->dev->ifindex == oif))
253 continue;
254 }
255 local = sprt;
256 }
257 }
258
259 if (local)
260 return local;
261
262 if (strict)
263 return &ip6_null_entry;
264 }
265 return rt;
266}
267
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the route's nexthop neighbour is not in a NUD_VALID state and at
 * least cnf.rtr_probe_interval jiffies have passed since neigh->updated,
 * stamp the neighbour with the current time and send a Neighbour
 * Solicitation to the solicited-node multicast address of the nexthop.
 * The original note here states that such probes MUST be rate-limited to
 * no more than one per minute.
 *
 * Does nothing when @rt is NULL or has no nexthop.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* cheap check without the lock; re-checked under the lock below */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* the solicitation is sent after the neighbour lock is dropped */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
303
/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800307static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800309 struct net_device *dev = rt->rt6i_dev;
310 if (!oif || dev->ifindex == oif)
311 return 2;
312 if ((dev->flags & IFF_LOOPBACK) &&
313 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
314 return 1;
315 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316}
317
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800318static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800320 struct neighbour *neigh = rt->rt6i_nexthop;
321 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700322 if (rt->rt6i_flags & RTF_NONEXTHOP ||
323 !(rt->rt6i_flags & RTF_GATEWAY))
324 m = 1;
325 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800326 read_lock_bh(&neigh->lock);
327 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700328 m = 2;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 read_unlock_bh(&neigh->lock);
330 }
331 return m;
332}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800334static int rt6_score_route(struct rt6_info *rt, int oif,
335 int strict)
336{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700337 int m, n;
338
339 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700340 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800341 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800342#ifdef CONFIG_IPV6_ROUTER_PREF
343 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
344#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700345 n = rt6_check_neigh(rt);
346 if (n > 1)
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800347 m |= 16;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700348 else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800349 return -1;
350 return m;
351}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800353static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
354 int strict)
355{
356 struct rt6_info *match = NULL, *last = NULL;
357 struct rt6_info *rt, *rt0 = *head;
358 u32 metric;
359 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800361 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
362 __FUNCTION__, head, head ? *head : NULL, oif);
363
364 for (rt = rt0, metric = rt0->rt6i_metric;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700365 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800366 rt = rt->u.next) {
367 int m;
368
369 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 continue;
371
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800372 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800374 m = rt6_score_route(rt, oif, strict);
375 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800378 if (m > mpri) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800379 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800380 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381 mpri = m;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800382 } else {
383 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384 }
385 }
386
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800387 if (!match &&
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700388 (strict & RT6_LOOKUP_F_REACHABLE) &&
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800389 last && last != rt0) {
390 /* no entries matched; do round-robin */
Ingo Molnar34af9462006-06-27 02:53:55 -0700391 static DEFINE_SPINLOCK(lock);
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700392 spin_lock(&lock);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800393 *head = rt0->u.next;
394 rt0->u.next = last->u.next;
395 last->u.next = rt0;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700396 spin_unlock(&lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397 }
398
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800399 RT6_TRACE("%s() => %p, score=%d\n",
400 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800402 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403}
404
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received in a Router Advertisement
 * (RFC 4191).
 *
 * @dev:    device the advertisement arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * The option's length field (counted in 8-octet units per RFC 4191) must
 * be at most 3 and large enough for the advertised prefix length.
 * Depending on the advertised lifetime, the matching route is deleted
 * (lifetime 0), created, or has its preference and expiry refreshed.
 * A lifetime of 0xffffffff means "never expires".
 *
 * Returns 0 on success, or -EINVAL if the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/* An invalid preference value is treated as medium. */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/*
	 * The lifetime arrives in network byte order, so the conversion is
	 * network-to-host: ntohl(), not htonl().
	 */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		/* full 128-bit prefix is present in the option */
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/*
		 * NOTE(review): presumably drops the reference returned by
		 * rt6_get_route_info()/rt6_add_route_info() - confirm.
		 */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
482
/*
 * BACKTRACK(saddr) - climb the FIB tree after a failed selection.
 *
 * Expands inside a lookup function that provides the locals 'rt' and 'fn'
 * and the labels 'restart' and 'out'.  It only acts when rt is
 * &ip6_null_entry:
 *   - at a node flagged RTN_TL_ROOT it gives up and jumps to 'out';
 *   - otherwise it moves to the parent, or, when the parent has a subtree
 *     that is not the current node, to the result of looking @saddr up in
 *     that subtree;
 *   - as soon as the new node is flagged RTN_RTINFO it jumps to 'restart'
 *     so that the selection is retried there.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *parent; \
		while (fn) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			parent = fn->parent; \
			fn = (FIB6_SUBTREE(parent) && FIB6_SUBTREE(parent) != fn) ? \
				fib6_lookup(parent->subtree, NULL, saddr) : parent; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700500
501static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
502 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503{
504 struct fib6_node *fn;
505 struct rt6_info *rt;
506
Thomas Grafc71099a2006-08-04 23:20:06 -0700507 read_lock_bh(&table->tb6_lock);
508 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
509restart:
510 rt = fn->leaf;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700511 rt = rt6_device_match(rt, fl->oif, flags);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700512 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700513out:
YOSHIFUJI Hideaki33cc4892006-08-28 13:19:30 -0700514 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700515 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516
517 rt->u.dst.lastuse = jiffies;
Thomas Grafc71099a2006-08-04 23:20:06 -0700518 rt->u.dst.__use++;
519
520 return rt;
521
522}
523
524struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
525 int oif, int strict)
526{
527 struct flowi fl = {
528 .oif = oif,
529 .nl_u = {
530 .ip6_u = {
531 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700532 },
533 },
534 };
535 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700536 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700537
Thomas Grafadaa70b2006-10-13 15:01:03 -0700538 if (saddr) {
539 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
540 flags |= RT6_LOOKUP_F_HAS_SADDR;
541 }
542
Thomas Grafc71099a2006-08-04 23:20:06 -0700543 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
544 if (dst->error == 0)
545 return (struct rt6_info *) dst;
546
547 dst_release(dst);
548
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 return NULL;
550}
551
Thomas Grafc71099a2006-08-04 23:20:06 -0700552/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553 It takes new route entry, the addition fails by any reason the
554 route is freed. In any case, if caller does not hold it, it may
555 be destroyed.
556 */
557
Thomas Graf86872cb2006-08-22 00:01:08 -0700558static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559{
560 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700561 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562
Thomas Grafc71099a2006-08-04 23:20:06 -0700563 table = rt->rt6i_table;
564 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700565 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700566 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700567
568 return err;
569}
570
Thomas Graf40e22e82006-08-22 00:00:45 -0700571int ip6_ins_rt(struct rt6_info *rt)
572{
Thomas Graf86872cb2006-08-22 00:01:08 -0700573 return __ip6_ins_rt(rt, NULL);
Thomas Graf40e22e82006-08-22 00:00:45 -0700574}
575
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800576static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
577 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 struct rt6_info *rt;
580
581 /*
582 * Clone the route.
583 */
584
585 rt = ip6_rt_copy(ort);
586
587 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900588 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
589 if (rt->rt6i_dst.plen != 128 &&
590 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
591 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900593 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900595 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596 rt->rt6i_dst.plen = 128;
597 rt->rt6i_flags |= RTF_CACHE;
598 rt->u.dst.flags |= DST_HOST;
599
600#ifdef CONFIG_IPV6_SUBTREES
601 if (rt->rt6i_src.plen && saddr) {
602 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
603 rt->rt6i_src.plen = 128;
604 }
605#endif
606
607 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
608
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800609 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700610
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800611 return rt;
612}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800614static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
615{
616 struct rt6_info *rt = ip6_rt_copy(ort);
617 if (rt) {
618 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
619 rt->rt6i_dst.plen = 128;
620 rt->rt6i_flags |= RTF_CACHE;
621 if (rt->rt6i_flags & RTF_REJECT)
622 rt->u.dst.error = ort->u.dst.error;
623 rt->u.dst.flags |= DST_HOST;
624 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
625 }
626 return rt;
627}
628
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700629static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
630 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631{
632 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800633 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700634 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800636 int err;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700637 int reachable = RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700639 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640
641relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700642 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800644restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700645 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646
647restart:
Thomas Grafc71099a2006-08-04 23:20:06 -0700648 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700649 BACKTRACK(&fl->fl6_src);
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800650 if (rt == &ip6_null_entry ||
651 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800652 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800654 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700655 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800656
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800657 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800658 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800659 else {
660#if CLONE_OFFLINK_ROUTE
661 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
662#else
663 goto out2;
664#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800666
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800667 dst_release(&rt->u.dst);
668 rt = nrt ? : &ip6_null_entry;
669
670 dst_hold(&rt->u.dst);
671 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700672 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800673 if (!err)
674 goto out2;
675 }
676
677 if (--attempts <= 0)
678 goto out2;
679
680 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700681 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800682 * released someone could insert this route. Relookup.
683 */
684 dst_release(&rt->u.dst);
685 goto relookup;
686
687out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800688 if (reachable) {
689 reachable = 0;
690 goto restart_2;
691 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800692 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700693 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694out2:
695 rt->u.dst.lastuse = jiffies;
696 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700697
698 return rt;
699}
700
701void ip6_route_input(struct sk_buff *skb)
702{
703 struct ipv6hdr *iph = skb->nh.ipv6h;
Thomas Grafadaa70b2006-10-13 15:01:03 -0700704 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700705 struct flowi fl = {
706 .iif = skb->dev->ifindex,
707 .nl_u = {
708 .ip6_u = {
709 .daddr = iph->daddr,
710 .saddr = iph->saddr,
David S. Miller267935b2006-08-25 16:07:48 -0700711#ifdef CONFIG_IPV6_ROUTE_FWMARK
YOSHIFUJI Hideaki75bff8f2006-08-21 19:22:01 +0900712 .fwmark = skb->nfmark,
David S. Miller267935b2006-08-25 16:07:48 -0700713#endif
Thomas Grafc71099a2006-08-04 23:20:06 -0700714 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
715 },
716 },
717 .proto = iph->nexthdr,
718 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700719
720 if (rt6_need_strict(&iph->daddr))
721 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700722
723 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
724}
725
726static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
727 struct flowi *fl, int flags)
728{
729 struct fib6_node *fn;
730 struct rt6_info *rt, *nrt;
731 int strict = 0;
732 int attempts = 3;
733 int err;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700734 int reachable = RT6_LOOKUP_F_REACHABLE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700735
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700736 strict |= flags & RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700737
738relookup:
739 read_lock_bh(&table->tb6_lock);
740
741restart_2:
742 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
743
744restart:
745 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700746 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700747 if (rt == &ip6_null_entry ||
748 rt->rt6i_flags & RTF_CACHE)
749 goto out;
750
751 dst_hold(&rt->u.dst);
752 read_unlock_bh(&table->tb6_lock);
753
754 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
755 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
756 else {
757#if CLONE_OFFLINK_ROUTE
758 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
759#else
760 goto out2;
761#endif
762 }
763
764 dst_release(&rt->u.dst);
765 rt = nrt ? : &ip6_null_entry;
766
767 dst_hold(&rt->u.dst);
768 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700769 err = ip6_ins_rt(nrt);
Thomas Grafc71099a2006-08-04 23:20:06 -0700770 if (!err)
771 goto out2;
772 }
773
774 if (--attempts <= 0)
775 goto out2;
776
777 /*
778 * Race condition! In the gap, when table->tb6_lock was
779 * released someone could insert this route. Relookup.
780 */
781 dst_release(&rt->u.dst);
782 goto relookup;
783
784out:
785 if (reachable) {
786 reachable = 0;
787 goto restart_2;
788 }
789 dst_hold(&rt->u.dst);
790 read_unlock_bh(&table->tb6_lock);
791out2:
792 rt->u.dst.lastuse = jiffies;
793 rt->u.dst.__use++;
794 return rt;
795}
796
797struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
798{
799 int flags = 0;
800
801 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700802 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700803
Thomas Grafadaa70b2006-10-13 15:01:03 -0700804 if (!ipv6_addr_any(&fl->fl6_src))
805 flags |= RT6_LOOKUP_F_HAS_SADDR;
806
Thomas Grafc71099a2006-08-04 23:20:06 -0700807 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808}
809
810
811/*
812 * Destination cache support functions
813 */
814
815static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
816{
817 struct rt6_info *rt;
818
819 rt = (struct rt6_info *) dst;
820
821 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
822 return dst;
823
824 return NULL;
825}
826
827static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
828{
829 struct rt6_info *rt = (struct rt6_info *) dst;
830
831 if (rt) {
832 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700833 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 else
835 dst_release(dst);
836 }
837 return NULL;
838}
839
840static void ip6_link_failure(struct sk_buff *skb)
841{
842 struct rt6_info *rt;
843
844 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
845
846 rt = (struct rt6_info *) skb->dst;
847 if (rt) {
848 if (rt->rt6i_flags&RTF_CACHE) {
849 dst_set_expires(&rt->u.dst, 0);
850 rt->rt6i_flags |= RTF_EXPIRES;
851 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
852 rt->rt6i_node->fn_sernum = -1;
853 }
854}
855
856static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
857{
858 struct rt6_info *rt6 = (struct rt6_info*)dst;
859
860 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
861 rt6->rt6i_flags |= RTF_MODIFIED;
862 if (mtu < IPV6_MIN_MTU) {
863 mtu = IPV6_MIN_MTU;
864 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
865 }
866 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700867 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868 }
869}
870
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871static int ipv6_get_mtu(struct net_device *dev);
872
873static inline unsigned int ipv6_advmss(unsigned int mtu)
874{
875 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
876
877 if (mtu < ip6_rt_min_advmss)
878 mtu = ip6_rt_min_advmss;
879
880 /*
881 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
882 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
883 * IPV6_MAXPLEN is also valid and means: "any MSS,
884 * rely only on pmtu discovery"
885 */
886 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
887 mtu = IPV6_MAXPLEN;
888 return mtu;
889}
890
/*
 * Singly linked list (via dst->next) of the dst entries handed out by
 * ndisc_dst_alloc(), reaped by ndisc_dst_gc().  ndisc_lock guards the
 * list head and its links.
 */
static DEFINE_SPINLOCK(ndisc_lock);
static struct dst_entry *ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700893
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
895 struct neighbour *neigh,
896 struct in6_addr *addr,
897 int (*output)(struct sk_buff *))
898{
899 struct rt6_info *rt;
900 struct inet6_dev *idev = in6_dev_get(dev);
901
902 if (unlikely(idev == NULL))
903 return NULL;
904
905 rt = ip6_dst_alloc();
906 if (unlikely(rt == NULL)) {
907 in6_dev_put(idev);
908 goto out;
909 }
910
911 dev_hold(dev);
912 if (neigh)
913 neigh_hold(neigh);
914 else
915 neigh = ndisc_get_neigh(dev, addr);
916
917 rt->rt6i_dev = dev;
918 rt->rt6i_idev = idev;
919 rt->rt6i_nexthop = neigh;
920 atomic_set(&rt->u.dst.__refcnt, 1);
921 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
922 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
923 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
924 rt->u.dst.output = output;
925
926#if 0 /* there's no chance to use these for ndisc */
927 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
928 ? DST_HOST
929 : 0;
930 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
931 rt->rt6i_dst.plen = 128;
932#endif
933
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700934 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935 rt->u.dst.next = ndisc_dst_gc_list;
936 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700937 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938
939 fib6_force_start_gc();
940
941out:
942 return (struct dst_entry *)rt;
943}
944
945int ndisc_dst_gc(int *more)
946{
947 struct dst_entry *dst, *next, **pprev;
948 int freed;
949
950 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700951 freed = 0;
952
953 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700955
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 while ((dst = *pprev) != NULL) {
957 if (!atomic_read(&dst->__refcnt)) {
958 *pprev = dst->next;
959 dst_free(dst);
960 freed++;
961 } else {
962 pprev = &dst->next;
963 (*more)++;
964 }
965 }
966
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700967 spin_unlock_bh(&ndisc_lock);
968
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 return freed;
970}
971
972static int ip6_dst_gc(void)
973{
974 static unsigned expire = 30*HZ;
975 static unsigned long last_gc;
976 unsigned long now = jiffies;
977
978 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
979 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
980 goto out;
981
982 expire++;
983 fib6_run_gc(expire);
984 last_gc = now;
985 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
986 expire = ip6_rt_gc_timeout>>1;
987
988out:
989 expire -= expire>>ip6_rt_gc_elasticity;
990 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
991}
992
993/* Clean host part of a prefix. Not necessary in radix tree,
994 but results in cleaner routing tables.
995
996 Remove it only when all the things will work!
997 */
998
999static int ipv6_get_mtu(struct net_device *dev)
1000{
1001 int mtu = IPV6_MIN_MTU;
1002 struct inet6_dev *idev;
1003
1004 idev = in6_dev_get(dev);
1005 if (idev) {
1006 mtu = idev->cnf.mtu6;
1007 in6_dev_put(idev);
1008 }
1009 return mtu;
1010}
1011
1012int ipv6_get_hoplimit(struct net_device *dev)
1013{
1014 int hoplimit = ipv6_devconf.hop_limit;
1015 struct inet6_dev *idev;
1016
1017 idev = in6_dev_get(dev);
1018 if (idev) {
1019 hoplimit = idev->cnf.hop_limit;
1020 in6_dev_put(idev);
1021 }
1022 return hoplimit;
1023}
1024
1025/*
1026 *
1027 */
1028
Thomas Graf86872cb2006-08-22 00:01:08 -07001029int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030{
1031 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 struct rt6_info *rt = NULL;
1033 struct net_device *dev = NULL;
1034 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001035 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 int addr_type;
1037
Thomas Graf86872cb2006-08-22 00:01:08 -07001038 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039 return -EINVAL;
1040#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001041 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042 return -EINVAL;
1043#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001044 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001045 err = -ENODEV;
Thomas Graf86872cb2006-08-22 00:01:08 -07001046 dev = dev_get_by_index(cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 if (!dev)
1048 goto out;
1049 idev = in6_dev_get(dev);
1050 if (!idev)
1051 goto out;
1052 }
1053
Thomas Graf86872cb2006-08-22 00:01:08 -07001054 if (cfg->fc_metric == 0)
1055 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056
Thomas Graf86872cb2006-08-22 00:01:08 -07001057 table = fib6_new_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001058 if (table == NULL) {
1059 err = -ENOBUFS;
1060 goto out;
1061 }
1062
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063 rt = ip6_dst_alloc();
1064
1065 if (rt == NULL) {
1066 err = -ENOMEM;
1067 goto out;
1068 }
1069
1070 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001071 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072
Thomas Graf86872cb2006-08-22 00:01:08 -07001073 if (cfg->fc_protocol == RTPROT_UNSPEC)
1074 cfg->fc_protocol = RTPROT_BOOT;
1075 rt->rt6i_protocol = cfg->fc_protocol;
1076
1077 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078
1079 if (addr_type & IPV6_ADDR_MULTICAST)
1080 rt->u.dst.input = ip6_mc_input;
1081 else
1082 rt->u.dst.input = ip6_forward;
1083
1084 rt->u.dst.output = ip6_output;
1085
Thomas Graf86872cb2006-08-22 00:01:08 -07001086 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1087 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 if (rt->rt6i_dst.plen == 128)
1089 rt->u.dst.flags = DST_HOST;
1090
1091#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001092 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1093 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094#endif
1095
Thomas Graf86872cb2006-08-22 00:01:08 -07001096 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097
1098 /* We cannot add true routes via loopback here,
1099 they would result in kernel looping; promote them to reject routes
1100 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001101 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1103 /* hold loopback dev/idev if we haven't done so. */
1104 if (dev != &loopback_dev) {
1105 if (dev) {
1106 dev_put(dev);
1107 in6_dev_put(idev);
1108 }
1109 dev = &loopback_dev;
1110 dev_hold(dev);
1111 idev = in6_dev_get(dev);
1112 if (!idev) {
1113 err = -ENODEV;
1114 goto out;
1115 }
1116 }
1117 rt->u.dst.output = ip6_pkt_discard_out;
1118 rt->u.dst.input = ip6_pkt_discard;
1119 rt->u.dst.error = -ENETUNREACH;
1120 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1121 goto install_route;
1122 }
1123
Thomas Graf86872cb2006-08-22 00:01:08 -07001124 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125 struct in6_addr *gw_addr;
1126 int gwa_type;
1127
Thomas Graf86872cb2006-08-22 00:01:08 -07001128 gw_addr = &cfg->fc_gateway;
1129 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 gwa_type = ipv6_addr_type(gw_addr);
1131
1132 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1133 struct rt6_info *grt;
1134
1135 /* IPv6 strictly inhibits using not link-local
1136 addresses as nexthop address.
1137 Otherwise, router will not able to send redirects.
1138 It is very good, but in some (rare!) circumstances
1139 (SIT, PtP, NBMA NOARP links) it is handy to allow
1140 some exceptions. --ANK
1141 */
1142 err = -EINVAL;
1143 if (!(gwa_type&IPV6_ADDR_UNICAST))
1144 goto out;
1145
Thomas Graf86872cb2006-08-22 00:01:08 -07001146 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147
1148 err = -EHOSTUNREACH;
1149 if (grt == NULL)
1150 goto out;
1151 if (dev) {
1152 if (dev != grt->rt6i_dev) {
1153 dst_release(&grt->u.dst);
1154 goto out;
1155 }
1156 } else {
1157 dev = grt->rt6i_dev;
1158 idev = grt->rt6i_idev;
1159 dev_hold(dev);
1160 in6_dev_hold(grt->rt6i_idev);
1161 }
1162 if (!(grt->rt6i_flags&RTF_GATEWAY))
1163 err = 0;
1164 dst_release(&grt->u.dst);
1165
1166 if (err)
1167 goto out;
1168 }
1169 err = -EINVAL;
1170 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1171 goto out;
1172 }
1173
1174 err = -ENODEV;
1175 if (dev == NULL)
1176 goto out;
1177
Thomas Graf86872cb2006-08-22 00:01:08 -07001178 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1180 if (IS_ERR(rt->rt6i_nexthop)) {
1181 err = PTR_ERR(rt->rt6i_nexthop);
1182 rt->rt6i_nexthop = NULL;
1183 goto out;
1184 }
1185 }
1186
Thomas Graf86872cb2006-08-22 00:01:08 -07001187 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188
1189install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001190 if (cfg->fc_mx) {
1191 struct nlattr *nla;
1192 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193
Thomas Graf86872cb2006-08-22 00:01:08 -07001194 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1195 int type = nla->nla_type;
1196
1197 if (type) {
1198 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001199 err = -EINVAL;
1200 goto out;
1201 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001202
1203 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205 }
1206 }
1207
1208 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1209 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1210 if (!rt->u.dst.metrics[RTAX_MTU-1])
1211 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1212 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1213 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1214 rt->u.dst.dev = dev;
1215 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001216 rt->rt6i_table = table;
Thomas Graf86872cb2006-08-22 00:01:08 -07001217 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218
1219out:
1220 if (dev)
1221 dev_put(dev);
1222 if (idev)
1223 in6_dev_put(idev);
1224 if (rt)
1225 dst_free((struct dst_entry *) rt);
1226 return err;
1227}
1228
Thomas Graf86872cb2006-08-22 00:01:08 -07001229static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230{
1231 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001232 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233
Patrick McHardy6c813a72006-08-06 22:22:47 -07001234 if (rt == &ip6_null_entry)
1235 return -ENOENT;
1236
Thomas Grafc71099a2006-08-04 23:20:06 -07001237 table = rt->rt6i_table;
1238 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239
Thomas Graf86872cb2006-08-22 00:01:08 -07001240 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 dst_release(&rt->u.dst);
1242
Thomas Grafc71099a2006-08-04 23:20:06 -07001243 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244
1245 return err;
1246}
1247
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001248int ip6_del_rt(struct rt6_info *rt)
1249{
Thomas Graf86872cb2006-08-22 00:01:08 -07001250 return __ip6_del_rt(rt, NULL);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001251}
1252
Thomas Graf86872cb2006-08-22 00:01:08 -07001253static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254{
Thomas Grafc71099a2006-08-04 23:20:06 -07001255 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256 struct fib6_node *fn;
1257 struct rt6_info *rt;
1258 int err = -ESRCH;
1259
Thomas Graf86872cb2006-08-22 00:01:08 -07001260 table = fib6_get_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001261 if (table == NULL)
1262 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263
Thomas Grafc71099a2006-08-04 23:20:06 -07001264 read_lock_bh(&table->tb6_lock);
1265
1266 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001267 &cfg->fc_dst, cfg->fc_dst_len,
1268 &cfg->fc_src, cfg->fc_src_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269
1270 if (fn) {
1271 for (rt = fn->leaf; rt; rt = rt->u.next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001272 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001274 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001276 if (cfg->fc_flags & RTF_GATEWAY &&
1277 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001279 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280 continue;
1281 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001282 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283
Thomas Graf86872cb2006-08-22 00:01:08 -07001284 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285 }
1286 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001287 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288
1289 return err;
1290}
1291
1292/*
1293 * Handle redirects
1294 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001295struct ip6rd_flowi {
1296 struct flowi fl;
1297 struct in6_addr gateway;
1298};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001299
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001300static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1301 struct flowi *fl,
1302 int flags)
1303{
1304 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1305 struct rt6_info *rt;
1306 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001307
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001309 * Get the "current" route for this destination and
1310 * check if the redirect has come from approriate router.
1311 *
1312 * RFC 2461 specifies that redirects should only be
1313 * accepted if they come from the nexthop to the target.
1314 * Due to the way the routes are chosen, this notion
1315 * is a bit fuzzy and one might need to check all possible
1316 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318
Thomas Grafc71099a2006-08-04 23:20:06 -07001319 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001320 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001321restart:
1322 for (rt = fn->leaf; rt; rt = rt->u.next) {
1323 /*
1324 * Current route is on-link; redirect is always invalid.
1325 *
1326 * Seems, previous statement is not true. It could
1327 * be node, which looks for us as on-link (f.e. proxy ndisc)
1328 * But then router serving it might decide, that we should
1329 * know truth 8)8) --ANK (980726).
1330 */
1331 if (rt6_check_expired(rt))
1332 continue;
1333 if (!(rt->rt6i_flags & RTF_GATEWAY))
1334 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001335 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001336 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001337 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001338 continue;
1339 break;
1340 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001341
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001342 if (!rt)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001343 rt = &ip6_null_entry;
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001344 BACKTRACK(&fl->fl6_src);
1345out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001346 dst_hold(&rt->u.dst);
1347
1348 read_unlock_bh(&table->tb6_lock);
1349
1350 return rt;
1351};
1352
1353static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1354 struct in6_addr *src,
1355 struct in6_addr *gateway,
1356 struct net_device *dev)
1357{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001358 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001359 struct ip6rd_flowi rdfl = {
1360 .fl = {
1361 .oif = dev->ifindex,
1362 .nl_u = {
1363 .ip6_u = {
1364 .daddr = *dest,
1365 .saddr = *src,
1366 },
1367 },
1368 },
1369 .gateway = *gateway,
1370 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001371
1372 if (rt6_need_strict(dest))
1373 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001374
1375 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1376}
1377
1378void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1379 struct in6_addr *saddr,
1380 struct neighbour *neigh, u8 *lladdr, int on_link)
1381{
1382 struct rt6_info *rt, *nrt = NULL;
1383 struct netevent_redirect netevent;
1384
1385 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1386
1387 if (rt == &ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 if (net_ratelimit())
1389 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1390 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001391 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392 }
1393
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 /*
1395 * We have finally decided to accept it.
1396 */
1397
1398 neigh_update(neigh, lladdr, NUD_STALE,
1399 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1400 NEIGH_UPDATE_F_OVERRIDE|
1401 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1402 NEIGH_UPDATE_F_ISROUTER))
1403 );
1404
1405 /*
1406 * Redirect received -> path was valid.
1407 * Look, redirects are sent only in response to data packets,
1408 * so that this nexthop apparently is reachable. --ANK
1409 */
1410 dst_confirm(&rt->u.dst);
1411
1412 /* Duplicate redirect: silently ignore. */
1413 if (neigh == rt->u.dst.neighbour)
1414 goto out;
1415
1416 nrt = ip6_rt_copy(rt);
1417 if (nrt == NULL)
1418 goto out;
1419
1420 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1421 if (on_link)
1422 nrt->rt6i_flags &= ~RTF_GATEWAY;
1423
1424 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1425 nrt->rt6i_dst.plen = 128;
1426 nrt->u.dst.flags |= DST_HOST;
1427
1428 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1429 nrt->rt6i_nexthop = neigh_clone(neigh);
1430 /* Reset pmtu, it may be better */
1431 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1432 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1433
Thomas Graf40e22e82006-08-22 00:00:45 -07001434 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435 goto out;
1436
Tom Tucker8d717402006-07-30 20:43:36 -07001437 netevent.old = &rt->u.dst;
1438 netevent.new = &nrt->u.dst;
1439 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1440
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001442 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443 return;
1444 }
1445
1446out:
1447 dst_release(&rt->u.dst);
1448 return;
1449}
1450
1451/*
1452 * Handle ICMP "packet too big" messages
1453 * i.e. Path MTU discovery
1454 */
1455
1456void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1457 struct net_device *dev, u32 pmtu)
1458{
1459 struct rt6_info *rt, *nrt;
1460 int allfrag = 0;
1461
1462 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1463 if (rt == NULL)
1464 return;
1465
1466 if (pmtu >= dst_mtu(&rt->u.dst))
1467 goto out;
1468
1469 if (pmtu < IPV6_MIN_MTU) {
1470 /*
1471 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1472 * MTU (1280) and a fragment header should always be included
1473 * after a node receiving Too Big message reporting PMTU is
1474 * less than the IPv6 Minimum Link MTU.
1475 */
1476 pmtu = IPV6_MIN_MTU;
1477 allfrag = 1;
1478 }
1479
1480 /* New mtu received -> path was valid.
1481 They are sent only in response to data packets,
1482 so that this nexthop apparently is reachable. --ANK
1483 */
1484 dst_confirm(&rt->u.dst);
1485
1486 /* Host route. If it is static, it would be better
1487 not to override it, but add new one, so that
1488 when cache entry will expire old pmtu
1489 would return automatically.
1490 */
1491 if (rt->rt6i_flags & RTF_CACHE) {
1492 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1493 if (allfrag)
1494 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1495 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1496 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1497 goto out;
1498 }
1499
1500 /* Network route.
1501 Two cases are possible:
1502 1. It is connected route. Action: COW
1503 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1504 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001505 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001506 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001507 else
1508 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001509
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001510 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001511 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1512 if (allfrag)
1513 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1514
1515 /* According to RFC 1981, detecting PMTU increase shouldn't be
1516 * happened within 5 mins, the recommended timer is 10 mins.
1517 * Here this route expiration time is set to ip6_rt_mtu_expires
1518 * which is 10 mins. After 10 mins the decreased pmtu is expired
1519 * and detecting PMTU increase will be automatically happened.
1520 */
1521 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1522 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1523
Thomas Graf40e22e82006-08-22 00:00:45 -07001524 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001525 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001526out:
1527 dst_release(&rt->u.dst);
1528}
1529
1530/*
1531 * Misc support functions
1532 */
1533
1534static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1535{
1536 struct rt6_info *rt = ip6_dst_alloc();
1537
1538 if (rt) {
1539 rt->u.dst.input = ort->u.dst.input;
1540 rt->u.dst.output = ort->u.dst.output;
1541
1542 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1543 rt->u.dst.dev = ort->u.dst.dev;
1544 if (rt->u.dst.dev)
1545 dev_hold(rt->u.dst.dev);
1546 rt->rt6i_idev = ort->rt6i_idev;
1547 if (rt->rt6i_idev)
1548 in6_dev_hold(rt->rt6i_idev);
1549 rt->u.dst.lastuse = jiffies;
1550 rt->rt6i_expires = 0;
1551
1552 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1553 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1554 rt->rt6i_metric = 0;
1555
1556 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1557#ifdef CONFIG_IPV6_SUBTREES
1558 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1559#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001560 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001561 }
1562 return rt;
1563}
1564
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001565#ifdef CONFIG_IPV6_ROUTE_INFO
1566static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1567 struct in6_addr *gwaddr, int ifindex)
1568{
1569 struct fib6_node *fn;
1570 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001571 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001572
Thomas Grafc71099a2006-08-04 23:20:06 -07001573 table = fib6_get_table(RT6_TABLE_INFO);
1574 if (table == NULL)
1575 return NULL;
1576
1577 write_lock_bh(&table->tb6_lock);
1578 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001579 if (!fn)
1580 goto out;
1581
1582 for (rt = fn->leaf; rt; rt = rt->u.next) {
1583 if (rt->rt6i_dev->ifindex != ifindex)
1584 continue;
1585 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1586 continue;
1587 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1588 continue;
1589 dst_hold(&rt->u.dst);
1590 break;
1591 }
1592out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001593 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001594 return rt;
1595}
1596
1597static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1598 struct in6_addr *gwaddr, int ifindex,
1599 unsigned pref)
1600{
Thomas Graf86872cb2006-08-22 00:01:08 -07001601 struct fib6_config cfg = {
1602 .fc_table = RT6_TABLE_INFO,
1603 .fc_metric = 1024,
1604 .fc_ifindex = ifindex,
1605 .fc_dst_len = prefixlen,
1606 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1607 RTF_UP | RTF_PREF(pref),
1608 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001609
Thomas Graf86872cb2006-08-22 00:01:08 -07001610 ipv6_addr_copy(&cfg.fc_dst, prefix);
1611 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1612
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001613 /* We should treat it as a default route if prefix length is 0. */
1614 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001615 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001616
Thomas Graf86872cb2006-08-22 00:01:08 -07001617 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001618
1619 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1620}
1621#endif
1622
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1624{
1625 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001626 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627
Thomas Grafc71099a2006-08-04 23:20:06 -07001628 table = fib6_get_table(RT6_TABLE_DFLT);
1629 if (table == NULL)
1630 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631
Thomas Grafc71099a2006-08-04 23:20:06 -07001632 write_lock_bh(&table->tb6_lock);
1633 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001635 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1637 break;
1638 }
1639 if (rt)
1640 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001641 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642 return rt;
1643}
1644
1645struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001646 struct net_device *dev,
1647 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648{
Thomas Graf86872cb2006-08-22 00:01:08 -07001649 struct fib6_config cfg = {
1650 .fc_table = RT6_TABLE_DFLT,
1651 .fc_metric = 1024,
1652 .fc_ifindex = dev->ifindex,
1653 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1654 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1655 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656
Thomas Graf86872cb2006-08-22 00:01:08 -07001657 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658
Thomas Graf86872cb2006-08-22 00:01:08 -07001659 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661 return rt6_get_dflt_router(gwaddr, dev);
1662}
1663
1664void rt6_purge_dflt_routers(void)
1665{
1666 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001667 struct fib6_table *table;
1668
1669 /* NOTE: Keep consistent with rt6_get_dflt_router */
1670 table = fib6_get_table(RT6_TABLE_DFLT);
1671 if (table == NULL)
1672 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673
1674restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001675 read_lock_bh(&table->tb6_lock);
1676 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1678 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001679 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001680 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 goto restart;
1682 }
1683 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001684 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685}
1686
Thomas Graf86872cb2006-08-22 00:01:08 -07001687static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1688 struct fib6_config *cfg)
1689{
1690 memset(cfg, 0, sizeof(*cfg));
1691
1692 cfg->fc_table = RT6_TABLE_MAIN;
1693 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1694 cfg->fc_metric = rtmsg->rtmsg_metric;
1695 cfg->fc_expires = rtmsg->rtmsg_info;
1696 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1697 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1698 cfg->fc_flags = rtmsg->rtmsg_flags;
1699
1700 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1701 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1702 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1703}
1704
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1706{
Thomas Graf86872cb2006-08-22 00:01:08 -07001707 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 struct in6_rtmsg rtmsg;
1709 int err;
1710
1711 switch(cmd) {
1712 case SIOCADDRT: /* Add a route */
1713 case SIOCDELRT: /* Delete a route */
1714 if (!capable(CAP_NET_ADMIN))
1715 return -EPERM;
1716 err = copy_from_user(&rtmsg, arg,
1717 sizeof(struct in6_rtmsg));
1718 if (err)
1719 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001720
1721 rtmsg_to_fib6_config(&rtmsg, &cfg);
1722
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 rtnl_lock();
1724 switch (cmd) {
1725 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001726 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727 break;
1728 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001729 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 break;
1731 default:
1732 err = -EINVAL;
1733 }
1734 rtnl_unlock();
1735
1736 return err;
1737 };
1738
1739 return -EINVAL;
1740}
1741
1742/*
1743 * Drop the packet on the floor
1744 */
1745
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001746static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747{
Lv Liangying76d0cc12006-08-29 00:00:47 -07001748 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1749 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1750 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1751
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1753 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1754 kfree_skb(skb);
1755 return 0;
1756}
1757
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001758static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759{
1760 skb->dev = skb->dst->dev;
1761 return ip6_pkt_discard(skb);
1762}
1763
1764/*
1765 * Allocate a dst for local (unicast / anycast) address.
1766 */
1767
1768struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1769 const struct in6_addr *addr,
1770 int anycast)
1771{
1772 struct rt6_info *rt = ip6_dst_alloc();
1773
1774 if (rt == NULL)
1775 return ERR_PTR(-ENOMEM);
1776
1777 dev_hold(&loopback_dev);
1778 in6_dev_hold(idev);
1779
1780 rt->u.dst.flags = DST_HOST;
1781 rt->u.dst.input = ip6_input;
1782 rt->u.dst.output = ip6_output;
1783 rt->rt6i_dev = &loopback_dev;
1784 rt->rt6i_idev = idev;
1785 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1786 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1787 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1788 rt->u.dst.obsolete = -1;
1789
1790 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001791 if (anycast)
1792 rt->rt6i_flags |= RTF_ANYCAST;
1793 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794 rt->rt6i_flags |= RTF_LOCAL;
1795 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1796 if (rt->rt6i_nexthop == NULL) {
1797 dst_free((struct dst_entry *) rt);
1798 return ERR_PTR(-ENOMEM);
1799 }
1800
1801 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1802 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001803 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804
1805 atomic_set(&rt->u.dst.__refcnt, 1);
1806
1807 return rt;
1808}
1809
1810static int fib6_ifdown(struct rt6_info *rt, void *arg)
1811{
1812 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1813 rt != &ip6_null_entry) {
1814 RT6_TRACE("deleted by ifdown %p\n", rt);
1815 return -1;
1816 }
1817 return 0;
1818}
1819
1820void rt6_ifdown(struct net_device *dev)
1821{
Thomas Grafc71099a2006-08-04 23:20:06 -07001822 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823}
1824
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1830
1831static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1832{
1833 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1834 struct inet6_dev *idev;
1835
1836 /* In IPv6 pmtu discovery is not optional,
1837 so that RTAX_MTU lock cannot disable it.
1838 We still use this lock to block changes
1839 caused by addrconf/ndisc.
1840 */
1841
1842 idev = __in6_dev_get(arg->dev);
1843 if (idev == NULL)
1844 return 0;
1845
1846 /* For administrative MTU increase, there is no way to discover
1847 IPv6 PMTU increase, so PMTU increase should be updated here.
1848 Since RFC 1981 doesn't include administrative MTU increase
1849 update PMTU increase is a MUST. (i.e. jumbo frame)
1850 */
1851 /*
1852 If new MTU is less than route PMTU, this new MTU will be the
1853 lowest MTU in the path, update the route PMTU to reflect PMTU
1854 decreases; if new MTU is greater than route PMTU, and the
1855 old MTU is the lowest MTU in the path, update the route PMTU
1856 to reflect the increase. In this case if the other nodes' MTU
1857 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1858 PMTU discouvery.
1859 */
1860 if (rt->rt6i_dev == arg->dev &&
1861 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1862 (dst_mtu(&rt->u.dst) > arg->mtu ||
1863 (dst_mtu(&rt->u.dst) < arg->mtu &&
1864 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1865 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1866 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1867 return 0;
1868}
1869
1870void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1871{
Thomas Grafc71099a2006-08-04 23:20:06 -07001872 struct rt6_mtu_change_arg arg = {
1873 .dev = dev,
1874 .mtu = mtu,
1875 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876
Thomas Grafc71099a2006-08-04 23:20:06 -07001877 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001878}
1879
Thomas Graf86872cb2006-08-22 00:01:08 -07001880static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
Thomas Graf5176f912006-08-26 20:13:18 -07001881 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07001882 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07001883 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07001884 [RTA_PRIORITY] = { .type = NLA_U32 },
1885 [RTA_METRICS] = { .type = NLA_NESTED },
1886};
1887
1888static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1889 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001890{
Thomas Graf86872cb2006-08-22 00:01:08 -07001891 struct rtmsg *rtm;
1892 struct nlattr *tb[RTA_MAX+1];
1893 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894
Thomas Graf86872cb2006-08-22 00:01:08 -07001895 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1896 if (err < 0)
1897 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898
Thomas Graf86872cb2006-08-22 00:01:08 -07001899 err = -EINVAL;
1900 rtm = nlmsg_data(nlh);
1901 memset(cfg, 0, sizeof(*cfg));
1902
1903 cfg->fc_table = rtm->rtm_table;
1904 cfg->fc_dst_len = rtm->rtm_dst_len;
1905 cfg->fc_src_len = rtm->rtm_src_len;
1906 cfg->fc_flags = RTF_UP;
1907 cfg->fc_protocol = rtm->rtm_protocol;
1908
1909 if (rtm->rtm_type == RTN_UNREACHABLE)
1910 cfg->fc_flags |= RTF_REJECT;
1911
1912 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1913 cfg->fc_nlinfo.nlh = nlh;
1914
1915 if (tb[RTA_GATEWAY]) {
1916 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1917 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001919
1920 if (tb[RTA_DST]) {
1921 int plen = (rtm->rtm_dst_len + 7) >> 3;
1922
1923 if (nla_len(tb[RTA_DST]) < plen)
1924 goto errout;
1925
1926 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001928
1929 if (tb[RTA_SRC]) {
1930 int plen = (rtm->rtm_src_len + 7) >> 3;
1931
1932 if (nla_len(tb[RTA_SRC]) < plen)
1933 goto errout;
1934
1935 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001937
1938 if (tb[RTA_OIF])
1939 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1940
1941 if (tb[RTA_PRIORITY])
1942 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1943
1944 if (tb[RTA_METRICS]) {
1945 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1946 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001948
1949 if (tb[RTA_TABLE])
1950 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1951
1952 err = 0;
1953errout:
1954 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955}
1956
1957int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1958{
Thomas Graf86872cb2006-08-22 00:01:08 -07001959 struct fib6_config cfg;
1960 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961
Thomas Graf86872cb2006-08-22 00:01:08 -07001962 err = rtm_to_fib6_config(skb, nlh, &cfg);
1963 if (err < 0)
1964 return err;
1965
1966 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967}
1968
1969int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1970{
Thomas Graf86872cb2006-08-22 00:01:08 -07001971 struct fib6_config cfg;
1972 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973
Thomas Graf86872cb2006-08-22 00:01:08 -07001974 err = rtm_to_fib6_config(skb, nlh, &cfg);
1975 if (err < 0)
1976 return err;
1977
1978 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979}
1980
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001982 struct in6_addr *dst, struct in6_addr *src,
1983 int iif, int type, u32 pid, u32 seq,
1984 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985{
1986 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07001987 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988 struct rta_cacheinfo ci;
Patrick McHardy9e762a42006-08-10 23:09:48 -07001989 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990
1991 if (prefix) { /* user wants prefix routes only */
1992 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1993 /* success since this is not a prefix route */
1994 return 1;
1995 }
1996 }
1997
Thomas Graf2d7202b2006-08-22 00:01:27 -07001998 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1999 if (nlh == NULL)
2000 return -ENOBUFS;
2001
2002 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003 rtm->rtm_family = AF_INET6;
2004 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2005 rtm->rtm_src_len = rt->rt6i_src.plen;
2006 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002007 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002008 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002009 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002010 table = RT6_TABLE_UNSPEC;
2011 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002012 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013 if (rt->rt6i_flags&RTF_REJECT)
2014 rtm->rtm_type = RTN_UNREACHABLE;
2015 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2016 rtm->rtm_type = RTN_LOCAL;
2017 else
2018 rtm->rtm_type = RTN_UNICAST;
2019 rtm->rtm_flags = 0;
2020 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2021 rtm->rtm_protocol = rt->rt6i_protocol;
2022 if (rt->rt6i_flags&RTF_DYNAMIC)
2023 rtm->rtm_protocol = RTPROT_REDIRECT;
2024 else if (rt->rt6i_flags & RTF_ADDRCONF)
2025 rtm->rtm_protocol = RTPROT_KERNEL;
2026 else if (rt->rt6i_flags&RTF_DEFAULT)
2027 rtm->rtm_protocol = RTPROT_RA;
2028
2029 if (rt->rt6i_flags&RTF_CACHE)
2030 rtm->rtm_flags |= RTM_F_CLONED;
2031
2032 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002033 NLA_PUT(skb, RTA_DST, 16, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002034 rtm->rtm_dst_len = 128;
2035 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002036 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037#ifdef CONFIG_IPV6_SUBTREES
2038 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002039 NLA_PUT(skb, RTA_SRC, 16, src);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040 rtm->rtm_src_len = 128;
2041 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002042 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043#endif
2044 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002045 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046 else if (dst) {
2047 struct in6_addr saddr_buf;
2048 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002049 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002050 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002051
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002053 goto nla_put_failure;
2054
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002056 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2057
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002059 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2060
2061 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2063 if (rt->rt6i_expires)
2064 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2065 else
2066 ci.rta_expires = 0;
2067 ci.rta_used = rt->u.dst.__use;
2068 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2069 ci.rta_error = rt->u.dst.error;
2070 ci.rta_id = 0;
2071 ci.rta_ts = 0;
2072 ci.rta_tsage = 0;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002073 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074
Thomas Graf2d7202b2006-08-22 00:01:27 -07002075 return nlmsg_end(skb, nlh);
2076
2077nla_put_failure:
2078 return nlmsg_cancel(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079}
2080
Patrick McHardy1b43af52006-08-10 23:11:17 -07002081int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082{
2083 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2084 int prefix;
2085
Thomas Graf2d7202b2006-08-22 00:01:27 -07002086 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2087 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2089 } else
2090 prefix = 0;
2091
2092 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2093 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002094 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095}
2096
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2098{
Thomas Grafab364a62006-08-22 00:01:47 -07002099 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002101 struct sk_buff *skb;
2102 struct rtmsg *rtm;
2103 struct flowi fl;
2104 int err, iif = 0;
2105
2106 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2107 if (err < 0)
2108 goto errout;
2109
2110 err = -EINVAL;
2111 memset(&fl, 0, sizeof(fl));
2112
2113 if (tb[RTA_SRC]) {
2114 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2115 goto errout;
2116
2117 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2118 }
2119
2120 if (tb[RTA_DST]) {
2121 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2122 goto errout;
2123
2124 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2125 }
2126
2127 if (tb[RTA_IIF])
2128 iif = nla_get_u32(tb[RTA_IIF]);
2129
2130 if (tb[RTA_OIF])
2131 fl.oif = nla_get_u32(tb[RTA_OIF]);
2132
2133 if (iif) {
2134 struct net_device *dev;
2135 dev = __dev_get_by_index(iif);
2136 if (!dev) {
2137 err = -ENODEV;
2138 goto errout;
2139 }
2140 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141
2142 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002143 if (skb == NULL) {
2144 err = -ENOBUFS;
2145 goto errout;
2146 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147
2148 /* Reserve room for dummy headers, this skb can pass
2149 through good chunk of routing engine.
2150 */
2151 skb->mac.raw = skb->data;
2152 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2153
Thomas Grafab364a62006-08-22 00:01:47 -07002154 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 skb->dst = &rt->u.dst;
2156
Thomas Grafab364a62006-08-22 00:01:47 -07002157 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002159 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002160 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002161 kfree_skb(skb);
2162 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002163 }
2164
Thomas Graf2942e902006-08-15 00:30:25 -07002165 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002166errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002168}
2169
Thomas Graf86872cb2006-08-22 00:01:08 -07002170void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171{
2172 struct sk_buff *skb;
Thomas Graf86872cb2006-08-22 00:01:08 -07002173 u32 pid = 0, seq = 0;
2174 struct nlmsghdr *nlh = NULL;
Thomas Graf21713eb2006-08-15 00:35:24 -07002175 int payload = sizeof(struct rtmsg) + 256;
2176 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002177
Thomas Graf86872cb2006-08-22 00:01:08 -07002178 if (info) {
2179 pid = info->pid;
2180 nlh = info->nlh;
2181 if (nlh)
2182 seq = nlh->nlmsg_seq;
2183 }
2184
Thomas Graf21713eb2006-08-15 00:35:24 -07002185 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2186 if (skb == NULL)
2187 goto errout;
2188
2189 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2190 if (err < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002191 kfree_skb(skb);
Thomas Graf21713eb2006-08-15 00:35:24 -07002192 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193 }
Thomas Graf21713eb2006-08-15 00:35:24 -07002194
2195 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2196errout:
2197 if (err < 0)
2198 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199}
2200
2201/*
2202 * /proc
2203 */
2204
2205#ifdef CONFIG_PROC_FS
2206
/*
 * Size used for one record of the ipv6_route proc file; read offsets
 * are divided by this to find how many routes to skip.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2208
/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* routes skipped so far to reach the offset */
	int len;	/* bytes written to buffer so far */
};
2217
2218static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2219{
2220 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2221 int i;
2222
2223 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2224 arg->skip++;
2225 return 0;
2226 }
2227
2228 if (arg->len >= arg->length)
2229 return 0;
2230
2231 for (i=0; i<16; i++) {
2232 sprintf(arg->buffer + arg->len, "%02x",
2233 rt->rt6i_dst.addr.s6_addr[i]);
2234 arg->len += 2;
2235 }
2236 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2237 rt->rt6i_dst.plen);
2238
2239#ifdef CONFIG_IPV6_SUBTREES
2240 for (i=0; i<16; i++) {
2241 sprintf(arg->buffer + arg->len, "%02x",
2242 rt->rt6i_src.addr.s6_addr[i]);
2243 arg->len += 2;
2244 }
2245 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2246 rt->rt6i_src.plen);
2247#else
2248 sprintf(arg->buffer + arg->len,
2249 "00000000000000000000000000000000 00 ");
2250 arg->len += 36;
2251#endif
2252
2253 if (rt->rt6i_nexthop) {
2254 for (i=0; i<16; i++) {
2255 sprintf(arg->buffer + arg->len, "%02x",
2256 rt->rt6i_nexthop->primary_key[i]);
2257 arg->len += 2;
2258 }
2259 } else {
2260 sprintf(arg->buffer + arg->len,
2261 "00000000000000000000000000000000");
2262 arg->len += 32;
2263 }
2264 arg->len += sprintf(arg->buffer + arg->len,
2265 " %08x %08x %08x %08x %8s\n",
2266 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2267 rt->u.dst.__use, rt->rt6i_flags,
2268 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2269 return 0;
2270}
2271
2272static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2273{
Thomas Grafc71099a2006-08-04 23:20:06 -07002274 struct rt6_proc_arg arg = {
2275 .buffer = buffer,
2276 .offset = offset,
2277 .length = length,
2278 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279
Thomas Grafc71099a2006-08-04 23:20:06 -07002280 fib6_clean_all(rt6_info_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002281
2282 *start = buffer;
2283 if (offset)
2284 *start += offset % RT6_INFO_LEN;
2285
2286 arg.len -= offset % RT6_INFO_LEN;
2287
2288 if (arg.len > length)
2289 arg.len = length;
2290 if (arg.len < 0)
2291 arg.len = 0;
2292
2293 return arg.len;
2294}
2295
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2297{
2298 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2299 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2300 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2301 rt6_stats.fib_rt_cache,
2302 atomic_read(&ip6_dst_ops.entries),
2303 rt6_stats.fib_discarded_routes);
2304
2305 return 0;
2306}
2307
2308static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2309{
2310 return single_open(file, rt6_stats_seq_show, NULL);
2311}
2312
2313static struct file_operations rt6_stats_seq_fops = {
2314 .owner = THIS_MODULE,
2315 .open = rt6_stats_seq_open,
2316 .read = seq_read,
2317 .llseek = seq_lseek,
2318 .release = single_release,
2319};
2320#endif /* CONFIG_PROC_FS */
2321
2322#ifdef CONFIG_SYSCTL
2323
2324static int flush_delay;
2325
2326static
2327int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2328 void __user *buffer, size_t *lenp, loff_t *ppos)
2329{
2330 if (write) {
2331 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2332 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2333 return 0;
2334 } else
2335 return -EINVAL;
2336}
2337
2338ctl_table ipv6_route_table[] = {
2339 {
2340 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2341 .procname = "flush",
2342 .data = &flush_delay,
2343 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002344 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345 .proc_handler = &ipv6_sysctl_rtcache_flush
2346 },
2347 {
2348 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2349 .procname = "gc_thresh",
2350 .data = &ip6_dst_ops.gc_thresh,
2351 .maxlen = sizeof(int),
2352 .mode = 0644,
2353 .proc_handler = &proc_dointvec,
2354 },
2355 {
2356 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2357 .procname = "max_size",
2358 .data = &ip6_rt_max_size,
2359 .maxlen = sizeof(int),
2360 .mode = 0644,
2361 .proc_handler = &proc_dointvec,
2362 },
2363 {
2364 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2365 .procname = "gc_min_interval",
2366 .data = &ip6_rt_gc_min_interval,
2367 .maxlen = sizeof(int),
2368 .mode = 0644,
2369 .proc_handler = &proc_dointvec_jiffies,
2370 .strategy = &sysctl_jiffies,
2371 },
2372 {
2373 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2374 .procname = "gc_timeout",
2375 .data = &ip6_rt_gc_timeout,
2376 .maxlen = sizeof(int),
2377 .mode = 0644,
2378 .proc_handler = &proc_dointvec_jiffies,
2379 .strategy = &sysctl_jiffies,
2380 },
2381 {
2382 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2383 .procname = "gc_interval",
2384 .data = &ip6_rt_gc_interval,
2385 .maxlen = sizeof(int),
2386 .mode = 0644,
2387 .proc_handler = &proc_dointvec_jiffies,
2388 .strategy = &sysctl_jiffies,
2389 },
2390 {
2391 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2392 .procname = "gc_elasticity",
2393 .data = &ip6_rt_gc_elasticity,
2394 .maxlen = sizeof(int),
2395 .mode = 0644,
2396 .proc_handler = &proc_dointvec_jiffies,
2397 .strategy = &sysctl_jiffies,
2398 },
2399 {
2400 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2401 .procname = "mtu_expires",
2402 .data = &ip6_rt_mtu_expires,
2403 .maxlen = sizeof(int),
2404 .mode = 0644,
2405 .proc_handler = &proc_dointvec_jiffies,
2406 .strategy = &sysctl_jiffies,
2407 },
2408 {
2409 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2410 .procname = "min_adv_mss",
2411 .data = &ip6_rt_min_advmss,
2412 .maxlen = sizeof(int),
2413 .mode = 0644,
2414 .proc_handler = &proc_dointvec_jiffies,
2415 .strategy = &sysctl_jiffies,
2416 },
2417 {
2418 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2419 .procname = "gc_min_interval_ms",
2420 .data = &ip6_rt_gc_min_interval,
2421 .maxlen = sizeof(int),
2422 .mode = 0644,
2423 .proc_handler = &proc_dointvec_ms_jiffies,
2424 .strategy = &sysctl_ms_jiffies,
2425 },
2426 { .ctl_name = 0 }
2427};
2428
2429#endif
2430
2431void __init ip6_route_init(void)
2432{
2433 struct proc_dir_entry *p;
2434
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002435 ip6_dst_ops.kmem_cachep =
2436 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2437 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438 fib6_init();
2439#ifdef CONFIG_PROC_FS
2440 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2441 if (p)
2442 p->owner = THIS_MODULE;
2443
2444 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2445#endif
2446#ifdef CONFIG_XFRM
2447 xfrm6_init();
2448#endif
Thomas Graf101367c2006-08-04 03:39:02 -07002449#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2450 fib6_rules_init();
2451#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452}
2453
2454void ip6_route_cleanup(void)
2455{
Thomas Graf101367c2006-08-04 03:39:02 -07002456#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2457 fib6_rules_cleanup();
2458#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459#ifdef CONFIG_PROC_FS
2460 proc_net_remove("ipv6_route");
2461 proc_net_remove("rt6_stats");
2462#endif
2463#ifdef CONFIG_XFRM
2464 xfrm6_fini();
2465#endif
2466 rt6_ifdown(NULL);
2467 fib6_gc_cleanup();
2468 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2469}