blob: ba1b3d11865e43c2553d352179625e9e4b4fad37 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070025 * Ville Nuorvala
26 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070027 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070057#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070058#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070059
60#include <asm/uaccess.h>
61
62#ifdef CONFIG_SYSCTL
63#include <linux/sysctl.h>
64#endif
65
/*
 * Debug verbosity for this file.  Set RT6_DEBUG to 3 to get tracing:
 * RDBG() and RT6_TRACE() then expand to printk() calls; at lower levels
 * they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, the policy lookups clone (rt6_alloc_clone) routes that
 * already have a nexthop instead of returning the original entry.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
79static int ip6_rt_max_size = 4096;
80static int ip6_rt_gc_min_interval = HZ / 2;
81static int ip6_rt_gc_timeout = 60*HZ;
82int ip6_rt_gc_interval = 30*HZ;
83static int ip6_rt_gc_elasticity = 9;
84static int ip6_rt_mtu_expires = 10*60*HZ;
85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90static void ip6_dst_destroy(struct dst_entry *);
91static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93static int ip6_dst_gc(void);
94
95static int ip6_pkt_discard(struct sk_buff *skb);
96static int ip6_pkt_discard_out(struct sk_buff *skb);
97static void ip6_link_failure(struct sk_buff *skb);
98static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800100#ifdef CONFIG_IPV6_ROUTE_INFO
101static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106#endif
107
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
120};
121
122struct rt6_info ip6_null_entry = {
123 .u = {
124 .dst = {
125 .__refcnt = ATOMIC_INIT(1),
126 .__use = 1,
127 .dev = &loopback_dev,
128 .obsolete = -1,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
133 .ops = &ip6_dst_ops,
134 .path = (struct dst_entry*)&ip6_null_entry,
135 }
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Additional static reject routes available with multiple routing tables.
 * Both mirror ip6_null_entry and differ only in the error they report.
 */

/* Reject route reporting -EACCES. */
struct rt6_info ip6_prohibit_entry = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.dev		= &loopback_dev,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		}
	},
};

/* Reject route reporting -EINVAL. */
struct rt6_info ip6_blk_hole_entry = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.dev		= &loopback_dev,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		}
	},
};

#endif
185
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186/* allocate dst with ip6_dst_ops */
187static __inline__ struct rt6_info *ip6_dst_alloc(void)
188{
189 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
190}
191
192static void ip6_dst_destroy(struct dst_entry *dst)
193{
194 struct rt6_info *rt = (struct rt6_info *)dst;
195 struct inet6_dev *idev = rt->rt6i_idev;
196
197 if (idev != NULL) {
198 rt->rt6i_idev = NULL;
199 in6_dev_put(idev);
200 }
201}
202
203static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
204 int how)
205{
206 struct rt6_info *rt = (struct rt6_info *)dst;
207 struct inet6_dev *idev = rt->rt6i_idev;
208
209 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
210 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
211 if (loopback_idev != NULL) {
212 rt->rt6i_idev = loopback_idev;
213 in6_dev_put(idev);
214 }
215 }
216}
217
218static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219{
220 return (rt->rt6i_flags & RTF_EXPIRES &&
221 time_after(jiffies, rt->rt6i_expires));
222}
223
Thomas Grafc71099a2006-08-04 23:20:06 -0700224static inline int rt6_need_strict(struct in6_addr *daddr)
225{
226 return (ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
228}
229
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700231 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232 */
233
234static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
235 int oif,
236 int strict)
237{
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
240
241 if (oif) {
242 for (sprt = rt; sprt; sprt = sprt->u.next) {
243 struct net_device *dev = sprt->rt6i_dev;
244 if (dev->ifindex == oif)
245 return sprt;
246 if (dev->flags & IFF_LOOPBACK) {
247 if (sprt->rt6i_idev == NULL ||
248 sprt->rt6i_idev->dev->ifindex != oif) {
249 if (strict && oif)
250 continue;
251 if (local && (!oif ||
252 local->rt6i_idev->dev->ifindex == oif))
253 continue;
254 }
255 local = sprt;
256 }
257 }
258
259 if (local)
260 return local;
261
262 if (strict)
263 return &ip6_null_entry;
264 }
265 return rt;
266}
267
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * nexthop of @rt when its neighbour entry is not in a NUD_VALID state and
 * the last update is older than the interface's rtr_probe_interval.
 *
 * @rt may be NULL, in which case nothing happens.
 *
 * The rate-limit test and the update of neigh->updated are done under the
 * write side of neigh->lock: the timestamp is modified here, and holding
 * only the read side would let two CPUs pass the test at the same time and
 * both send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		/* became valid meanwhile, or probed too recently */
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* solicit the nexthop via its solicited-node multicast address */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
303
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800305 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800307static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800309 struct net_device *dev = rt->rt6i_dev;
310 if (!oif || dev->ifindex == oif)
311 return 2;
312 if ((dev->flags & IFF_LOOPBACK) &&
313 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
314 return 1;
315 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316}
317
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800318static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800320 struct neighbour *neigh = rt->rt6i_nexthop;
321 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700322 if (rt->rt6i_flags & RTF_NONEXTHOP ||
323 !(rt->rt6i_flags & RTF_GATEWAY))
324 m = 1;
325 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800326 read_lock_bh(&neigh->lock);
327 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700328 m = 2;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 read_unlock_bh(&neigh->lock);
330 }
331 return m;
332}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800334static int rt6_score_route(struct rt6_info *rt, int oif,
335 int strict)
336{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700337 int m, n;
338
339 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700340 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800341 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800342#ifdef CONFIG_IPV6_ROUTER_PREF
343 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
344#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700345 n = rt6_check_neigh(rt);
346 if (n > 1)
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800347 m |= 16;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700348 else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800349 return -1;
350 return m;
351}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800353static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
354 int strict)
355{
356 struct rt6_info *match = NULL, *last = NULL;
357 struct rt6_info *rt, *rt0 = *head;
358 u32 metric;
359 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800361 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
362 __FUNCTION__, head, head ? *head : NULL, oif);
363
364 for (rt = rt0, metric = rt0->rt6i_metric;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700365 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800366 rt = rt->u.next) {
367 int m;
368
369 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 continue;
371
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800372 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800374 m = rt6_score_route(rt, oif, strict);
375 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800378 if (m > mpri) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800379 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800380 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381 mpri = m;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800382 } else {
383 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384 }
385 }
386
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800387 if (!match &&
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700388 (strict & RT6_LOOKUP_F_REACHABLE) &&
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800389 last && last != rt0) {
390 /* no entries matched; do round-robin */
Ingo Molnar34af9462006-06-27 02:53:55 -0700391 static DEFINE_SPINLOCK(lock);
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700392 spin_lock(&lock);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800393 *head = rt0->u.next;
394 rt0->u.next = last->u.next;
395 last->u.next = rt0;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700396 spin_unlock(&lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397 }
398
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800399 RT6_TRACE("%s() => %p, score=%d\n",
400 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800402 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403}
404
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received from router @gwaddr on
 * @dev.
 *
 * @opt points at the option, @len is the number of bytes available.
 *
 * A lifetime of zero removes a matching route; otherwise the route is
 * added or its preference updated, and its expiry is set from the
 * lifetime (0xffffffff meaning "never expires").
 *
 * Returns 0 on success, -EINVAL when the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int min_length;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length.  Length counts units of
	 * 8 octets including the 8-octet option header, so per RFC 4191
	 * section 2.3 a prefix longer than 64 bits needs Length 3 and any
	 * non-empty prefix needs Length 2 or 3.  Accepting less would let
	 * the prefix copy below read past the end of the option.
	 */
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64)
		min_length = 3;
	else if (rinfo->prefix_len > 0)
		min_length = 2;
	else
		min_length = 1;
	if (rinfo->length < min_length || rinfo->length > 3)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* the lifetime arrives in network byte order */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* copy only prefix_len bits into a full-size address */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		/* zero lifetime withdraws the route */
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
482
/*
 * BACKTRACK(saddr) - climb the fib tree after a failed selection.
 *
 * Must be expanded inside a function that has locals `rt' and `fn' and
 * labels `out' and `restart'.  It only acts when rt is &ip6_null_entry:
 * starting from fn it walks towards the root, descending into a parent's
 * source subtree (looked up with @saddr) when there is one that is not
 * the node we came from.  It jumps to `restart' at the first node that
 * carries route information (RTN_RTINFO) and to `out' once a top-level
 * root (RTN_TL_ROOT) is reached.
 *
 * The subtree is reached through FIB6_SUBTREE() in both the test and the
 * lookup so the two always agree (presumably this also keeps the macro
 * building when the subtree member is configured out -- confirm against
 * the FIB6_SUBTREE definition).
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (fn) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700500
501static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
502 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700503{
504 struct fib6_node *fn;
505 struct rt6_info *rt;
506
Thomas Grafc71099a2006-08-04 23:20:06 -0700507 read_lock_bh(&table->tb6_lock);
508 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
509restart:
510 rt = fn->leaf;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700511 rt = rt6_device_match(rt, fl->oif, flags);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700512 BACKTRACK(&fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700514out:
515 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516
517 rt->u.dst.lastuse = jiffies;
Thomas Grafc71099a2006-08-04 23:20:06 -0700518 rt->u.dst.__use++;
519
520 return rt;
521
522}
523
524struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
525 int oif, int strict)
526{
527 struct flowi fl = {
528 .oif = oif,
529 .nl_u = {
530 .ip6_u = {
531 .daddr = *daddr,
532 /* TODO: saddr */
533 },
534 },
535 };
536 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700537 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700538
539 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
540 if (dst->error == 0)
541 return (struct rt6_info *) dst;
542
543 dst_release(dst);
544
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545 return NULL;
546}
547
Thomas Grafc71099a2006-08-04 23:20:06 -0700548/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549 It takes new route entry, the addition fails by any reason the
550 route is freed. In any case, if caller does not hold it, it may
551 be destroyed.
552 */
553
Thomas Graf86872cb2006-08-22 00:01:08 -0700554static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555{
556 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700557 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558
Thomas Grafc71099a2006-08-04 23:20:06 -0700559 table = rt->rt6i_table;
560 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700561 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700562 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563
564 return err;
565}
566
Thomas Graf40e22e82006-08-22 00:00:45 -0700567int ip6_ins_rt(struct rt6_info *rt)
568{
Thomas Graf86872cb2006-08-22 00:01:08 -0700569 return __ip6_ins_rt(rt, NULL);
Thomas Graf40e22e82006-08-22 00:00:45 -0700570}
571
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800572static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
573 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575 struct rt6_info *rt;
576
577 /*
578 * Clone the route.
579 */
580
581 rt = ip6_rt_copy(ort);
582
583 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900584 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
585 if (rt->rt6i_dst.plen != 128 &&
586 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
587 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900589 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900591 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592 rt->rt6i_dst.plen = 128;
593 rt->rt6i_flags |= RTF_CACHE;
594 rt->u.dst.flags |= DST_HOST;
595
596#ifdef CONFIG_IPV6_SUBTREES
597 if (rt->rt6i_src.plen && saddr) {
598 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
599 rt->rt6i_src.plen = 128;
600 }
601#endif
602
603 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
604
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800605 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800607 return rt;
608}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800610static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
611{
612 struct rt6_info *rt = ip6_rt_copy(ort);
613 if (rt) {
614 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
615 rt->rt6i_dst.plen = 128;
616 rt->rt6i_flags |= RTF_CACHE;
617 if (rt->rt6i_flags & RTF_REJECT)
618 rt->u.dst.error = ort->u.dst.error;
619 rt->u.dst.flags |= DST_HOST;
620 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
621 }
622 return rt;
623}
624
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700625static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
626 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627{
628 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800629 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700630 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800632 int err;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700633 int reachable = RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700635 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636
637relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700638 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800640restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700641 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
643restart:
Thomas Grafc71099a2006-08-04 23:20:06 -0700644 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700645 BACKTRACK(&fl->fl6_src);
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800646 if (rt == &ip6_null_entry ||
647 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800648 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800650 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700651 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800652
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800653 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800654 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800655 else {
656#if CLONE_OFFLINK_ROUTE
657 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
658#else
659 goto out2;
660#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800662
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800663 dst_release(&rt->u.dst);
664 rt = nrt ? : &ip6_null_entry;
665
666 dst_hold(&rt->u.dst);
667 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700668 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800669 if (!err)
670 goto out2;
671 }
672
673 if (--attempts <= 0)
674 goto out2;
675
676 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700677 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800678 * released someone could insert this route. Relookup.
679 */
680 dst_release(&rt->u.dst);
681 goto relookup;
682
683out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800684 if (reachable) {
685 reachable = 0;
686 goto restart_2;
687 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800688 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700689 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690out2:
691 rt->u.dst.lastuse = jiffies;
692 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700693
694 return rt;
695}
696
697void ip6_route_input(struct sk_buff *skb)
698{
699 struct ipv6hdr *iph = skb->nh.ipv6h;
700 struct flowi fl = {
701 .iif = skb->dev->ifindex,
702 .nl_u = {
703 .ip6_u = {
704 .daddr = iph->daddr,
705 .saddr = iph->saddr,
David S. Miller267935b2006-08-25 16:07:48 -0700706#ifdef CONFIG_IPV6_ROUTE_FWMARK
YOSHIFUJI Hideaki75bff8f2006-08-21 19:22:01 +0900707 .fwmark = skb->nfmark,
David S. Miller267935b2006-08-25 16:07:48 -0700708#endif
Thomas Grafc71099a2006-08-04 23:20:06 -0700709 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
710 },
711 },
712 .proto = iph->nexthdr,
713 };
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700714 int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700715
716 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
717}
718
719static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
720 struct flowi *fl, int flags)
721{
722 struct fib6_node *fn;
723 struct rt6_info *rt, *nrt;
724 int strict = 0;
725 int attempts = 3;
726 int err;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700727 int reachable = RT6_LOOKUP_F_REACHABLE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700728
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700729 strict |= flags & RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700730
731relookup:
732 read_lock_bh(&table->tb6_lock);
733
734restart_2:
735 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
736
737restart:
738 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700739 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700740 if (rt == &ip6_null_entry ||
741 rt->rt6i_flags & RTF_CACHE)
742 goto out;
743
744 dst_hold(&rt->u.dst);
745 read_unlock_bh(&table->tb6_lock);
746
747 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
748 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
749 else {
750#if CLONE_OFFLINK_ROUTE
751 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
752#else
753 goto out2;
754#endif
755 }
756
757 dst_release(&rt->u.dst);
758 rt = nrt ? : &ip6_null_entry;
759
760 dst_hold(&rt->u.dst);
761 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700762 err = ip6_ins_rt(nrt);
Thomas Grafc71099a2006-08-04 23:20:06 -0700763 if (!err)
764 goto out2;
765 }
766
767 if (--attempts <= 0)
768 goto out2;
769
770 /*
771 * Race condition! In the gap, when table->tb6_lock was
772 * released someone could insert this route. Relookup.
773 */
774 dst_release(&rt->u.dst);
775 goto relookup;
776
777out:
778 if (reachable) {
779 reachable = 0;
780 goto restart_2;
781 }
782 dst_hold(&rt->u.dst);
783 read_unlock_bh(&table->tb6_lock);
784out2:
785 rt->u.dst.lastuse = jiffies;
786 rt->u.dst.__use++;
787 return rt;
788}
789
790struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
791{
792 int flags = 0;
793
794 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700795 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700796
797 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798}
799
800
801/*
802 * Destination cache support functions
803 */
804
805static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
806{
807 struct rt6_info *rt;
808
809 rt = (struct rt6_info *) dst;
810
811 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
812 return dst;
813
814 return NULL;
815}
816
817static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
818{
819 struct rt6_info *rt = (struct rt6_info *) dst;
820
821 if (rt) {
822 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700823 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 else
825 dst_release(dst);
826 }
827 return NULL;
828}
829
830static void ip6_link_failure(struct sk_buff *skb)
831{
832 struct rt6_info *rt;
833
834 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
835
836 rt = (struct rt6_info *) skb->dst;
837 if (rt) {
838 if (rt->rt6i_flags&RTF_CACHE) {
839 dst_set_expires(&rt->u.dst, 0);
840 rt->rt6i_flags |= RTF_EXPIRES;
841 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
842 rt->rt6i_node->fn_sernum = -1;
843 }
844}
845
846static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
847{
848 struct rt6_info *rt6 = (struct rt6_info*)dst;
849
850 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
851 rt6->rt6i_flags |= RTF_MODIFIED;
852 if (mtu < IPV6_MIN_MTU) {
853 mtu = IPV6_MIN_MTU;
854 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
855 }
856 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700857 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858 }
859}
860
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861static int ipv6_get_mtu(struct net_device *dev);
862
863static inline unsigned int ipv6_advmss(unsigned int mtu)
864{
865 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
866
867 if (mtu < ip6_rt_min_advmss)
868 mtu = ip6_rt_min_advmss;
869
870 /*
871 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
872 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
873 * IPV6_MAXPLEN is also valid and means: "any MSS,
874 * rely only on pmtu discovery"
875 */
876 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
877 mtu = IPV6_MAXPLEN;
878 return mtu;
879}
880
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700881static struct dst_entry *ndisc_dst_gc_list;
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700882static DEFINE_SPINLOCK(ndisc_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700883
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
885 struct neighbour *neigh,
886 struct in6_addr *addr,
887 int (*output)(struct sk_buff *))
888{
889 struct rt6_info *rt;
890 struct inet6_dev *idev = in6_dev_get(dev);
891
892 if (unlikely(idev == NULL))
893 return NULL;
894
895 rt = ip6_dst_alloc();
896 if (unlikely(rt == NULL)) {
897 in6_dev_put(idev);
898 goto out;
899 }
900
901 dev_hold(dev);
902 if (neigh)
903 neigh_hold(neigh);
904 else
905 neigh = ndisc_get_neigh(dev, addr);
906
907 rt->rt6i_dev = dev;
908 rt->rt6i_idev = idev;
909 rt->rt6i_nexthop = neigh;
910 atomic_set(&rt->u.dst.__refcnt, 1);
911 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
912 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
913 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
914 rt->u.dst.output = output;
915
916#if 0 /* there's no chance to use these for ndisc */
917 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
918 ? DST_HOST
919 : 0;
920 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
921 rt->rt6i_dst.plen = 128;
922#endif
923
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700924 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925 rt->u.dst.next = ndisc_dst_gc_list;
926 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700927 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928
929 fib6_force_start_gc();
930
931out:
932 return (struct dst_entry *)rt;
933}
934
935int ndisc_dst_gc(int *more)
936{
937 struct dst_entry *dst, *next, **pprev;
938 int freed;
939
940 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700941 freed = 0;
942
943 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700945
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946 while ((dst = *pprev) != NULL) {
947 if (!atomic_read(&dst->__refcnt)) {
948 *pprev = dst->next;
949 dst_free(dst);
950 freed++;
951 } else {
952 pprev = &dst->next;
953 (*more)++;
954 }
955 }
956
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700957 spin_unlock_bh(&ndisc_lock);
958
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 return freed;
960}
961
962static int ip6_dst_gc(void)
963{
964 static unsigned expire = 30*HZ;
965 static unsigned long last_gc;
966 unsigned long now = jiffies;
967
968 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
969 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
970 goto out;
971
972 expire++;
973 fib6_run_gc(expire);
974 last_gc = now;
975 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
976 expire = ip6_rt_gc_timeout>>1;
977
978out:
979 expire -= expire>>ip6_rt_gc_elasticity;
980 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
981}
982
/* Clean the host part of a prefix. This is not necessary for the radix
   tree, but it results in cleaner routing tables.

   Remove it only once everything works!
 */
988
989static int ipv6_get_mtu(struct net_device *dev)
990{
991 int mtu = IPV6_MIN_MTU;
992 struct inet6_dev *idev;
993
994 idev = in6_dev_get(dev);
995 if (idev) {
996 mtu = idev->cnf.mtu6;
997 in6_dev_put(idev);
998 }
999 return mtu;
1000}
1001
1002int ipv6_get_hoplimit(struct net_device *dev)
1003{
1004 int hoplimit = ipv6_devconf.hop_limit;
1005 struct inet6_dev *idev;
1006
1007 idev = in6_dev_get(dev);
1008 if (idev) {
1009 hoplimit = idev->cnf.hop_limit;
1010 in6_dev_put(idev);
1011 }
1012 return hoplimit;
1013}
1014
1015/*
1016 *
1017 */
1018
Thomas Graf86872cb2006-08-22 00:01:08 -07001019int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020{
1021 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022 struct rt6_info *rt = NULL;
1023 struct net_device *dev = NULL;
1024 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001025 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 int addr_type;
1027
Thomas Graf86872cb2006-08-22 00:01:08 -07001028 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 return -EINVAL;
1030#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001031 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 return -EINVAL;
1033#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001034 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001035 err = -ENODEV;
Thomas Graf86872cb2006-08-22 00:01:08 -07001036 dev = dev_get_by_index(cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 if (!dev)
1038 goto out;
1039 idev = in6_dev_get(dev);
1040 if (!idev)
1041 goto out;
1042 }
1043
Thomas Graf86872cb2006-08-22 00:01:08 -07001044 if (cfg->fc_metric == 0)
1045 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046
Thomas Graf86872cb2006-08-22 00:01:08 -07001047 table = fib6_new_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001048 if (table == NULL) {
1049 err = -ENOBUFS;
1050 goto out;
1051 }
1052
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 rt = ip6_dst_alloc();
1054
1055 if (rt == NULL) {
1056 err = -ENOMEM;
1057 goto out;
1058 }
1059
1060 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001061 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062
Thomas Graf86872cb2006-08-22 00:01:08 -07001063 if (cfg->fc_protocol == RTPROT_UNSPEC)
1064 cfg->fc_protocol = RTPROT_BOOT;
1065 rt->rt6i_protocol = cfg->fc_protocol;
1066
1067 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068
1069 if (addr_type & IPV6_ADDR_MULTICAST)
1070 rt->u.dst.input = ip6_mc_input;
1071 else
1072 rt->u.dst.input = ip6_forward;
1073
1074 rt->u.dst.output = ip6_output;
1075
Thomas Graf86872cb2006-08-22 00:01:08 -07001076 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1077 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 if (rt->rt6i_dst.plen == 128)
1079 rt->u.dst.flags = DST_HOST;
1080
1081#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001082 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1083 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084#endif
1085
Thomas Graf86872cb2006-08-22 00:01:08 -07001086 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087
1088 /* We cannot add true routes via loopback here,
1089 they would result in kernel looping; promote them to reject routes
1090 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001091 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1093 /* hold loopback dev/idev if we haven't done so. */
1094 if (dev != &loopback_dev) {
1095 if (dev) {
1096 dev_put(dev);
1097 in6_dev_put(idev);
1098 }
1099 dev = &loopback_dev;
1100 dev_hold(dev);
1101 idev = in6_dev_get(dev);
1102 if (!idev) {
1103 err = -ENODEV;
1104 goto out;
1105 }
1106 }
1107 rt->u.dst.output = ip6_pkt_discard_out;
1108 rt->u.dst.input = ip6_pkt_discard;
1109 rt->u.dst.error = -ENETUNREACH;
1110 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1111 goto install_route;
1112 }
1113
Thomas Graf86872cb2006-08-22 00:01:08 -07001114 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115 struct in6_addr *gw_addr;
1116 int gwa_type;
1117
Thomas Graf86872cb2006-08-22 00:01:08 -07001118 gw_addr = &cfg->fc_gateway;
1119 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120 gwa_type = ipv6_addr_type(gw_addr);
1121
1122 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1123 struct rt6_info *grt;
1124
1125 /* IPv6 strictly inhibits using not link-local
1126 addresses as nexthop address.
1127 Otherwise, router will not able to send redirects.
1128 It is very good, but in some (rare!) circumstances
1129 (SIT, PtP, NBMA NOARP links) it is handy to allow
1130 some exceptions. --ANK
1131 */
1132 err = -EINVAL;
1133 if (!(gwa_type&IPV6_ADDR_UNICAST))
1134 goto out;
1135
Thomas Graf86872cb2006-08-22 00:01:08 -07001136 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137
1138 err = -EHOSTUNREACH;
1139 if (grt == NULL)
1140 goto out;
1141 if (dev) {
1142 if (dev != grt->rt6i_dev) {
1143 dst_release(&grt->u.dst);
1144 goto out;
1145 }
1146 } else {
1147 dev = grt->rt6i_dev;
1148 idev = grt->rt6i_idev;
1149 dev_hold(dev);
1150 in6_dev_hold(grt->rt6i_idev);
1151 }
1152 if (!(grt->rt6i_flags&RTF_GATEWAY))
1153 err = 0;
1154 dst_release(&grt->u.dst);
1155
1156 if (err)
1157 goto out;
1158 }
1159 err = -EINVAL;
1160 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1161 goto out;
1162 }
1163
1164 err = -ENODEV;
1165 if (dev == NULL)
1166 goto out;
1167
Thomas Graf86872cb2006-08-22 00:01:08 -07001168 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1170 if (IS_ERR(rt->rt6i_nexthop)) {
1171 err = PTR_ERR(rt->rt6i_nexthop);
1172 rt->rt6i_nexthop = NULL;
1173 goto out;
1174 }
1175 }
1176
Thomas Graf86872cb2006-08-22 00:01:08 -07001177 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001178
1179install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001180 if (cfg->fc_mx) {
1181 struct nlattr *nla;
1182 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183
Thomas Graf86872cb2006-08-22 00:01:08 -07001184 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1185 int type = nla->nla_type;
1186
1187 if (type) {
1188 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 err = -EINVAL;
1190 goto out;
1191 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001192
1193 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001195 }
1196 }
1197
1198 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1199 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1200 if (!rt->u.dst.metrics[RTAX_MTU-1])
1201 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1202 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1203 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1204 rt->u.dst.dev = dev;
1205 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001206 rt->rt6i_table = table;
Thomas Graf86872cb2006-08-22 00:01:08 -07001207 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208
1209out:
1210 if (dev)
1211 dev_put(dev);
1212 if (idev)
1213 in6_dev_put(idev);
1214 if (rt)
1215 dst_free((struct dst_entry *) rt);
1216 return err;
1217}
1218
Thomas Graf86872cb2006-08-22 00:01:08 -07001219static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220{
1221 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001222 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223
Patrick McHardy6c813a72006-08-06 22:22:47 -07001224 if (rt == &ip6_null_entry)
1225 return -ENOENT;
1226
Thomas Grafc71099a2006-08-04 23:20:06 -07001227 table = rt->rt6i_table;
1228 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229
Thomas Graf86872cb2006-08-22 00:01:08 -07001230 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231 dst_release(&rt->u.dst);
1232
Thomas Grafc71099a2006-08-04 23:20:06 -07001233 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234
1235 return err;
1236}
1237
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001238int ip6_del_rt(struct rt6_info *rt)
1239{
Thomas Graf86872cb2006-08-22 00:01:08 -07001240 return __ip6_del_rt(rt, NULL);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001241}
1242
Thomas Graf86872cb2006-08-22 00:01:08 -07001243static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244{
Thomas Grafc71099a2006-08-04 23:20:06 -07001245 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246 struct fib6_node *fn;
1247 struct rt6_info *rt;
1248 int err = -ESRCH;
1249
Thomas Graf86872cb2006-08-22 00:01:08 -07001250 table = fib6_get_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001251 if (table == NULL)
1252 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253
Thomas Grafc71099a2006-08-04 23:20:06 -07001254 read_lock_bh(&table->tb6_lock);
1255
1256 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001257 &cfg->fc_dst, cfg->fc_dst_len,
1258 &cfg->fc_src, cfg->fc_src_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259
1260 if (fn) {
1261 for (rt = fn->leaf; rt; rt = rt->u.next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001262 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001264 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001266 if (cfg->fc_flags & RTF_GATEWAY &&
1267 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001269 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 continue;
1271 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001272 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273
Thomas Graf86872cb2006-08-22 00:01:08 -07001274 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 }
1276 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001277 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278
1279 return err;
1280}
1281
1282/*
1283 * Handle redirects
1284 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001285struct ip6rd_flowi {
1286 struct flowi fl;
1287 struct in6_addr gateway;
1288};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001290static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1291 struct flowi *fl,
1292 int flags)
1293{
1294 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1295 struct rt6_info *rt;
1296 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001297
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001299 * Get the "current" route for this destination and
1300 * check if the redirect has come from approriate router.
1301 *
1302 * RFC 2461 specifies that redirects should only be
1303 * accepted if they come from the nexthop to the target.
1304 * Due to the way the routes are chosen, this notion
1305 * is a bit fuzzy and one might need to check all possible
1306 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308
Thomas Grafc71099a2006-08-04 23:20:06 -07001309 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001310 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001311restart:
1312 for (rt = fn->leaf; rt; rt = rt->u.next) {
1313 /*
1314 * Current route is on-link; redirect is always invalid.
1315 *
1316 * Seems, previous statement is not true. It could
1317 * be node, which looks for us as on-link (f.e. proxy ndisc)
1318 * But then router serving it might decide, that we should
1319 * know truth 8)8) --ANK (980726).
1320 */
1321 if (rt6_check_expired(rt))
1322 continue;
1323 if (!(rt->rt6i_flags & RTF_GATEWAY))
1324 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001325 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001326 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001327 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001328 continue;
1329 break;
1330 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001331
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001332 if (!rt)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001333 rt = &ip6_null_entry;
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001334 BACKTRACK(&fl->fl6_src);
1335out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001336 dst_hold(&rt->u.dst);
1337
1338 read_unlock_bh(&table->tb6_lock);
1339
1340 return rt;
1341};
1342
1343static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1344 struct in6_addr *src,
1345 struct in6_addr *gateway,
1346 struct net_device *dev)
1347{
1348 struct ip6rd_flowi rdfl = {
1349 .fl = {
1350 .oif = dev->ifindex,
1351 .nl_u = {
1352 .ip6_u = {
1353 .daddr = *dest,
1354 .saddr = *src,
1355 },
1356 },
1357 },
1358 .gateway = *gateway,
1359 };
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -07001360 int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001361
1362 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1363}
1364
1365void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1366 struct in6_addr *saddr,
1367 struct neighbour *neigh, u8 *lladdr, int on_link)
1368{
1369 struct rt6_info *rt, *nrt = NULL;
1370 struct netevent_redirect netevent;
1371
1372 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1373
1374 if (rt == &ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 if (net_ratelimit())
1376 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1377 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001378 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 }
1380
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381 /*
1382 * We have finally decided to accept it.
1383 */
1384
1385 neigh_update(neigh, lladdr, NUD_STALE,
1386 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1387 NEIGH_UPDATE_F_OVERRIDE|
1388 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1389 NEIGH_UPDATE_F_ISROUTER))
1390 );
1391
1392 /*
1393 * Redirect received -> path was valid.
1394 * Look, redirects are sent only in response to data packets,
1395 * so that this nexthop apparently is reachable. --ANK
1396 */
1397 dst_confirm(&rt->u.dst);
1398
1399 /* Duplicate redirect: silently ignore. */
1400 if (neigh == rt->u.dst.neighbour)
1401 goto out;
1402
1403 nrt = ip6_rt_copy(rt);
1404 if (nrt == NULL)
1405 goto out;
1406
1407 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1408 if (on_link)
1409 nrt->rt6i_flags &= ~RTF_GATEWAY;
1410
1411 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1412 nrt->rt6i_dst.plen = 128;
1413 nrt->u.dst.flags |= DST_HOST;
1414
1415 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1416 nrt->rt6i_nexthop = neigh_clone(neigh);
1417 /* Reset pmtu, it may be better */
1418 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1419 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1420
Thomas Graf40e22e82006-08-22 00:00:45 -07001421 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 goto out;
1423
Tom Tucker8d717402006-07-30 20:43:36 -07001424 netevent.old = &rt->u.dst;
1425 netevent.new = &nrt->u.dst;
1426 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1427
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001429 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 return;
1431 }
1432
1433out:
1434 dst_release(&rt->u.dst);
1435 return;
1436}
1437
1438/*
1439 * Handle ICMP "packet too big" messages
1440 * i.e. Path MTU discovery
1441 */
1442
1443void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1444 struct net_device *dev, u32 pmtu)
1445{
1446 struct rt6_info *rt, *nrt;
1447 int allfrag = 0;
1448
1449 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1450 if (rt == NULL)
1451 return;
1452
1453 if (pmtu >= dst_mtu(&rt->u.dst))
1454 goto out;
1455
1456 if (pmtu < IPV6_MIN_MTU) {
1457 /*
1458 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1459 * MTU (1280) and a fragment header should always be included
1460 * after a node receiving Too Big message reporting PMTU is
1461 * less than the IPv6 Minimum Link MTU.
1462 */
1463 pmtu = IPV6_MIN_MTU;
1464 allfrag = 1;
1465 }
1466
1467 /* New mtu received -> path was valid.
1468 They are sent only in response to data packets,
1469 so that this nexthop apparently is reachable. --ANK
1470 */
1471 dst_confirm(&rt->u.dst);
1472
1473 /* Host route. If it is static, it would be better
1474 not to override it, but add new one, so that
1475 when cache entry will expire old pmtu
1476 would return automatically.
1477 */
1478 if (rt->rt6i_flags & RTF_CACHE) {
1479 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1480 if (allfrag)
1481 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1482 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1483 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1484 goto out;
1485 }
1486
1487 /* Network route.
1488 Two cases are possible:
1489 1. It is connected route. Action: COW
1490 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1491 */
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001492 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001493 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001494 else
1495 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001496
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001497 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001498 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1499 if (allfrag)
1500 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1501
1502 /* According to RFC 1981, detecting PMTU increase shouldn't be
1503 * happened within 5 mins, the recommended timer is 10 mins.
1504 * Here this route expiration time is set to ip6_rt_mtu_expires
1505 * which is 10 mins. After 10 mins the decreased pmtu is expired
1506 * and detecting PMTU increase will be automatically happened.
1507 */
1508 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1509 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1510
Thomas Graf40e22e82006-08-22 00:00:45 -07001511 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513out:
1514 dst_release(&rt->u.dst);
1515}
1516
1517/*
1518 * Misc support functions
1519 */
1520
1521static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1522{
1523 struct rt6_info *rt = ip6_dst_alloc();
1524
1525 if (rt) {
1526 rt->u.dst.input = ort->u.dst.input;
1527 rt->u.dst.output = ort->u.dst.output;
1528
1529 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1530 rt->u.dst.dev = ort->u.dst.dev;
1531 if (rt->u.dst.dev)
1532 dev_hold(rt->u.dst.dev);
1533 rt->rt6i_idev = ort->rt6i_idev;
1534 if (rt->rt6i_idev)
1535 in6_dev_hold(rt->rt6i_idev);
1536 rt->u.dst.lastuse = jiffies;
1537 rt->rt6i_expires = 0;
1538
1539 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1540 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1541 rt->rt6i_metric = 0;
1542
1543 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1544#ifdef CONFIG_IPV6_SUBTREES
1545 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1546#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001547 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548 }
1549 return rt;
1550}
1551
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001552#ifdef CONFIG_IPV6_ROUTE_INFO
1553static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1554 struct in6_addr *gwaddr, int ifindex)
1555{
1556 struct fib6_node *fn;
1557 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001558 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001559
Thomas Grafc71099a2006-08-04 23:20:06 -07001560 table = fib6_get_table(RT6_TABLE_INFO);
1561 if (table == NULL)
1562 return NULL;
1563
1564 write_lock_bh(&table->tb6_lock);
1565 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001566 if (!fn)
1567 goto out;
1568
1569 for (rt = fn->leaf; rt; rt = rt->u.next) {
1570 if (rt->rt6i_dev->ifindex != ifindex)
1571 continue;
1572 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1573 continue;
1574 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1575 continue;
1576 dst_hold(&rt->u.dst);
1577 break;
1578 }
1579out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001580 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001581 return rt;
1582}
1583
1584static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1585 struct in6_addr *gwaddr, int ifindex,
1586 unsigned pref)
1587{
Thomas Graf86872cb2006-08-22 00:01:08 -07001588 struct fib6_config cfg = {
1589 .fc_table = RT6_TABLE_INFO,
1590 .fc_metric = 1024,
1591 .fc_ifindex = ifindex,
1592 .fc_dst_len = prefixlen,
1593 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1594 RTF_UP | RTF_PREF(pref),
1595 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001596
Thomas Graf86872cb2006-08-22 00:01:08 -07001597 ipv6_addr_copy(&cfg.fc_dst, prefix);
1598 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1599
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001600 /* We should treat it as a default route if prefix length is 0. */
1601 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001602 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001603
Thomas Graf86872cb2006-08-22 00:01:08 -07001604 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001605
1606 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1607}
1608#endif
1609
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1611{
1612 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001613 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001614
Thomas Grafc71099a2006-08-04 23:20:06 -07001615 table = fib6_get_table(RT6_TABLE_DFLT);
1616 if (table == NULL)
1617 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618
Thomas Grafc71099a2006-08-04 23:20:06 -07001619 write_lock_bh(&table->tb6_lock);
1620 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001622 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1624 break;
1625 }
1626 if (rt)
1627 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001628 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629 return rt;
1630}
1631
1632struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001633 struct net_device *dev,
1634 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635{
Thomas Graf86872cb2006-08-22 00:01:08 -07001636 struct fib6_config cfg = {
1637 .fc_table = RT6_TABLE_DFLT,
1638 .fc_metric = 1024,
1639 .fc_ifindex = dev->ifindex,
1640 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1641 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1642 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643
Thomas Graf86872cb2006-08-22 00:01:08 -07001644 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001645
Thomas Graf86872cb2006-08-22 00:01:08 -07001646 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648 return rt6_get_dflt_router(gwaddr, dev);
1649}
1650
1651void rt6_purge_dflt_routers(void)
1652{
1653 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001654 struct fib6_table *table;
1655
1656 /* NOTE: Keep consistent with rt6_get_dflt_router */
1657 table = fib6_get_table(RT6_TABLE_DFLT);
1658 if (table == NULL)
1659 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660
1661restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001662 read_lock_bh(&table->tb6_lock);
1663 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1665 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001666 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001667 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 goto restart;
1669 }
1670 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001671 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672}
1673
Thomas Graf86872cb2006-08-22 00:01:08 -07001674static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1675 struct fib6_config *cfg)
1676{
1677 memset(cfg, 0, sizeof(*cfg));
1678
1679 cfg->fc_table = RT6_TABLE_MAIN;
1680 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1681 cfg->fc_metric = rtmsg->rtmsg_metric;
1682 cfg->fc_expires = rtmsg->rtmsg_info;
1683 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1684 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1685 cfg->fc_flags = rtmsg->rtmsg_flags;
1686
1687 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1688 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1689 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1690}
1691
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1693{
Thomas Graf86872cb2006-08-22 00:01:08 -07001694 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 struct in6_rtmsg rtmsg;
1696 int err;
1697
1698 switch(cmd) {
1699 case SIOCADDRT: /* Add a route */
1700 case SIOCDELRT: /* Delete a route */
1701 if (!capable(CAP_NET_ADMIN))
1702 return -EPERM;
1703 err = copy_from_user(&rtmsg, arg,
1704 sizeof(struct in6_rtmsg));
1705 if (err)
1706 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001707
1708 rtmsg_to_fib6_config(&rtmsg, &cfg);
1709
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 rtnl_lock();
1711 switch (cmd) {
1712 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001713 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 break;
1715 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001716 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717 break;
1718 default:
1719 err = -EINVAL;
1720 }
1721 rtnl_unlock();
1722
1723 return err;
1724 };
1725
1726 return -EINVAL;
1727}
1728
1729/*
1730 * Drop the packet on the floor
1731 */
1732
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001733static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734{
Lv Liangying76d0cc12006-08-29 00:00:47 -07001735 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1736 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1737 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1738
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1740 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1741 kfree_skb(skb);
1742 return 0;
1743}
1744
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001745static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746{
1747 skb->dev = skb->dst->dev;
1748 return ip6_pkt_discard(skb);
1749}
1750
1751/*
1752 * Allocate a dst for local (unicast / anycast) address.
1753 */
1754
1755struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1756 const struct in6_addr *addr,
1757 int anycast)
1758{
1759 struct rt6_info *rt = ip6_dst_alloc();
1760
1761 if (rt == NULL)
1762 return ERR_PTR(-ENOMEM);
1763
1764 dev_hold(&loopback_dev);
1765 in6_dev_hold(idev);
1766
1767 rt->u.dst.flags = DST_HOST;
1768 rt->u.dst.input = ip6_input;
1769 rt->u.dst.output = ip6_output;
1770 rt->rt6i_dev = &loopback_dev;
1771 rt->rt6i_idev = idev;
1772 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1773 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1774 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1775 rt->u.dst.obsolete = -1;
1776
1777 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001778 if (anycast)
1779 rt->rt6i_flags |= RTF_ANYCAST;
1780 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 rt->rt6i_flags |= RTF_LOCAL;
1782 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1783 if (rt->rt6i_nexthop == NULL) {
1784 dst_free((struct dst_entry *) rt);
1785 return ERR_PTR(-ENOMEM);
1786 }
1787
1788 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1789 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001790 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791
1792 atomic_set(&rt->u.dst.__refcnt, 1);
1793
1794 return rt;
1795}
1796
1797static int fib6_ifdown(struct rt6_info *rt, void *arg)
1798{
1799 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1800 rt != &ip6_null_entry) {
1801 RT6_TRACE("deleted by ifdown %p\n", rt);
1802 return -1;
1803 }
1804 return 0;
1805}
1806
1807void rt6_ifdown(struct net_device *dev)
1808{
Thomas Grafc71099a2006-08-04 23:20:06 -07001809 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810}
1811
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
1817
1818static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1819{
1820 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1821 struct inet6_dev *idev;
1822
1823 /* In IPv6 pmtu discovery is not optional,
1824 so that RTAX_MTU lock cannot disable it.
1825 We still use this lock to block changes
1826 caused by addrconf/ndisc.
1827 */
1828
1829 idev = __in6_dev_get(arg->dev);
1830 if (idev == NULL)
1831 return 0;
1832
1833 /* For administrative MTU increase, there is no way to discover
1834 IPv6 PMTU increase, so PMTU increase should be updated here.
1835 Since RFC 1981 doesn't include administrative MTU increase
1836 update PMTU increase is a MUST. (i.e. jumbo frame)
1837 */
1838 /*
1839 If new MTU is less than route PMTU, this new MTU will be the
1840 lowest MTU in the path, update the route PMTU to reflect PMTU
1841 decreases; if new MTU is greater than route PMTU, and the
1842 old MTU is the lowest MTU in the path, update the route PMTU
1843 to reflect the increase. In this case if the other nodes' MTU
1844 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1845 PMTU discouvery.
1846 */
1847 if (rt->rt6i_dev == arg->dev &&
1848 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1849 (dst_mtu(&rt->u.dst) > arg->mtu ||
1850 (dst_mtu(&rt->u.dst) < arg->mtu &&
1851 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1852 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1853 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1854 return 0;
1855}
1856
1857void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1858{
Thomas Grafc71099a2006-08-04 23:20:06 -07001859 struct rt6_mtu_change_arg arg = {
1860 .dev = dev,
1861 .mtu = mtu,
1862 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863
Thomas Grafc71099a2006-08-04 23:20:06 -07001864 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865}
1866
Thomas Graf86872cb2006-08-22 00:01:08 -07001867static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1868 [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) },
1869 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07001870 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07001871 [RTA_PRIORITY] = { .type = NLA_U32 },
1872 [RTA_METRICS] = { .type = NLA_NESTED },
1873};
1874
1875static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1876 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877{
Thomas Graf86872cb2006-08-22 00:01:08 -07001878 struct rtmsg *rtm;
1879 struct nlattr *tb[RTA_MAX+1];
1880 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881
Thomas Graf86872cb2006-08-22 00:01:08 -07001882 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1883 if (err < 0)
1884 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885
Thomas Graf86872cb2006-08-22 00:01:08 -07001886 err = -EINVAL;
1887 rtm = nlmsg_data(nlh);
1888 memset(cfg, 0, sizeof(*cfg));
1889
1890 cfg->fc_table = rtm->rtm_table;
1891 cfg->fc_dst_len = rtm->rtm_dst_len;
1892 cfg->fc_src_len = rtm->rtm_src_len;
1893 cfg->fc_flags = RTF_UP;
1894 cfg->fc_protocol = rtm->rtm_protocol;
1895
1896 if (rtm->rtm_type == RTN_UNREACHABLE)
1897 cfg->fc_flags |= RTF_REJECT;
1898
1899 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1900 cfg->fc_nlinfo.nlh = nlh;
1901
1902 if (tb[RTA_GATEWAY]) {
1903 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1904 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001905 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001906
1907 if (tb[RTA_DST]) {
1908 int plen = (rtm->rtm_dst_len + 7) >> 3;
1909
1910 if (nla_len(tb[RTA_DST]) < plen)
1911 goto errout;
1912
1913 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001915
1916 if (tb[RTA_SRC]) {
1917 int plen = (rtm->rtm_src_len + 7) >> 3;
1918
1919 if (nla_len(tb[RTA_SRC]) < plen)
1920 goto errout;
1921
1922 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001924
1925 if (tb[RTA_OIF])
1926 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1927
1928 if (tb[RTA_PRIORITY])
1929 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1930
1931 if (tb[RTA_METRICS]) {
1932 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1933 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001935
1936 if (tb[RTA_TABLE])
1937 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1938
1939 err = 0;
1940errout:
1941 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942}
1943
1944int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1945{
Thomas Graf86872cb2006-08-22 00:01:08 -07001946 struct fib6_config cfg;
1947 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948
Thomas Graf86872cb2006-08-22 00:01:08 -07001949 err = rtm_to_fib6_config(skb, nlh, &cfg);
1950 if (err < 0)
1951 return err;
1952
1953 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954}
1955
1956int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1957{
Thomas Graf86872cb2006-08-22 00:01:08 -07001958 struct fib6_config cfg;
1959 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960
Thomas Graf86872cb2006-08-22 00:01:08 -07001961 err = rtm_to_fib6_config(skb, nlh, &cfg);
1962 if (err < 0)
1963 return err;
1964
1965 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966}
1967
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001969 struct in6_addr *dst, struct in6_addr *src,
1970 int iif, int type, u32 pid, u32 seq,
1971 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972{
1973 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07001974 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975 struct rta_cacheinfo ci;
Patrick McHardy9e762a42006-08-10 23:09:48 -07001976 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977
1978 if (prefix) { /* user wants prefix routes only */
1979 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1980 /* success since this is not a prefix route */
1981 return 1;
1982 }
1983 }
1984
Thomas Graf2d7202b2006-08-22 00:01:27 -07001985 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1986 if (nlh == NULL)
1987 return -ENOBUFS;
1988
1989 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990 rtm->rtm_family = AF_INET6;
1991 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1992 rtm->rtm_src_len = rt->rt6i_src.plen;
1993 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07001994 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07001995 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07001996 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07001997 table = RT6_TABLE_UNSPEC;
1998 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07001999 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000 if (rt->rt6i_flags&RTF_REJECT)
2001 rtm->rtm_type = RTN_UNREACHABLE;
2002 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2003 rtm->rtm_type = RTN_LOCAL;
2004 else
2005 rtm->rtm_type = RTN_UNICAST;
2006 rtm->rtm_flags = 0;
2007 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2008 rtm->rtm_protocol = rt->rt6i_protocol;
2009 if (rt->rt6i_flags&RTF_DYNAMIC)
2010 rtm->rtm_protocol = RTPROT_REDIRECT;
2011 else if (rt->rt6i_flags & RTF_ADDRCONF)
2012 rtm->rtm_protocol = RTPROT_KERNEL;
2013 else if (rt->rt6i_flags&RTF_DEFAULT)
2014 rtm->rtm_protocol = RTPROT_RA;
2015
2016 if (rt->rt6i_flags&RTF_CACHE)
2017 rtm->rtm_flags |= RTM_F_CLONED;
2018
2019 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002020 NLA_PUT(skb, RTA_DST, 16, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002021 rtm->rtm_dst_len = 128;
2022 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002023 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024#ifdef CONFIG_IPV6_SUBTREES
2025 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002026 NLA_PUT(skb, RTA_SRC, 16, src);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027 rtm->rtm_src_len = 128;
2028 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002029 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030#endif
2031 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002032 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033 else if (dst) {
2034 struct in6_addr saddr_buf;
2035 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002036 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002038
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002040 goto nla_put_failure;
2041
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002043 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2044
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002046 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2047
2048 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2050 if (rt->rt6i_expires)
2051 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2052 else
2053 ci.rta_expires = 0;
2054 ci.rta_used = rt->u.dst.__use;
2055 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2056 ci.rta_error = rt->u.dst.error;
2057 ci.rta_id = 0;
2058 ci.rta_ts = 0;
2059 ci.rta_tsage = 0;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002060 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061
Thomas Graf2d7202b2006-08-22 00:01:27 -07002062 return nlmsg_end(skb, nlh);
2063
2064nla_put_failure:
2065 return nlmsg_cancel(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066}
2067
Patrick McHardy1b43af52006-08-10 23:11:17 -07002068int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069{
2070 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2071 int prefix;
2072
Thomas Graf2d7202b2006-08-22 00:01:27 -07002073 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2074 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002075 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2076 } else
2077 prefix = 0;
2078
2079 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2080 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002081 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082}
2083
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2085{
Thomas Grafab364a62006-08-22 00:01:47 -07002086 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002088 struct sk_buff *skb;
2089 struct rtmsg *rtm;
2090 struct flowi fl;
2091 int err, iif = 0;
2092
2093 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2094 if (err < 0)
2095 goto errout;
2096
2097 err = -EINVAL;
2098 memset(&fl, 0, sizeof(fl));
2099
2100 if (tb[RTA_SRC]) {
2101 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2102 goto errout;
2103
2104 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2105 }
2106
2107 if (tb[RTA_DST]) {
2108 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2109 goto errout;
2110
2111 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2112 }
2113
2114 if (tb[RTA_IIF])
2115 iif = nla_get_u32(tb[RTA_IIF]);
2116
2117 if (tb[RTA_OIF])
2118 fl.oif = nla_get_u32(tb[RTA_OIF]);
2119
2120 if (iif) {
2121 struct net_device *dev;
2122 dev = __dev_get_by_index(iif);
2123 if (!dev) {
2124 err = -ENODEV;
2125 goto errout;
2126 }
2127 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128
2129 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002130 if (skb == NULL) {
2131 err = -ENOBUFS;
2132 goto errout;
2133 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134
2135 /* Reserve room for dummy headers, this skb can pass
2136 through good chunk of routing engine.
2137 */
2138 skb->mac.raw = skb->data;
2139 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2140
Thomas Grafab364a62006-08-22 00:01:47 -07002141 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002142 skb->dst = &rt->u.dst;
2143
Thomas Grafab364a62006-08-22 00:01:47 -07002144 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002145 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002146 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002148 kfree_skb(skb);
2149 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002150 }
2151
Thomas Graf2942e902006-08-15 00:30:25 -07002152 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002153errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155}
2156
Thomas Graf86872cb2006-08-22 00:01:08 -07002157void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158{
2159 struct sk_buff *skb;
Thomas Graf86872cb2006-08-22 00:01:08 -07002160 u32 pid = 0, seq = 0;
2161 struct nlmsghdr *nlh = NULL;
Thomas Graf21713eb2006-08-15 00:35:24 -07002162 int payload = sizeof(struct rtmsg) + 256;
2163 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164
Thomas Graf86872cb2006-08-22 00:01:08 -07002165 if (info) {
2166 pid = info->pid;
2167 nlh = info->nlh;
2168 if (nlh)
2169 seq = nlh->nlmsg_seq;
2170 }
2171
Thomas Graf21713eb2006-08-15 00:35:24 -07002172 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2173 if (skb == NULL)
2174 goto errout;
2175
2176 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2177 if (err < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002178 kfree_skb(skb);
Thomas Graf21713eb2006-08-15 00:35:24 -07002179 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180 }
Thomas Graf21713eb2006-08-15 00:35:24 -07002181
2182 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2183errout:
2184 if (err < 0)
2185 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186}
2187
2188/*
2189 * /proc
2190 */
2191
2192#ifdef CONFIG_PROC_FS
2193
/*
 * Nominal size (150 bytes) of one record written by rt6_info_route():
 * three 32-digit hex addresses, two " %02x " prefix lengths and the
 * trailing counters, device name and newline.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char	*buffer;	/* output buffer */
	int	offset;		/* read offset requested by the caller */
	int	length;		/* number of bytes requested by the caller */
	int	skip;		/* records skipped so far to reach offset */
	int	len;		/* bytes written into buffer so far */
};
2204
2205static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2206{
2207 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2208 int i;
2209
2210 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2211 arg->skip++;
2212 return 0;
2213 }
2214
2215 if (arg->len >= arg->length)
2216 return 0;
2217
2218 for (i=0; i<16; i++) {
2219 sprintf(arg->buffer + arg->len, "%02x",
2220 rt->rt6i_dst.addr.s6_addr[i]);
2221 arg->len += 2;
2222 }
2223 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2224 rt->rt6i_dst.plen);
2225
2226#ifdef CONFIG_IPV6_SUBTREES
2227 for (i=0; i<16; i++) {
2228 sprintf(arg->buffer + arg->len, "%02x",
2229 rt->rt6i_src.addr.s6_addr[i]);
2230 arg->len += 2;
2231 }
2232 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2233 rt->rt6i_src.plen);
2234#else
2235 sprintf(arg->buffer + arg->len,
2236 "00000000000000000000000000000000 00 ");
2237 arg->len += 36;
2238#endif
2239
2240 if (rt->rt6i_nexthop) {
2241 for (i=0; i<16; i++) {
2242 sprintf(arg->buffer + arg->len, "%02x",
2243 rt->rt6i_nexthop->primary_key[i]);
2244 arg->len += 2;
2245 }
2246 } else {
2247 sprintf(arg->buffer + arg->len,
2248 "00000000000000000000000000000000");
2249 arg->len += 32;
2250 }
2251 arg->len += sprintf(arg->buffer + arg->len,
2252 " %08x %08x %08x %08x %8s\n",
2253 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2254 rt->u.dst.__use, rt->rt6i_flags,
2255 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2256 return 0;
2257}
2258
2259static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2260{
Thomas Grafc71099a2006-08-04 23:20:06 -07002261 struct rt6_proc_arg arg = {
2262 .buffer = buffer,
2263 .offset = offset,
2264 .length = length,
2265 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266
Thomas Grafc71099a2006-08-04 23:20:06 -07002267 fib6_clean_all(rt6_info_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268
2269 *start = buffer;
2270 if (offset)
2271 *start += offset % RT6_INFO_LEN;
2272
2273 arg.len -= offset % RT6_INFO_LEN;
2274
2275 if (arg.len > length)
2276 arg.len = length;
2277 if (arg.len < 0)
2278 arg.len = 0;
2279
2280 return arg.len;
2281}
2282
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2284{
2285 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2286 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2287 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2288 rt6_stats.fib_rt_cache,
2289 atomic_read(&ip6_dst_ops.entries),
2290 rt6_stats.fib_discarded_routes);
2291
2292 return 0;
2293}
2294
2295static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2296{
2297 return single_open(file, rt6_stats_seq_show, NULL);
2298}
2299
2300static struct file_operations rt6_stats_seq_fops = {
2301 .owner = THIS_MODULE,
2302 .open = rt6_stats_seq_open,
2303 .read = seq_read,
2304 .llseek = seq_lseek,
2305 .release = single_release,
2306};
2307#endif /* CONFIG_PROC_FS */
2308
2309#ifdef CONFIG_SYSCTL
2310
2311static int flush_delay;
2312
2313static
2314int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2315 void __user *buffer, size_t *lenp, loff_t *ppos)
2316{
2317 if (write) {
2318 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2319 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2320 return 0;
2321 } else
2322 return -EINVAL;
2323}
2324
2325ctl_table ipv6_route_table[] = {
2326 {
2327 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2328 .procname = "flush",
2329 .data = &flush_delay,
2330 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002331 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332 .proc_handler = &ipv6_sysctl_rtcache_flush
2333 },
2334 {
2335 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2336 .procname = "gc_thresh",
2337 .data = &ip6_dst_ops.gc_thresh,
2338 .maxlen = sizeof(int),
2339 .mode = 0644,
2340 .proc_handler = &proc_dointvec,
2341 },
2342 {
2343 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2344 .procname = "max_size",
2345 .data = &ip6_rt_max_size,
2346 .maxlen = sizeof(int),
2347 .mode = 0644,
2348 .proc_handler = &proc_dointvec,
2349 },
2350 {
2351 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2352 .procname = "gc_min_interval",
2353 .data = &ip6_rt_gc_min_interval,
2354 .maxlen = sizeof(int),
2355 .mode = 0644,
2356 .proc_handler = &proc_dointvec_jiffies,
2357 .strategy = &sysctl_jiffies,
2358 },
2359 {
2360 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2361 .procname = "gc_timeout",
2362 .data = &ip6_rt_gc_timeout,
2363 .maxlen = sizeof(int),
2364 .mode = 0644,
2365 .proc_handler = &proc_dointvec_jiffies,
2366 .strategy = &sysctl_jiffies,
2367 },
2368 {
2369 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2370 .procname = "gc_interval",
2371 .data = &ip6_rt_gc_interval,
2372 .maxlen = sizeof(int),
2373 .mode = 0644,
2374 .proc_handler = &proc_dointvec_jiffies,
2375 .strategy = &sysctl_jiffies,
2376 },
2377 {
2378 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2379 .procname = "gc_elasticity",
2380 .data = &ip6_rt_gc_elasticity,
2381 .maxlen = sizeof(int),
2382 .mode = 0644,
2383 .proc_handler = &proc_dointvec_jiffies,
2384 .strategy = &sysctl_jiffies,
2385 },
2386 {
2387 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2388 .procname = "mtu_expires",
2389 .data = &ip6_rt_mtu_expires,
2390 .maxlen = sizeof(int),
2391 .mode = 0644,
2392 .proc_handler = &proc_dointvec_jiffies,
2393 .strategy = &sysctl_jiffies,
2394 },
2395 {
2396 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2397 .procname = "min_adv_mss",
2398 .data = &ip6_rt_min_advmss,
2399 .maxlen = sizeof(int),
2400 .mode = 0644,
2401 .proc_handler = &proc_dointvec_jiffies,
2402 .strategy = &sysctl_jiffies,
2403 },
2404 {
2405 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2406 .procname = "gc_min_interval_ms",
2407 .data = &ip6_rt_gc_min_interval,
2408 .maxlen = sizeof(int),
2409 .mode = 0644,
2410 .proc_handler = &proc_dointvec_ms_jiffies,
2411 .strategy = &sysctl_ms_jiffies,
2412 },
2413 { .ctl_name = 0 }
2414};
2415
2416#endif
2417
2418void __init ip6_route_init(void)
2419{
2420 struct proc_dir_entry *p;
2421
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002422 ip6_dst_ops.kmem_cachep =
2423 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2424 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002425 fib6_init();
2426#ifdef CONFIG_PROC_FS
2427 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2428 if (p)
2429 p->owner = THIS_MODULE;
2430
2431 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2432#endif
2433#ifdef CONFIG_XFRM
2434 xfrm6_init();
2435#endif
Thomas Graf101367c2006-08-04 03:39:02 -07002436#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2437 fib6_rules_init();
2438#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002439}
2440
2441void ip6_route_cleanup(void)
2442{
Thomas Graf101367c2006-08-04 03:39:02 -07002443#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2444 fib6_rules_cleanup();
2445#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002446#ifdef CONFIG_PROC_FS
2447 proc_net_remove("ipv6_route");
2448 proc_net_remove("rt6_stats");
2449#endif
2450#ifdef CONFIG_XFRM
2451 xfrm6_fini();
2452#endif
2453 rt6_ifdown(NULL);
2454 fib6_gc_cleanup();
2455 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2456}