blob: 438977e2085df2cc5bccc9e8237286d7c621b122 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
38#include <linux/netlink.h>
39#include <linux/if_arp.h>
40
41#ifdef CONFIG_PROC_FS
42#include <linux/proc_fs.h>
43#include <linux/seq_file.h>
44#endif
45
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070056#include <net/netevent.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug verbosity; raise to 3 (or more) to compile the tracing helpers in. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers expand to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* RDBG takes a fully parenthesised printk argument list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -080075#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -080077#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
79
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Two further static reject routes, identical to ip6_null_entry except
 * for the error they carry: -EACCES (prohibit) and -EINVAL (blackhole).
 */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
186
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187/* allocate dst with ip6_dst_ops */
188static __inline__ struct rt6_info *ip6_dst_alloc(void)
189{
190 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
191}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
201 }
202}
203
204static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
206{
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
209
210 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212 if (loopback_idev != NULL) {
213 rt->rt6i_idev = loopback_idev;
214 in6_dev_put(idev);
215 }
216 }
217}
218
219static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220{
221 return (rt->rt6i_flags & RTF_EXPIRES &&
222 time_after(jiffies, rt->rt6i_expires));
223}
224
Thomas Grafc71099a2006-08-04 23:20:06 -0700225static inline int rt6_need_strict(struct in6_addr *daddr)
226{
227 return (ipv6_addr_type(daddr) &
228 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
229}
230
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700232 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 */
234
235static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236 int oif,
237 int strict)
238{
239 struct rt6_info *local = NULL;
240 struct rt6_info *sprt;
241
242 if (oif) {
243 for (sprt = rt; sprt; sprt = sprt->u.next) {
244 struct net_device *dev = sprt->rt6i_dev;
245 if (dev->ifindex == oif)
246 return sprt;
247 if (dev->flags & IFF_LOOPBACK) {
248 if (sprt->rt6i_idev == NULL ||
249 sprt->rt6i_idev->dev->ifindex != oif) {
250 if (strict && oif)
251 continue;
252 if (local && (!oif ||
253 local->rt6i_idev->dev->ifindex == oif))
254 continue;
255 }
256 local = sprt;
257 }
258 }
259
260 if (local)
261 return local;
262
263 if (strict)
264 return &ip6_null_entry;
265 }
266 return rt;
267}
268
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a neighbour solicitation towards the
 * nexthop of @rt when its neighbour entry is not in a NUD_VALID state and
 * the per-interface rtr_probe_interval has elapsed since the entry was
 * last updated.  A NULL @rt or a route without nexthop is ignored.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; neigh->updated is refreshed before sending for that purpose.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* Cheap unlocked test first. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/* Re-check under the neighbour lock and claim this probe slot. */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* Solicit the target on its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
304
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800306 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800308static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800310 struct net_device *dev = rt->rt6i_dev;
311 if (!oif || dev->ifindex == oif)
312 return 2;
313 if ((dev->flags & IFF_LOOPBACK) &&
314 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315 return 1;
316 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317}
318
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800319static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800321 struct neighbour *neigh = rt->rt6i_nexthop;
322 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700323 if (rt->rt6i_flags & RTF_NONEXTHOP ||
324 !(rt->rt6i_flags & RTF_GATEWAY))
325 m = 1;
326 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 read_lock_bh(&neigh->lock);
328 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700329 m = 2;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800330 read_unlock_bh(&neigh->lock);
331 }
332 return m;
333}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335static int rt6_score_route(struct rt6_info *rt, int oif,
336 int strict)
337{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700338 int m, n;
339
340 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800341 if (!m && (strict & RT6_SELECT_F_IFACE))
342 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800343#ifdef CONFIG_IPV6_ROUTER_PREF
344 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700346 n = rt6_check_neigh(rt);
347 if (n > 1)
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800348 m |= 16;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700349 else if (!n && strict & RT6_SELECT_F_REACHABLE)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800350 return -1;
351 return m;
352}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800354static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355 int strict)
356{
357 struct rt6_info *match = NULL, *last = NULL;
358 struct rt6_info *rt, *rt0 = *head;
359 u32 metric;
360 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800362 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363 __FUNCTION__, head, head ? *head : NULL, oif);
364
365 for (rt = rt0, metric = rt0->rt6i_metric;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700366 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800367 rt = rt->u.next) {
368 int m;
369
370 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371 continue;
372
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800373 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800379 if (m > mpri) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800380 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800381 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 mpri = m;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800383 } else {
384 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 }
386 }
387
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800388 if (!match &&
389 (strict & RT6_SELECT_F_REACHABLE) &&
390 last && last != rt0) {
391 /* no entries matched; do round-robin */
Ingo Molnar34af9462006-06-27 02:53:55 -0700392 static DEFINE_SPINLOCK(lock);
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700393 spin_lock(&lock);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800394 *head = rt0->u.next;
395 rt0->u.next = last->u.next;
396 last->u.next = rt0;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700397 spin_unlock(&lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 }
399
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800400 RT6_TRACE("%s() => %p, score=%d\n",
401 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800403 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404}
405
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option (RFC 4191) received from the router
 * @gwaddr on @dev.
 *
 * @opt:    start of the option
 * @len:    number of bytes available at @opt
 *
 * The option is validated, then the matching route-info route is deleted
 * (lifetime 0), created, or refreshed with the new preference and expiry.
 *
 * Returns 0 on success (including "nothing to do"), -EINVAL when the option
 * is malformed or carries the reserved preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/*
	 * RFC 4191, 2.3: an option carrying the reserved preference value
	 * MUST be ignored rather than treated as medium preference.
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	/* The lifetime arrives in network byte order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* Shorter option: copy only prefix_len bits into a buffer. */
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
483
/*
 * Backtrack towards the root of the fib tree after a strict lookup ended
 * on ip6_null_entry.
 *
 * Expects the following in the expanding function: variables "rt", "fn"
 * and "flags", and labels "restart" and "out".  On reaching the top-level
 * root a reference is taken on rt and control jumps to "out"; on reaching
 * a node carrying route info control jumps to "restart".
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single statement
 * and cannot capture a following "else".
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && (flags & RT6_F_STRICT)) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_TL_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
495
496static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
497 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498{
499 struct fib6_node *fn;
500 struct rt6_info *rt;
501
Thomas Grafc71099a2006-08-04 23:20:06 -0700502 read_lock_bh(&table->tb6_lock);
503 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
504restart:
505 rt = fn->leaf;
506 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
507 BACKTRACK();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700509out:
510 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511
512 rt->u.dst.lastuse = jiffies;
Thomas Grafc71099a2006-08-04 23:20:06 -0700513 rt->u.dst.__use++;
514
515 return rt;
516
517}
518
519struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
520 int oif, int strict)
521{
522 struct flowi fl = {
523 .oif = oif,
524 .nl_u = {
525 .ip6_u = {
526 .daddr = *daddr,
527 /* TODO: saddr */
528 },
529 },
530 };
531 struct dst_entry *dst;
532 int flags = strict ? RT6_F_STRICT : 0;
533
534 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
535 if (dst->error == 0)
536 return (struct rt6_info *) dst;
537
538 dst_release(dst);
539
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 return NULL;
541}
542
Thomas Grafc71099a2006-08-04 23:20:06 -0700543/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 It takes new route entry, the addition fails by any reason the
545 route is freed. In any case, if caller does not hold it, it may
546 be destroyed.
547 */
548
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700549int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
550 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551{
552 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700553 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554
Thomas Grafc71099a2006-08-04 23:20:06 -0700555 table = rt->rt6i_table;
556 write_lock_bh(&table->tb6_lock);
557 err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
558 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559
560 return err;
561}
562
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800563static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
564 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 struct rt6_info *rt;
567
568 /*
569 * Clone the route.
570 */
571
572 rt = ip6_rt_copy(ort);
573
574 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900575 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
576 if (rt->rt6i_dst.plen != 128 &&
577 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
578 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900580 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900582 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 rt->rt6i_dst.plen = 128;
584 rt->rt6i_flags |= RTF_CACHE;
585 rt->u.dst.flags |= DST_HOST;
586
587#ifdef CONFIG_IPV6_SUBTREES
588 if (rt->rt6i_src.plen && saddr) {
589 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
590 rt->rt6i_src.plen = 128;
591 }
592#endif
593
594 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
595
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800596 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800598 return rt;
599}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800601static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
602{
603 struct rt6_info *rt = ip6_rt_copy(ort);
604 if (rt) {
605 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
606 rt->rt6i_dst.plen = 128;
607 rt->rt6i_flags |= RTF_CACHE;
608 if (rt->rt6i_flags & RTF_REJECT)
609 rt->u.dst.error = ort->u.dst.error;
610 rt->u.dst.flags |= DST_HOST;
611 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
612 }
613 return rt;
614}
615
Thomas Grafc71099a2006-08-04 23:20:06 -0700616struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl,
617 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618{
619 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800620 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700621 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800623 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800624 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625
Thomas Grafc71099a2006-08-04 23:20:06 -0700626 if (flags & RT6_F_STRICT)
627 strict = RT6_SELECT_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628
629relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700630 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800632restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700633 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634
635restart:
Thomas Grafc71099a2006-08-04 23:20:06 -0700636 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800638 if (rt == &ip6_null_entry ||
639 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800640 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800642 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700643 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800644
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800645 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800646 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800647 else {
648#if CLONE_OFFLINK_ROUTE
649 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
650#else
651 goto out2;
652#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800654
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800655 dst_release(&rt->u.dst);
656 rt = nrt ? : &ip6_null_entry;
657
658 dst_hold(&rt->u.dst);
659 if (nrt) {
660 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
661 if (!err)
662 goto out2;
663 }
664
665 if (--attempts <= 0)
666 goto out2;
667
668 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700669 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800670 * released someone could insert this route. Relookup.
671 */
672 dst_release(&rt->u.dst);
673 goto relookup;
674
675out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800676 if (reachable) {
677 reachable = 0;
678 goto restart_2;
679 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800680 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700681 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682out2:
683 rt->u.dst.lastuse = jiffies;
684 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700685
686 return rt;
687}
688
689void ip6_route_input(struct sk_buff *skb)
690{
691 struct ipv6hdr *iph = skb->nh.ipv6h;
692 struct flowi fl = {
693 .iif = skb->dev->ifindex,
694 .nl_u = {
695 .ip6_u = {
696 .daddr = iph->daddr,
697 .saddr = iph->saddr,
698 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
699 },
700 },
701 .proto = iph->nexthdr,
702 };
703 int flags = 0;
704
705 if (rt6_need_strict(&iph->daddr))
706 flags |= RT6_F_STRICT;
707
708 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
709}
710
711static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
712 struct flowi *fl, int flags)
713{
714 struct fib6_node *fn;
715 struct rt6_info *rt, *nrt;
716 int strict = 0;
717 int attempts = 3;
718 int err;
719 int reachable = RT6_SELECT_F_REACHABLE;
720
721 if (flags & RT6_F_STRICT)
722 strict = RT6_SELECT_F_IFACE;
723
724relookup:
725 read_lock_bh(&table->tb6_lock);
726
727restart_2:
728 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
729
730restart:
731 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
732 BACKTRACK();
733 if (rt == &ip6_null_entry ||
734 rt->rt6i_flags & RTF_CACHE)
735 goto out;
736
737 dst_hold(&rt->u.dst);
738 read_unlock_bh(&table->tb6_lock);
739
740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
742 else {
743#if CLONE_OFFLINK_ROUTE
744 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
745#else
746 goto out2;
747#endif
748 }
749
750 dst_release(&rt->u.dst);
751 rt = nrt ? : &ip6_null_entry;
752
753 dst_hold(&rt->u.dst);
754 if (nrt) {
755 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
756 if (!err)
757 goto out2;
758 }
759
760 if (--attempts <= 0)
761 goto out2;
762
763 /*
764 * Race condition! In the gap, when table->tb6_lock was
765 * released someone could insert this route. Relookup.
766 */
767 dst_release(&rt->u.dst);
768 goto relookup;
769
770out:
771 if (reachable) {
772 reachable = 0;
773 goto restart_2;
774 }
775 dst_hold(&rt->u.dst);
776 read_unlock_bh(&table->tb6_lock);
777out2:
778 rt->u.dst.lastuse = jiffies;
779 rt->u.dst.__use++;
780 return rt;
781}
782
783struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
784{
785 int flags = 0;
786
787 if (rt6_need_strict(&fl->fl6_dst))
788 flags |= RT6_F_STRICT;
789
790 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791}
792
793
794/*
795 * Destination cache support functions
796 */
797
798static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
799{
800 struct rt6_info *rt;
801
802 rt = (struct rt6_info *) dst;
803
804 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
805 return dst;
806
807 return NULL;
808}
809
810static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
811{
812 struct rt6_info *rt = (struct rt6_info *) dst;
813
814 if (rt) {
815 if (rt->rt6i_flags & RTF_CACHE)
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -0700816 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 else
818 dst_release(dst);
819 }
820 return NULL;
821}
822
823static void ip6_link_failure(struct sk_buff *skb)
824{
825 struct rt6_info *rt;
826
827 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
828
829 rt = (struct rt6_info *) skb->dst;
830 if (rt) {
831 if (rt->rt6i_flags&RTF_CACHE) {
832 dst_set_expires(&rt->u.dst, 0);
833 rt->rt6i_flags |= RTF_EXPIRES;
834 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
835 rt->rt6i_node->fn_sernum = -1;
836 }
837}
838
839static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
840{
841 struct rt6_info *rt6 = (struct rt6_info*)dst;
842
843 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
844 rt6->rt6i_flags |= RTF_MODIFIED;
845 if (mtu < IPV6_MIN_MTU) {
846 mtu = IPV6_MIN_MTU;
847 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
848 }
849 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700850 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 }
852}
853
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854static int ipv6_get_mtu(struct net_device *dev);
855
856static inline unsigned int ipv6_advmss(unsigned int mtu)
857{
858 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
859
860 if (mtu < ip6_rt_min_advmss)
861 mtu = ip6_rt_min_advmss;
862
863 /*
864 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
865 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
866 * IPV6_MAXPLEN is also valid and means: "any MSS,
867 * rely only on pmtu discovery"
868 */
869 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
870 mtu = IPV6_MAXPLEN;
871 return mtu;
872}
873
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700874static struct dst_entry *ndisc_dst_gc_list;
875DEFINE_SPINLOCK(ndisc_lock);
876
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
878 struct neighbour *neigh,
879 struct in6_addr *addr,
880 int (*output)(struct sk_buff *))
881{
882 struct rt6_info *rt;
883 struct inet6_dev *idev = in6_dev_get(dev);
884
885 if (unlikely(idev == NULL))
886 return NULL;
887
888 rt = ip6_dst_alloc();
889 if (unlikely(rt == NULL)) {
890 in6_dev_put(idev);
891 goto out;
892 }
893
894 dev_hold(dev);
895 if (neigh)
896 neigh_hold(neigh);
897 else
898 neigh = ndisc_get_neigh(dev, addr);
899
900 rt->rt6i_dev = dev;
901 rt->rt6i_idev = idev;
902 rt->rt6i_nexthop = neigh;
903 atomic_set(&rt->u.dst.__refcnt, 1);
904 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
905 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
906 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
907 rt->u.dst.output = output;
908
909#if 0 /* there's no chance to use these for ndisc */
910 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
911 ? DST_HOST
912 : 0;
913 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
914 rt->rt6i_dst.plen = 128;
915#endif
916
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700917 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 rt->u.dst.next = ndisc_dst_gc_list;
919 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700920 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921
922 fib6_force_start_gc();
923
924out:
925 return (struct dst_entry *)rt;
926}
927
928int ndisc_dst_gc(int *more)
929{
930 struct dst_entry *dst, *next, **pprev;
931 int freed;
932
933 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700934 freed = 0;
935
936 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700938
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939 while ((dst = *pprev) != NULL) {
940 if (!atomic_read(&dst->__refcnt)) {
941 *pprev = dst->next;
942 dst_free(dst);
943 freed++;
944 } else {
945 pprev = &dst->next;
946 (*more)++;
947 }
948 }
949
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700950 spin_unlock_bh(&ndisc_lock);
951
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 return freed;
953}
954
955static int ip6_dst_gc(void)
956{
957 static unsigned expire = 30*HZ;
958 static unsigned long last_gc;
959 unsigned long now = jiffies;
960
961 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
962 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
963 goto out;
964
965 expire++;
966 fib6_run_gc(expire);
967 last_gc = now;
968 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
969 expire = ip6_rt_gc_timeout>>1;
970
971out:
972 expire -= expire>>ip6_rt_gc_elasticity;
973 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
974}
975
976/* Clean host part of a prefix. Not necessary in radix tree,
977 but results in cleaner routing tables.
978
979 Remove it only when all the things will work!
980 */
981
982static int ipv6_get_mtu(struct net_device *dev)
983{
984 int mtu = IPV6_MIN_MTU;
985 struct inet6_dev *idev;
986
987 idev = in6_dev_get(dev);
988 if (idev) {
989 mtu = idev->cnf.mtu6;
990 in6_dev_put(idev);
991 }
992 return mtu;
993}
994
995int ipv6_get_hoplimit(struct net_device *dev)
996{
997 int hoplimit = ipv6_devconf.hop_limit;
998 struct inet6_dev *idev;
999
1000 idev = in6_dev_get(dev);
1001 if (idev) {
1002 hoplimit = idev->cnf.hop_limit;
1003 in6_dev_put(idev);
1004 }
1005 return hoplimit;
1006}
1007
1008/*
1009 *
1010 */
1011
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001012int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
Thomas Grafc71099a2006-08-04 23:20:06 -07001013 void *_rtattr, struct netlink_skb_parms *req,
1014 u32 table_id)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015{
1016 int err;
1017 struct rtmsg *r;
1018 struct rtattr **rta;
1019 struct rt6_info *rt = NULL;
1020 struct net_device *dev = NULL;
1021 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001022 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 int addr_type;
1024
1025 rta = (struct rtattr **) _rtattr;
1026
1027 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
1028 return -EINVAL;
1029#ifndef CONFIG_IPV6_SUBTREES
1030 if (rtmsg->rtmsg_src_len)
1031 return -EINVAL;
1032#endif
1033 if (rtmsg->rtmsg_ifindex) {
1034 err = -ENODEV;
1035 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
1036 if (!dev)
1037 goto out;
1038 idev = in6_dev_get(dev);
1039 if (!idev)
1040 goto out;
1041 }
1042
1043 if (rtmsg->rtmsg_metric == 0)
1044 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
1045
Thomas Grafc71099a2006-08-04 23:20:06 -07001046 table = fib6_new_table(table_id);
1047 if (table == NULL) {
1048 err = -ENOBUFS;
1049 goto out;
1050 }
1051
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 rt = ip6_dst_alloc();
1053
1054 if (rt == NULL) {
1055 err = -ENOMEM;
1056 goto out;
1057 }
1058
1059 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -08001060 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 if (nlh && (r = NLMSG_DATA(nlh))) {
1062 rt->rt6i_protocol = r->rtm_protocol;
1063 } else {
1064 rt->rt6i_protocol = RTPROT_BOOT;
1065 }
1066
1067 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
1068
1069 if (addr_type & IPV6_ADDR_MULTICAST)
1070 rt->u.dst.input = ip6_mc_input;
1071 else
1072 rt->u.dst.input = ip6_forward;
1073
1074 rt->u.dst.output = ip6_output;
1075
1076 ipv6_addr_prefix(&rt->rt6i_dst.addr,
1077 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
1078 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
1079 if (rt->rt6i_dst.plen == 128)
1080 rt->u.dst.flags = DST_HOST;
1081
1082#ifdef CONFIG_IPV6_SUBTREES
1083 ipv6_addr_prefix(&rt->rt6i_src.addr,
1084 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1085 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
1086#endif
1087
1088 rt->rt6i_metric = rtmsg->rtmsg_metric;
1089
1090 /* We cannot add true routes via loopback here,
1091 they would result in kernel looping; promote them to reject routes
1092 */
1093 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
1094 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1095 /* hold loopback dev/idev if we haven't done so. */
1096 if (dev != &loopback_dev) {
1097 if (dev) {
1098 dev_put(dev);
1099 in6_dev_put(idev);
1100 }
1101 dev = &loopback_dev;
1102 dev_hold(dev);
1103 idev = in6_dev_get(dev);
1104 if (!idev) {
1105 err = -ENODEV;
1106 goto out;
1107 }
1108 }
1109 rt->u.dst.output = ip6_pkt_discard_out;
1110 rt->u.dst.input = ip6_pkt_discard;
1111 rt->u.dst.error = -ENETUNREACH;
1112 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1113 goto install_route;
1114 }
1115
1116 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
1117 struct in6_addr *gw_addr;
1118 int gwa_type;
1119
1120 gw_addr = &rtmsg->rtmsg_gateway;
1121 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1122 gwa_type = ipv6_addr_type(gw_addr);
1123
1124 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1125 struct rt6_info *grt;
1126
1127 /* IPv6 strictly inhibits using not link-local
1128 addresses as nexthop address.
1129 Otherwise, router will not able to send redirects.
1130 It is very good, but in some (rare!) circumstances
1131 (SIT, PtP, NBMA NOARP links) it is handy to allow
1132 some exceptions. --ANK
1133 */
1134 err = -EINVAL;
1135 if (!(gwa_type&IPV6_ADDR_UNICAST))
1136 goto out;
1137
1138 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1139
1140 err = -EHOSTUNREACH;
1141 if (grt == NULL)
1142 goto out;
1143 if (dev) {
1144 if (dev != grt->rt6i_dev) {
1145 dst_release(&grt->u.dst);
1146 goto out;
1147 }
1148 } else {
1149 dev = grt->rt6i_dev;
1150 idev = grt->rt6i_idev;
1151 dev_hold(dev);
1152 in6_dev_hold(grt->rt6i_idev);
1153 }
1154 if (!(grt->rt6i_flags&RTF_GATEWAY))
1155 err = 0;
1156 dst_release(&grt->u.dst);
1157
1158 if (err)
1159 goto out;
1160 }
1161 err = -EINVAL;
1162 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1163 goto out;
1164 }
1165
1166 err = -ENODEV;
1167 if (dev == NULL)
1168 goto out;
1169
1170 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1171 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1172 if (IS_ERR(rt->rt6i_nexthop)) {
1173 err = PTR_ERR(rt->rt6i_nexthop);
1174 rt->rt6i_nexthop = NULL;
1175 goto out;
1176 }
1177 }
1178
1179 rt->rt6i_flags = rtmsg->rtmsg_flags;
1180
1181install_route:
1182 if (rta && rta[RTA_METRICS-1]) {
1183 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1184 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1185
1186 while (RTA_OK(attr, attrlen)) {
1187 unsigned flavor = attr->rta_type;
1188 if (flavor) {
1189 if (flavor > RTAX_MAX) {
1190 err = -EINVAL;
1191 goto out;
1192 }
1193 rt->u.dst.metrics[flavor-1] =
1194 *(u32 *)RTA_DATA(attr);
1195 }
1196 attr = RTA_NEXT(attr, attrlen);
1197 }
1198 }
1199
1200 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1201 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1202 if (!rt->u.dst.metrics[RTAX_MTU-1])
1203 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1204 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1205 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1206 rt->u.dst.dev = dev;
1207 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001208 rt->rt6i_table = table;
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001209 return ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210
1211out:
1212 if (dev)
1213 dev_put(dev);
1214 if (idev)
1215 in6_dev_put(idev);
1216 if (rt)
1217 dst_free((struct dst_entry *) rt);
1218 return err;
1219}
1220
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001221int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001222{
1223 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001224 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225
Thomas Grafc71099a2006-08-04 23:20:06 -07001226 table = rt->rt6i_table;
1227 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001229 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230 dst_release(&rt->u.dst);
1231
Thomas Grafc71099a2006-08-04 23:20:06 -07001232 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233
1234 return err;
1235}
1236
Thomas Grafc71099a2006-08-04 23:20:06 -07001237static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
1238 void *_rtattr, struct netlink_skb_parms *req,
1239 u32 table_id)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240{
Thomas Grafc71099a2006-08-04 23:20:06 -07001241 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 struct fib6_node *fn;
1243 struct rt6_info *rt;
1244 int err = -ESRCH;
1245
Thomas Grafc71099a2006-08-04 23:20:06 -07001246 table = fib6_get_table(table_id);
1247 if (table == NULL)
1248 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249
Thomas Grafc71099a2006-08-04 23:20:06 -07001250 read_lock_bh(&table->tb6_lock);
1251
1252 fn = fib6_locate(&table->tb6_root,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1254 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1255
1256 if (fn) {
1257 for (rt = fn->leaf; rt; rt = rt->u.next) {
1258 if (rtmsg->rtmsg_ifindex &&
1259 (rt->rt6i_dev == NULL ||
1260 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1261 continue;
1262 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1263 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1264 continue;
1265 if (rtmsg->rtmsg_metric &&
1266 rtmsg->rtmsg_metric != rt->rt6i_metric)
1267 continue;
1268 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001269 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001271 return ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 }
1273 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001274 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275
1276 return err;
1277}
1278
1279/*
1280 * Handle redirects
1281 */
1282void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1283 struct neighbour *neigh, u8 *lladdr, int on_link)
1284{
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001285 struct rt6_info *rt, *nrt = NULL;
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001286 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001287 struct fib6_table *table;
Tom Tucker8d717402006-07-30 20:43:36 -07001288 struct netevent_redirect netevent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289
Thomas Grafc71099a2006-08-04 23:20:06 -07001290 /* TODO: Very lazy, might need to check all tables */
1291 table = fib6_get_table(RT6_TABLE_MAIN);
1292 if (table == NULL)
1293 return;
1294
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001296 * Get the "current" route for this destination and
1297 * check if the redirect has come from approriate router.
1298 *
1299 * RFC 2461 specifies that redirects should only be
1300 * accepted if they come from the nexthop to the target.
1301 * Due to the way the routes are chosen, this notion
1302 * is a bit fuzzy and one might need to check all possible
1303 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305
Thomas Grafc71099a2006-08-04 23:20:06 -07001306 read_lock_bh(&table->tb6_lock);
1307 fn = fib6_lookup(&table->tb6_root, dest, NULL);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001308restart:
1309 for (rt = fn->leaf; rt; rt = rt->u.next) {
1310 /*
1311 * Current route is on-link; redirect is always invalid.
1312 *
1313 * Seems, previous statement is not true. It could
1314 * be node, which looks for us as on-link (f.e. proxy ndisc)
1315 * But then router serving it might decide, that we should
1316 * know truth 8)8) --ANK (980726).
1317 */
1318 if (rt6_check_expired(rt))
1319 continue;
1320 if (!(rt->rt6i_flags & RTF_GATEWAY))
1321 continue;
1322 if (neigh->dev != rt->rt6i_dev)
1323 continue;
1324 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1325 continue;
1326 break;
1327 }
1328 if (rt)
1329 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001330 else if (rt6_need_strict(dest)) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001331 while ((fn = fn->parent) != NULL) {
1332 if (fn->fn_flags & RTN_ROOT)
1333 break;
1334 if (fn->fn_flags & RTN_RTINFO)
1335 goto restart;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001337 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001338 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001339
1340 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 if (net_ratelimit())
1342 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1343 "for redirect target\n");
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001344 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345 }
1346
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347 /*
1348 * We have finally decided to accept it.
1349 */
1350
1351 neigh_update(neigh, lladdr, NUD_STALE,
1352 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1353 NEIGH_UPDATE_F_OVERRIDE|
1354 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1355 NEIGH_UPDATE_F_ISROUTER))
1356 );
1357
1358 /*
1359 * Redirect received -> path was valid.
1360 * Look, redirects are sent only in response to data packets,
1361 * so that this nexthop apparently is reachable. --ANK
1362 */
1363 dst_confirm(&rt->u.dst);
1364
1365 /* Duplicate redirect: silently ignore. */
1366 if (neigh == rt->u.dst.neighbour)
1367 goto out;
1368
1369 nrt = ip6_rt_copy(rt);
1370 if (nrt == NULL)
1371 goto out;
1372
1373 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1374 if (on_link)
1375 nrt->rt6i_flags &= ~RTF_GATEWAY;
1376
1377 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1378 nrt->rt6i_dst.plen = 128;
1379 nrt->u.dst.flags |= DST_HOST;
1380
1381 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1382 nrt->rt6i_nexthop = neigh_clone(neigh);
1383 /* Reset pmtu, it may be better */
1384 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1385 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1386
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001387 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 goto out;
1389
Tom Tucker8d717402006-07-30 20:43:36 -07001390 netevent.old = &rt->u.dst;
1391 netevent.new = &nrt->u.dst;
1392 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1393
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 if (rt->rt6i_flags&RTF_CACHE) {
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001395 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396 return;
1397 }
1398
1399out:
1400 dst_release(&rt->u.dst);
1401 return;
1402}
1403
1404/*
1405 * Handle ICMP "packet too big" messages
1406 * i.e. Path MTU discovery
1407 */
1408
1409void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1410 struct net_device *dev, u32 pmtu)
1411{
1412 struct rt6_info *rt, *nrt;
1413 int allfrag = 0;
1414
1415 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1416 if (rt == NULL)
1417 return;
1418
1419 if (pmtu >= dst_mtu(&rt->u.dst))
1420 goto out;
1421
1422 if (pmtu < IPV6_MIN_MTU) {
1423 /*
1424 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1425 * MTU (1280) and a fragment header should always be included
1426 * after a node receiving Too Big message reporting PMTU is
1427 * less than the IPv6 Minimum Link MTU.
1428 */
1429 pmtu = IPV6_MIN_MTU;
1430 allfrag = 1;
1431 }
1432
1433 /* New mtu received -> path was valid.
1434 They are sent only in response to data packets,
1435 so that this nexthop apparently is reachable. --ANK
1436 */
1437 dst_confirm(&rt->u.dst);
1438
1439 /* Host route. If it is static, it would be better
1440 not to override it, but add new one, so that
1441 when cache entry will expire old pmtu
1442 would return automatically.
1443 */
1444 if (rt->rt6i_flags & RTF_CACHE) {
1445 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1446 if (allfrag)
1447 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1448 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1449 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1450 goto out;
1451 }
1452
1453 /* Network route.
1454 Two cases are possible:
1455 1. It is connected route. Action: COW
1456 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1457 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001458 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001459 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001460 else
1461 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001462
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001463 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001464 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1465 if (allfrag)
1466 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1467
1468 /* According to RFC 1981, detecting PMTU increase shouldn't be
1469 * happened within 5 mins, the recommended timer is 10 mins.
1470 * Here this route expiration time is set to ip6_rt_mtu_expires
1471 * which is 10 mins. After 10 mins the decreased pmtu is expired
1472 * and detecting PMTU increase will be automatically happened.
1473 */
1474 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1475 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1476
1477 ip6_ins_rt(nrt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479out:
1480 dst_release(&rt->u.dst);
1481}
1482
1483/*
1484 * Misc support functions
1485 */
1486
1487static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1488{
1489 struct rt6_info *rt = ip6_dst_alloc();
1490
1491 if (rt) {
1492 rt->u.dst.input = ort->u.dst.input;
1493 rt->u.dst.output = ort->u.dst.output;
1494
1495 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1496 rt->u.dst.dev = ort->u.dst.dev;
1497 if (rt->u.dst.dev)
1498 dev_hold(rt->u.dst.dev);
1499 rt->rt6i_idev = ort->rt6i_idev;
1500 if (rt->rt6i_idev)
1501 in6_dev_hold(rt->rt6i_idev);
1502 rt->u.dst.lastuse = jiffies;
1503 rt->rt6i_expires = 0;
1504
1505 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1506 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1507 rt->rt6i_metric = 0;
1508
1509 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1510#ifdef CONFIG_IPV6_SUBTREES
1511 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1512#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001513 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 }
1515 return rt;
1516}
1517
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001518#ifdef CONFIG_IPV6_ROUTE_INFO
1519static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1520 struct in6_addr *gwaddr, int ifindex)
1521{
1522 struct fib6_node *fn;
1523 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001524 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001525
Thomas Grafc71099a2006-08-04 23:20:06 -07001526 table = fib6_get_table(RT6_TABLE_INFO);
1527 if (table == NULL)
1528 return NULL;
1529
1530 write_lock_bh(&table->tb6_lock);
1531 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001532 if (!fn)
1533 goto out;
1534
1535 for (rt = fn->leaf; rt; rt = rt->u.next) {
1536 if (rt->rt6i_dev->ifindex != ifindex)
1537 continue;
1538 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1539 continue;
1540 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1541 continue;
1542 dst_hold(&rt->u.dst);
1543 break;
1544 }
1545out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001546 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001547 return rt;
1548}
1549
1550static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1551 struct in6_addr *gwaddr, int ifindex,
1552 unsigned pref)
1553{
1554 struct in6_rtmsg rtmsg;
1555
1556 memset(&rtmsg, 0, sizeof(rtmsg));
1557 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1558 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1559 rtmsg.rtmsg_dst_len = prefixlen;
1560 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1561 rtmsg.rtmsg_metric = 1024;
1562 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001563 /* We should treat it as a default route if prefix length is 0. */
1564 if (!prefixlen)
1565 rtmsg.rtmsg_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001566 rtmsg.rtmsg_ifindex = ifindex;
1567
Thomas Grafc71099a2006-08-04 23:20:06 -07001568 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001569
1570 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1571}
1572#endif
1573
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1575{
1576 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001577 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001578
Thomas Grafc71099a2006-08-04 23:20:06 -07001579 table = fib6_get_table(RT6_TABLE_DFLT);
1580 if (table == NULL)
1581 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582
Thomas Grafc71099a2006-08-04 23:20:06 -07001583 write_lock_bh(&table->tb6_lock);
1584 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001586 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1588 break;
1589 }
1590 if (rt)
1591 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001592 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593 return rt;
1594}
1595
1596struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001597 struct net_device *dev,
1598 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599{
1600 struct in6_rtmsg rtmsg;
1601
1602 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1603 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1604 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1605 rtmsg.rtmsg_metric = 1024;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001606 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1607 RTF_PREF(pref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608
1609 rtmsg.rtmsg_ifindex = dev->ifindex;
1610
Thomas Grafc71099a2006-08-04 23:20:06 -07001611 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 return rt6_get_dflt_router(gwaddr, dev);
1613}
1614
1615void rt6_purge_dflt_routers(void)
1616{
1617 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001618 struct fib6_table *table;
1619
1620 /* NOTE: Keep consistent with rt6_get_dflt_router */
1621 table = fib6_get_table(RT6_TABLE_DFLT);
1622 if (table == NULL)
1623 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624
1625restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001626 read_lock_bh(&table->tb6_lock);
1627 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1629 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001630 read_unlock_bh(&table->tb6_lock);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001631 ip6_del_rt(rt, NULL, NULL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632 goto restart;
1633 }
1634 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001635 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636}
1637
1638int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1639{
1640 struct in6_rtmsg rtmsg;
1641 int err;
1642
1643 switch(cmd) {
1644 case SIOCADDRT: /* Add a route */
1645 case SIOCDELRT: /* Delete a route */
1646 if (!capable(CAP_NET_ADMIN))
1647 return -EPERM;
1648 err = copy_from_user(&rtmsg, arg,
1649 sizeof(struct in6_rtmsg));
1650 if (err)
1651 return -EFAULT;
1652
1653 rtnl_lock();
1654 switch (cmd) {
1655 case SIOCADDRT:
Thomas Grafc71099a2006-08-04 23:20:06 -07001656 err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
1657 RT6_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658 break;
1659 case SIOCDELRT:
Thomas Grafc71099a2006-08-04 23:20:06 -07001660 err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
1661 RT6_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 break;
1663 default:
1664 err = -EINVAL;
1665 }
1666 rtnl_unlock();
1667
1668 return err;
1669 };
1670
1671 return -EINVAL;
1672}
1673
1674/*
1675 * Drop the packet on the floor
1676 */
1677
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001678static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679{
Lv Liangying76d0cc12006-08-29 00:00:47 -07001680 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1681 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1682 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1683
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1685 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1686 kfree_skb(skb);
1687 return 0;
1688}
1689
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001690static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691{
1692 skb->dev = skb->dst->dev;
1693 return ip6_pkt_discard(skb);
1694}
1695
1696/*
1697 * Allocate a dst for local (unicast / anycast) address.
1698 */
1699
1700struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1701 const struct in6_addr *addr,
1702 int anycast)
1703{
1704 struct rt6_info *rt = ip6_dst_alloc();
1705
1706 if (rt == NULL)
1707 return ERR_PTR(-ENOMEM);
1708
1709 dev_hold(&loopback_dev);
1710 in6_dev_hold(idev);
1711
1712 rt->u.dst.flags = DST_HOST;
1713 rt->u.dst.input = ip6_input;
1714 rt->u.dst.output = ip6_output;
1715 rt->rt6i_dev = &loopback_dev;
1716 rt->rt6i_idev = idev;
1717 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1718 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1719 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1720 rt->u.dst.obsolete = -1;
1721
1722 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001723 if (anycast)
1724 rt->rt6i_flags |= RTF_ANYCAST;
1725 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726 rt->rt6i_flags |= RTF_LOCAL;
1727 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1728 if (rt->rt6i_nexthop == NULL) {
1729 dst_free((struct dst_entry *) rt);
1730 return ERR_PTR(-ENOMEM);
1731 }
1732
1733 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1734 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001735 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736
1737 atomic_set(&rt->u.dst.__refcnt, 1);
1738
1739 return rt;
1740}
1741
1742static int fib6_ifdown(struct rt6_info *rt, void *arg)
1743{
1744 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1745 rt != &ip6_null_entry) {
1746 RT6_TRACE("deleted by ifdown %p\n", rt);
1747 return -1;
1748 }
1749 return 0;
1750}
1751
1752void rt6_ifdown(struct net_device *dev)
1753{
Thomas Grafc71099a2006-08-04 23:20:06 -07001754 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755}
1756
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* new MTU of that device */
};
1762
1763static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1764{
1765 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1766 struct inet6_dev *idev;
1767
1768 /* In IPv6 pmtu discovery is not optional,
1769 so that RTAX_MTU lock cannot disable it.
1770 We still use this lock to block changes
1771 caused by addrconf/ndisc.
1772 */
1773
1774 idev = __in6_dev_get(arg->dev);
1775 if (idev == NULL)
1776 return 0;
1777
1778 /* For administrative MTU increase, there is no way to discover
1779 IPv6 PMTU increase, so PMTU increase should be updated here.
1780 Since RFC 1981 doesn't include administrative MTU increase
1781 update PMTU increase is a MUST. (i.e. jumbo frame)
1782 */
1783 /*
1784 If new MTU is less than route PMTU, this new MTU will be the
1785 lowest MTU in the path, update the route PMTU to reflect PMTU
1786 decreases; if new MTU is greater than route PMTU, and the
1787 old MTU is the lowest MTU in the path, update the route PMTU
1788 to reflect the increase. In this case if the other nodes' MTU
1789 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1790 PMTU discouvery.
1791 */
1792 if (rt->rt6i_dev == arg->dev &&
1793 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1794 (dst_mtu(&rt->u.dst) > arg->mtu ||
1795 (dst_mtu(&rt->u.dst) < arg->mtu &&
1796 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1797 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1798 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1799 return 0;
1800}
1801
1802void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1803{
Thomas Grafc71099a2006-08-04 23:20:06 -07001804 struct rt6_mtu_change_arg arg = {
1805 .dev = dev,
1806 .mtu = mtu,
1807 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808
Thomas Grafc71099a2006-08-04 23:20:06 -07001809 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810}
1811
1812static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1813 struct in6_rtmsg *rtmsg)
1814{
1815 memset(rtmsg, 0, sizeof(*rtmsg));
1816
1817 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1818 rtmsg->rtmsg_src_len = r->rtm_src_len;
1819 rtmsg->rtmsg_flags = RTF_UP;
1820 if (r->rtm_type == RTN_UNREACHABLE)
1821 rtmsg->rtmsg_flags |= RTF_REJECT;
1822
1823 if (rta[RTA_GATEWAY-1]) {
1824 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1825 return -EINVAL;
1826 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1827 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1828 }
1829 if (rta[RTA_DST-1]) {
1830 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1831 return -EINVAL;
1832 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1833 }
1834 if (rta[RTA_SRC-1]) {
1835 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1836 return -EINVAL;
1837 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1838 }
1839 if (rta[RTA_OIF-1]) {
1840 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1841 return -EINVAL;
1842 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1843 }
1844 if (rta[RTA_PRIORITY-1]) {
1845 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1846 return -EINVAL;
1847 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1848 }
1849 return 0;
1850}
1851
1852int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1853{
1854 struct rtmsg *r = NLMSG_DATA(nlh);
1855 struct in6_rtmsg rtmsg;
1856
1857 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1858 return -EINVAL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001859 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860}
1861
1862int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1863{
1864 struct rtmsg *r = NLMSG_DATA(nlh);
1865 struct in6_rtmsg rtmsg;
1866
1867 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1868 return -EINVAL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001869 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870}
1871
/* Pairs a socket buffer with a netlink callback (used by the rtnetlink dump code, judging by the name). */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;
	struct netlink_callback *cb;
};
1877
1878static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001879 struct in6_addr *dst, struct in6_addr *src,
1880 int iif, int type, u32 pid, u32 seq,
1881 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882{
1883 struct rtmsg *rtm;
1884 struct nlmsghdr *nlh;
1885 unsigned char *b = skb->tail;
1886 struct rta_cacheinfo ci;
1887
1888 if (prefix) { /* user wants prefix routes only */
1889 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1890 /* success since this is not a prefix route */
1891 return 1;
1892 }
1893 }
1894
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001895 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896 rtm = NLMSG_DATA(nlh);
1897 rtm->rtm_family = AF_INET6;
1898 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1899 rtm->rtm_src_len = rt->rt6i_src.plen;
1900 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07001901 if (rt->rt6i_table)
1902 rtm->rtm_table = rt->rt6i_table->tb6_id;
1903 else
1904 rtm->rtm_table = RT6_TABLE_UNSPEC;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001905 rtm->rtm_table = RT_TABLE_MAIN;
1906 if (rt->rt6i_flags&RTF_REJECT)
1907 rtm->rtm_type = RTN_UNREACHABLE;
1908 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1909 rtm->rtm_type = RTN_LOCAL;
1910 else
1911 rtm->rtm_type = RTN_UNICAST;
1912 rtm->rtm_flags = 0;
1913 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1914 rtm->rtm_protocol = rt->rt6i_protocol;
1915 if (rt->rt6i_flags&RTF_DYNAMIC)
1916 rtm->rtm_protocol = RTPROT_REDIRECT;
1917 else if (rt->rt6i_flags & RTF_ADDRCONF)
1918 rtm->rtm_protocol = RTPROT_KERNEL;
1919 else if (rt->rt6i_flags&RTF_DEFAULT)
1920 rtm->rtm_protocol = RTPROT_RA;
1921
1922 if (rt->rt6i_flags&RTF_CACHE)
1923 rtm->rtm_flags |= RTM_F_CLONED;
1924
1925 if (dst) {
1926 RTA_PUT(skb, RTA_DST, 16, dst);
1927 rtm->rtm_dst_len = 128;
1928 } else if (rtm->rtm_dst_len)
1929 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1930#ifdef CONFIG_IPV6_SUBTREES
1931 if (src) {
1932 RTA_PUT(skb, RTA_SRC, 16, src);
1933 rtm->rtm_src_len = 128;
1934 } else if (rtm->rtm_src_len)
1935 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1936#endif
1937 if (iif)
1938 RTA_PUT(skb, RTA_IIF, 4, &iif);
1939 else if (dst) {
1940 struct in6_addr saddr_buf;
1941 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1942 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1943 }
1944 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1945 goto rtattr_failure;
1946 if (rt->u.dst.neighbour)
1947 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1948 if (rt->u.dst.dev)
1949 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1950 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1951 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1952 if (rt->rt6i_expires)
1953 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1954 else
1955 ci.rta_expires = 0;
1956 ci.rta_used = rt->u.dst.__use;
1957 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1958 ci.rta_error = rt->u.dst.error;
1959 ci.rta_id = 0;
1960 ci.rta_ts = 0;
1961 ci.rta_tsage = 0;
1962 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1963 nlh->nlmsg_len = skb->tail - b;
1964 return skb->len;
1965
1966nlmsg_failure:
1967rtattr_failure:
1968 skb_trim(skb, b - skb->data);
1969 return -1;
1970}
1971
1972static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1973{
1974 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1975 int prefix;
1976
1977 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1978 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1979 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1980 } else
1981 prefix = 0;
1982
1983 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1984 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001985 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001986}
1987
1988static int fib6_dump_node(struct fib6_walker_t *w)
1989{
1990 int res;
1991 struct rt6_info *rt;
1992
1993 for (rt = w->leaf; rt; rt = rt->u.next) {
1994 res = rt6_dump_route(rt, w->args);
1995 if (res < 0) {
1996 /* Frame is full, suspend walking */
1997 w->leaf = rt;
1998 return 1;
1999 }
2000 BUG_TRAP(res!=0);
2001 }
2002 w->leaf = NULL;
2003 return 0;
2004}
2005
2006static void fib6_dump_end(struct netlink_callback *cb)
2007{
2008 struct fib6_walker_t *w = (void*)cb->args[0];
2009
2010 if (w) {
2011 cb->args[0] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002012 kfree(w);
2013 }
Herbert Xuefacfbc2005-11-12 12:12:05 -08002014 cb->done = (void*)cb->args[1];
2015 cb->args[1] = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016}
2017
2018static int fib6_dump_done(struct netlink_callback *cb)
2019{
2020 fib6_dump_end(cb);
Thomas Grafa8f74b22005-11-10 02:25:52 +01002021 return cb->done ? cb->done(cb) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022}
2023
2024int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
2025{
Thomas Grafc71099a2006-08-04 23:20:06 -07002026 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027 struct rt6_rtnl_dump_arg arg;
2028 struct fib6_walker_t *w;
Thomas Grafc71099a2006-08-04 23:20:06 -07002029 int i, res = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030
2031 arg.skb = skb;
2032 arg.cb = cb;
2033
Thomas Grafc71099a2006-08-04 23:20:06 -07002034 /*
2035 * cb->args[0] = pointer to walker structure
2036 * cb->args[1] = saved cb->done() pointer
2037 * cb->args[2] = current table being dumped
2038 */
2039
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040 w = (void*)cb->args[0];
2041 if (w == NULL) {
2042 /* New dump:
2043 *
2044 * 1. hook callback destructor.
2045 */
2046 cb->args[1] = (long)cb->done;
2047 cb->done = fib6_dump_done;
2048
2049 /*
2050 * 2. allocate and initialize walker.
2051 */
Ingo Oeser0c600ed2006-03-20 23:01:32 -08002052 w = kzalloc(sizeof(*w), GFP_ATOMIC);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053 if (w == NULL)
2054 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055 w->func = fib6_dump_node;
2056 w->args = &arg;
2057 cb->args[0] = (long)w;
Thomas Grafc71099a2006-08-04 23:20:06 -07002058 cb->args[2] = FIB6_TABLE_MIN;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059 } else {
2060 w->args = &arg;
Thomas Grafc71099a2006-08-04 23:20:06 -07002061 i = cb->args[2];
2062 if (i > FIB6_TABLE_MAX)
2063 goto end;
2064
2065 table = fib6_get_table(i);
2066 if (table != NULL) {
2067 read_lock_bh(&table->tb6_lock);
2068 w->root = &table->tb6_root;
2069 res = fib6_walk_continue(w);
2070 read_unlock_bh(&table->tb6_lock);
2071 if (res != 0) {
2072 if (res < 0)
2073 fib6_walker_unlink(w);
2074 goto end;
2075 }
2076 }
2077
2078 fib6_walker_unlink(w);
2079 cb->args[2] = ++i;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080 }
Thomas Grafc71099a2006-08-04 23:20:06 -07002081
2082 for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) {
2083 table = fib6_get_table(i);
2084 if (table == NULL)
2085 continue;
2086
2087 read_lock_bh(&table->tb6_lock);
2088 w->root = &table->tb6_root;
2089 res = fib6_walk(w);
2090 read_unlock_bh(&table->tb6_lock);
2091 if (res)
2092 break;
2093 }
2094end:
2095 cb->args[2] = i;
2096
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097 res = res < 0 ? res : skb->len;
2098 /* res < 0 is an error. (really, impossible)
2099 res == 0 means that dump is complete, but skb still can contain data.
2100 res > 0 dump is not complete, but frame is full.
2101 */
2102 /* Destroy walker, if dump of this table is complete. */
2103 if (res <= 0)
2104 fib6_dump_end(cb);
2105 return res;
2106}
2107
2108int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2109{
2110 struct rtattr **rta = arg;
2111 int iif = 0;
2112 int err = -ENOBUFS;
2113 struct sk_buff *skb;
2114 struct flowi fl;
2115 struct rt6_info *rt;
2116
2117 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2118 if (skb == NULL)
2119 goto out;
2120
2121 /* Reserve room for dummy headers, this skb can pass
2122 through good chunk of routing engine.
2123 */
2124 skb->mac.raw = skb->data;
2125 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2126
2127 memset(&fl, 0, sizeof(fl));
2128 if (rta[RTA_SRC-1])
2129 ipv6_addr_copy(&fl.fl6_src,
2130 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
2131 if (rta[RTA_DST-1])
2132 ipv6_addr_copy(&fl.fl6_dst,
2133 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
2134
2135 if (rta[RTA_IIF-1])
2136 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
2137
2138 if (iif) {
2139 struct net_device *dev;
2140 dev = __dev_get_by_index(iif);
2141 if (!dev) {
2142 err = -ENODEV;
2143 goto out_free;
2144 }
2145 }
2146
2147 fl.oif = 0;
2148 if (rta[RTA_OIF-1])
2149 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
2150
2151 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
2152
2153 skb->dst = &rt->u.dst;
2154
2155 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2156 err = rt6_fill_node(skb, rt,
2157 &fl.fl6_dst, &fl.fl6_src,
2158 iif,
2159 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002160 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161 if (err < 0) {
2162 err = -EMSGSIZE;
2163 goto out_free;
2164 }
2165
2166 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
2167 if (err > 0)
2168 err = 0;
2169out:
2170 return err;
2171out_free:
2172 kfree_skb(skb);
2173 goto out;
2174}
2175
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002176void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
2177 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002178{
2179 struct sk_buff *skb;
2180 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002181 u32 pid = current->pid;
2182 u32 seq = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002183
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002184 if (req)
2185 pid = req->pid;
2186 if (nlh)
2187 seq = nlh->nlmsg_seq;
2188
Linus Torvalds1da177e2005-04-16 15:20:36 -07002189 skb = alloc_skb(size, gfp_any());
2190 if (!skb) {
Patrick McHardyac6d4392005-08-14 19:29:52 -07002191 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192 return;
2193 }
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002194 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195 kfree_skb(skb);
Patrick McHardyac6d4392005-08-14 19:29:52 -07002196 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 return;
2198 }
Patrick McHardyac6d4392005-08-14 19:29:52 -07002199 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2200 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201}
2202
2203/*
2204 * /proc
2205 */
2206
2207#ifdef CONFIG_PROC_FS
2208
/*
 * Length in bytes of one record written by rt6_info_route(): 32 hex
 * digits plus a 4 character prefix-length field for each of destination
 * and source, 32 hex digits for the next hop, and 46 characters for the
 * trailing metric/refcnt/use/flags/device fields and newline.
 * NOTE(review): the last figure assumes a device name of at most 8
 * characters, since "%8s" pads but does not truncate.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2210
/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer supplied by the proc read */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes requested by the reader */
	int skip;	/* records skipped so far to reach 'offset' */
	int len;	/* bytes written into 'buffer' so far */
};
2219
2220static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2221{
2222 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2223 int i;
2224
2225 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2226 arg->skip++;
2227 return 0;
2228 }
2229
2230 if (arg->len >= arg->length)
2231 return 0;
2232
2233 for (i=0; i<16; i++) {
2234 sprintf(arg->buffer + arg->len, "%02x",
2235 rt->rt6i_dst.addr.s6_addr[i]);
2236 arg->len += 2;
2237 }
2238 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2239 rt->rt6i_dst.plen);
2240
2241#ifdef CONFIG_IPV6_SUBTREES
2242 for (i=0; i<16; i++) {
2243 sprintf(arg->buffer + arg->len, "%02x",
2244 rt->rt6i_src.addr.s6_addr[i]);
2245 arg->len += 2;
2246 }
2247 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2248 rt->rt6i_src.plen);
2249#else
2250 sprintf(arg->buffer + arg->len,
2251 "00000000000000000000000000000000 00 ");
2252 arg->len += 36;
2253#endif
2254
2255 if (rt->rt6i_nexthop) {
2256 for (i=0; i<16; i++) {
2257 sprintf(arg->buffer + arg->len, "%02x",
2258 rt->rt6i_nexthop->primary_key[i]);
2259 arg->len += 2;
2260 }
2261 } else {
2262 sprintf(arg->buffer + arg->len,
2263 "00000000000000000000000000000000");
2264 arg->len += 32;
2265 }
2266 arg->len += sprintf(arg->buffer + arg->len,
2267 " %08x %08x %08x %08x %8s\n",
2268 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2269 rt->u.dst.__use, rt->rt6i_flags,
2270 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2271 return 0;
2272}
2273
2274static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2275{
Thomas Grafc71099a2006-08-04 23:20:06 -07002276 struct rt6_proc_arg arg = {
2277 .buffer = buffer,
2278 .offset = offset,
2279 .length = length,
2280 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002281
Thomas Grafc71099a2006-08-04 23:20:06 -07002282 fib6_clean_all(rt6_info_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283
2284 *start = buffer;
2285 if (offset)
2286 *start += offset % RT6_INFO_LEN;
2287
2288 arg.len -= offset % RT6_INFO_LEN;
2289
2290 if (arg.len > length)
2291 arg.len = length;
2292 if (arg.len < 0)
2293 arg.len = 0;
2294
2295 return arg.len;
2296}
2297
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2299{
2300 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2301 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2302 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2303 rt6_stats.fib_rt_cache,
2304 atomic_read(&ip6_dst_ops.entries),
2305 rt6_stats.fib_discarded_routes);
2306
2307 return 0;
2308}
2309
2310static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2311{
2312 return single_open(file, rt6_stats_seq_show, NULL);
2313}
2314
2315static struct file_operations rt6_stats_seq_fops = {
2316 .owner = THIS_MODULE,
2317 .open = rt6_stats_seq_open,
2318 .read = seq_read,
2319 .llseek = seq_lseek,
2320 .release = single_release,
2321};
2322#endif /* CONFIG_PROC_FS */
2323
2324#ifdef CONFIG_SYSCTL
2325
2326static int flush_delay;
2327
2328static
2329int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2330 void __user *buffer, size_t *lenp, loff_t *ppos)
2331{
2332 if (write) {
2333 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2334 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2335 return 0;
2336 } else
2337 return -EINVAL;
2338}
2339
2340ctl_table ipv6_route_table[] = {
2341 {
2342 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2343 .procname = "flush",
2344 .data = &flush_delay,
2345 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002346 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347 .proc_handler = &ipv6_sysctl_rtcache_flush
2348 },
2349 {
2350 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2351 .procname = "gc_thresh",
2352 .data = &ip6_dst_ops.gc_thresh,
2353 .maxlen = sizeof(int),
2354 .mode = 0644,
2355 .proc_handler = &proc_dointvec,
2356 },
2357 {
2358 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2359 .procname = "max_size",
2360 .data = &ip6_rt_max_size,
2361 .maxlen = sizeof(int),
2362 .mode = 0644,
2363 .proc_handler = &proc_dointvec,
2364 },
2365 {
2366 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2367 .procname = "gc_min_interval",
2368 .data = &ip6_rt_gc_min_interval,
2369 .maxlen = sizeof(int),
2370 .mode = 0644,
2371 .proc_handler = &proc_dointvec_jiffies,
2372 .strategy = &sysctl_jiffies,
2373 },
2374 {
2375 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2376 .procname = "gc_timeout",
2377 .data = &ip6_rt_gc_timeout,
2378 .maxlen = sizeof(int),
2379 .mode = 0644,
2380 .proc_handler = &proc_dointvec_jiffies,
2381 .strategy = &sysctl_jiffies,
2382 },
2383 {
2384 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2385 .procname = "gc_interval",
2386 .data = &ip6_rt_gc_interval,
2387 .maxlen = sizeof(int),
2388 .mode = 0644,
2389 .proc_handler = &proc_dointvec_jiffies,
2390 .strategy = &sysctl_jiffies,
2391 },
2392 {
2393 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2394 .procname = "gc_elasticity",
2395 .data = &ip6_rt_gc_elasticity,
2396 .maxlen = sizeof(int),
2397 .mode = 0644,
2398 .proc_handler = &proc_dointvec_jiffies,
2399 .strategy = &sysctl_jiffies,
2400 },
2401 {
2402 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2403 .procname = "mtu_expires",
2404 .data = &ip6_rt_mtu_expires,
2405 .maxlen = sizeof(int),
2406 .mode = 0644,
2407 .proc_handler = &proc_dointvec_jiffies,
2408 .strategy = &sysctl_jiffies,
2409 },
2410 {
2411 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2412 .procname = "min_adv_mss",
2413 .data = &ip6_rt_min_advmss,
2414 .maxlen = sizeof(int),
2415 .mode = 0644,
2416 .proc_handler = &proc_dointvec_jiffies,
2417 .strategy = &sysctl_jiffies,
2418 },
2419 {
2420 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2421 .procname = "gc_min_interval_ms",
2422 .data = &ip6_rt_gc_min_interval,
2423 .maxlen = sizeof(int),
2424 .mode = 0644,
2425 .proc_handler = &proc_dointvec_ms_jiffies,
2426 .strategy = &sysctl_ms_jiffies,
2427 },
2428 { .ctl_name = 0 }
2429};
2430
2431#endif
2432
2433void __init ip6_route_init(void)
2434{
2435 struct proc_dir_entry *p;
2436
2437 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2438 sizeof(struct rt6_info),
2439 0, SLAB_HWCACHE_ALIGN,
2440 NULL, NULL);
2441 if (!ip6_dst_ops.kmem_cachep)
2442 panic("cannot create ip6_dst_cache");
2443
2444 fib6_init();
2445#ifdef CONFIG_PROC_FS
2446 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2447 if (p)
2448 p->owner = THIS_MODULE;
2449
2450 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2451#endif
2452#ifdef CONFIG_XFRM
2453 xfrm6_init();
2454#endif
Thomas Graf101367c2006-08-04 03:39:02 -07002455#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2456 fib6_rules_init();
2457#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002458}
2459
2460void ip6_route_cleanup(void)
2461{
Thomas Graf101367c2006-08-04 03:39:02 -07002462#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2463 fib6_rules_cleanup();
2464#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002465#ifdef CONFIG_PROC_FS
2466 proc_net_remove("ipv6_route");
2467 proc_net_remove("rt6_stats");
2468#endif
2469#ifdef CONFIG_XFRM
2470 xfrm6_fini();
2471#endif
2472 rt6_ifdown(NULL);
2473 fib6_gc_cleanup();
2474 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2475}