/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
15
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable; otherwise, round-robin the list.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/if_arp.h>
39
40#ifdef CONFIG_PROC_FS
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
43#endif
44
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Set RT6_DEBUG to 3 to get tracing;
 * below that level both RDBG() and RT6_TRACE() expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -080075#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -080077#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
79
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Two more static reject routes, only built with multiple routing
 * tables.  They mirror ip6_null_entry and differ only in the error
 * reported by the dst: -EACCES ("prohibit") and -EINVAL ("blackhole").
 */
struct rt6_info ip6_prohibit_entry = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.error		= -EACCES,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
};

struct rt6_info ip6_blk_hole_entry = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.error		= -EINVAL,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
};

#endif
186
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187/* allocate dst with ip6_dst_ops */
188static __inline__ struct rt6_info *ip6_dst_alloc(void)
189{
190 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
191}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
201 }
202}
203
204static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
206{
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
209
210 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212 if (loopback_idev != NULL) {
213 rt->rt6i_idev = loopback_idev;
214 in6_dev_put(idev);
215 }
216 }
217}
218
219static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220{
221 return (rt->rt6i_flags & RTF_EXPIRES &&
222 time_after(jiffies, rt->rt6i_expires));
223}
224
Thomas Grafc71099a2006-08-04 23:20:06 -0700225static inline int rt6_need_strict(struct in6_addr *daddr)
226{
227 return (ipv6_addr_type(daddr) &
228 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
229}
230
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700232 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 */
234
235static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236 int oif,
237 int strict)
238{
239 struct rt6_info *local = NULL;
240 struct rt6_info *sprt;
241
242 if (oif) {
243 for (sprt = rt; sprt; sprt = sprt->u.next) {
244 struct net_device *dev = sprt->rt6i_dev;
245 if (dev->ifindex == oif)
246 return sprt;
247 if (dev->flags & IFF_LOOPBACK) {
248 if (sprt->rt6i_idev == NULL ||
249 sprt->rt6i_idev->dev->ifindex != oif) {
250 if (strict && oif)
251 continue;
252 if (local && (!oif ||
253 local->rt6i_idev->dev->ifindex == oif))
254 continue;
255 }
256 local = sprt;
257 }
258 }
259
260 if (local)
261 return local;
262
263 if (strict)
264 return &ip6_null_entry;
265 }
266 return rt;
267}
268
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a valid
 * NUD state, send it a neighbour solicitation, at most once per
 * rtr_probe_interval of the route's inet6_dev.
 *
 * @rt may be NULL (rt6_select() passes its current best match, which
 * starts out as NULL); routes without a next hop are ignored too.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int probe = 0;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the write lock is needed:
	 * with only the read lock two CPUs could both pass the rate-limit
	 * check and both send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		probe = 1;
	}
	write_unlock_bh(&neigh->lock);

	if (!probe)
		return;

	/* Send the solicitation with the neighbour lock released. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
304
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800306 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800308static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800310 struct net_device *dev = rt->rt6i_dev;
311 if (!oif || dev->ifindex == oif)
312 return 2;
313 if ((dev->flags & IFF_LOOPBACK) &&
314 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315 return 1;
316 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317}
318
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800319static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800321 struct neighbour *neigh = rt->rt6i_nexthop;
322 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700323 if (rt->rt6i_flags & RTF_NONEXTHOP ||
324 !(rt->rt6i_flags & RTF_GATEWAY))
325 m = 1;
326 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 read_lock_bh(&neigh->lock);
328 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700329 m = 2;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800330 read_unlock_bh(&neigh->lock);
331 }
332 return m;
333}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335static int rt6_score_route(struct rt6_info *rt, int oif,
336 int strict)
337{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700338 int m, n;
339
340 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800341 if (!m && (strict & RT6_SELECT_F_IFACE))
342 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800343#ifdef CONFIG_IPV6_ROUTER_PREF
344 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700346 n = rt6_check_neigh(rt);
347 if (n > 1)
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800348 m |= 16;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700349 else if (!n && strict & RT6_SELECT_F_REACHABLE)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800350 return -1;
351 return m;
352}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800354static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355 int strict)
356{
357 struct rt6_info *match = NULL, *last = NULL;
358 struct rt6_info *rt, *rt0 = *head;
359 u32 metric;
360 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800362 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363 __FUNCTION__, head, head ? *head : NULL, oif);
364
365 for (rt = rt0, metric = rt0->rt6i_metric;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700366 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800367 rt = rt->u.next) {
368 int m;
369
370 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371 continue;
372
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800373 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800379 if (m > mpri) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800380 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800381 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 mpri = m;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800383 } else {
384 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 }
386 }
387
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800388 if (!match &&
389 (strict & RT6_SELECT_F_REACHABLE) &&
390 last && last != rt0) {
391 /* no entries matched; do round-robin */
Ingo Molnar34af9462006-06-27 02:53:55 -0700392 static DEFINE_SPINLOCK(lock);
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700393 spin_lock(&lock);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800394 *head = rt0->u.next;
395 rt0->u.next = last->u.next;
396 last->u.next = rt0;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700397 spin_unlock(&lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 }
399
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800400 RT6_TRACE("%s() => %p, score=%d\n",
401 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800403 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404}
405
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information Option received from router @gwaddr on
 * @dev.
 *
 * @opt/@len: the raw option and its length in bytes.
 *
 * Depending on the advertised lifetime the matching route is deleted
 * (lifetime 0), created, or refreshed; a lifetime of 0xffffffff means
 * "never expires".
 *
 * Returns 0 on success and -EINVAL when the option is malformed or has
 * to be ignored.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/*
	 * Reject negative lengths explicitly: compared against sizeof()
	 * they would be converted to a huge unsigned value and pass.
	 */
	if (len < 0 || (size_t)len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	/*
	 * RFC 4191, section 2.3: a Route Information Option carrying the
	 * reserved preference value MUST be ignored (unlike the default
	 * router preference, it is not to be treated as "medium").
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	/* The lifetime arrives in network byte order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* drop the reference obtained from the get/add helper */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
483
/*
 * Shared backtracking step of the policy lookup functions below.
 *
 * Expects the locals `rt`, `fn` and `flags` and the labels `restart`
 * and `out` in the calling function.  When a strict lookup ended on
 * ip6_null_entry, walk up through the parents of `fn`:
 *  - at the table root, take a reference on `rt` and jump to `out`;
 *  - at a node that carries route info, jump to `restart` to retry.
 * If the parent chain runs out, execution falls through.
 *
 * NOTE(review): the reference is taken before `goto out`; callers whose
 * `out` label takes another reference should be checked for a double
 * hold on ip6_null_entry.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && (flags & RT6_F_STRICT)) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_TL_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
495
496static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
497 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498{
499 struct fib6_node *fn;
500 struct rt6_info *rt;
501
Thomas Grafc71099a2006-08-04 23:20:06 -0700502 read_lock_bh(&table->tb6_lock);
503 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
504restart:
505 rt = fn->leaf;
506 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
507 BACKTRACK();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700509out:
510 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511
512 rt->u.dst.lastuse = jiffies;
Thomas Grafc71099a2006-08-04 23:20:06 -0700513 rt->u.dst.__use++;
514
515 return rt;
516
517}
518
519struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
520 int oif, int strict)
521{
522 struct flowi fl = {
523 .oif = oif,
524 .nl_u = {
525 .ip6_u = {
526 .daddr = *daddr,
527 /* TODO: saddr */
528 },
529 },
530 };
531 struct dst_entry *dst;
532 int flags = strict ? RT6_F_STRICT : 0;
533
534 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
535 if (dst->error == 0)
536 return (struct rt6_info *) dst;
537
538 dst_release(dst);
539
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 return NULL;
541}
542
Thomas Grafc71099a2006-08-04 23:20:06 -0700543/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 It takes new route entry, the addition fails by any reason the
545 route is freed. In any case, if caller does not hold it, it may
546 be destroyed.
547 */
548
Thomas Graf40e22e82006-08-22 00:00:45 -0700549static int __ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
550 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551{
552 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700553 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554
Thomas Grafc71099a2006-08-04 23:20:06 -0700555 table = rt->rt6i_table;
556 write_lock_bh(&table->tb6_lock);
557 err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
558 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559
560 return err;
561}
562
Thomas Graf40e22e82006-08-22 00:00:45 -0700563int ip6_ins_rt(struct rt6_info *rt)
564{
565 return __ip6_ins_rt(rt, NULL, NULL, NULL);
566}
567
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800568static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
569 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700571 struct rt6_info *rt;
572
573 /*
574 * Clone the route.
575 */
576
577 rt = ip6_rt_copy(ort);
578
579 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900580 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
581 if (rt->rt6i_dst.plen != 128 &&
582 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
583 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900585 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900587 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 rt->rt6i_dst.plen = 128;
589 rt->rt6i_flags |= RTF_CACHE;
590 rt->u.dst.flags |= DST_HOST;
591
592#ifdef CONFIG_IPV6_SUBTREES
593 if (rt->rt6i_src.plen && saddr) {
594 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
595 rt->rt6i_src.plen = 128;
596 }
597#endif
598
599 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
600
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800601 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800603 return rt;
604}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800606static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
607{
608 struct rt6_info *rt = ip6_rt_copy(ort);
609 if (rt) {
610 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
611 rt->rt6i_dst.plen = 128;
612 rt->rt6i_flags |= RTF_CACHE;
613 if (rt->rt6i_flags & RTF_REJECT)
614 rt->u.dst.error = ort->u.dst.error;
615 rt->u.dst.flags |= DST_HOST;
616 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
617 }
618 return rt;
619}
620
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700621static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
622 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623{
624 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800625 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700626 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800628 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800629 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630
Thomas Grafc71099a2006-08-04 23:20:06 -0700631 if (flags & RT6_F_STRICT)
632 strict = RT6_SELECT_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
634relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700635 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800637restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700638 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639
640restart:
Thomas Grafc71099a2006-08-04 23:20:06 -0700641 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800643 if (rt == &ip6_null_entry ||
644 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800645 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800647 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700648 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800649
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800650 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800651 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800652 else {
653#if CLONE_OFFLINK_ROUTE
654 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
655#else
656 goto out2;
657#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800659
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800660 dst_release(&rt->u.dst);
661 rt = nrt ? : &ip6_null_entry;
662
663 dst_hold(&rt->u.dst);
664 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700665 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800666 if (!err)
667 goto out2;
668 }
669
670 if (--attempts <= 0)
671 goto out2;
672
673 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700674 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800675 * released someone could insert this route. Relookup.
676 */
677 dst_release(&rt->u.dst);
678 goto relookup;
679
680out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800681 if (reachable) {
682 reachable = 0;
683 goto restart_2;
684 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800685 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700686 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687out2:
688 rt->u.dst.lastuse = jiffies;
689 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700690
691 return rt;
692}
693
694void ip6_route_input(struct sk_buff *skb)
695{
696 struct ipv6hdr *iph = skb->nh.ipv6h;
697 struct flowi fl = {
698 .iif = skb->dev->ifindex,
699 .nl_u = {
700 .ip6_u = {
701 .daddr = iph->daddr,
702 .saddr = iph->saddr,
703 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
704 },
705 },
706 .proto = iph->nexthdr,
707 };
708 int flags = 0;
709
710 if (rt6_need_strict(&iph->daddr))
711 flags |= RT6_F_STRICT;
712
713 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
714}
715
716static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
717 struct flowi *fl, int flags)
718{
719 struct fib6_node *fn;
720 struct rt6_info *rt, *nrt;
721 int strict = 0;
722 int attempts = 3;
723 int err;
724 int reachable = RT6_SELECT_F_REACHABLE;
725
726 if (flags & RT6_F_STRICT)
727 strict = RT6_SELECT_F_IFACE;
728
729relookup:
730 read_lock_bh(&table->tb6_lock);
731
732restart_2:
733 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
734
735restart:
736 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
737 BACKTRACK();
738 if (rt == &ip6_null_entry ||
739 rt->rt6i_flags & RTF_CACHE)
740 goto out;
741
742 dst_hold(&rt->u.dst);
743 read_unlock_bh(&table->tb6_lock);
744
745 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
746 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
747 else {
748#if CLONE_OFFLINK_ROUTE
749 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
750#else
751 goto out2;
752#endif
753 }
754
755 dst_release(&rt->u.dst);
756 rt = nrt ? : &ip6_null_entry;
757
758 dst_hold(&rt->u.dst);
759 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700760 err = ip6_ins_rt(nrt);
Thomas Grafc71099a2006-08-04 23:20:06 -0700761 if (!err)
762 goto out2;
763 }
764
765 if (--attempts <= 0)
766 goto out2;
767
768 /*
769 * Race condition! In the gap, when table->tb6_lock was
770 * released someone could insert this route. Relookup.
771 */
772 dst_release(&rt->u.dst);
773 goto relookup;
774
775out:
776 if (reachable) {
777 reachable = 0;
778 goto restart_2;
779 }
780 dst_hold(&rt->u.dst);
781 read_unlock_bh(&table->tb6_lock);
782out2:
783 rt->u.dst.lastuse = jiffies;
784 rt->u.dst.__use++;
785 return rt;
786}
787
788struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
789{
790 int flags = 0;
791
792 if (rt6_need_strict(&fl->fl6_dst))
793 flags |= RT6_F_STRICT;
794
795 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796}
797
798
799/*
800 * Destination cache support functions
801 */
802
803static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
804{
805 struct rt6_info *rt;
806
807 rt = (struct rt6_info *) dst;
808
809 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
810 return dst;
811
812 return NULL;
813}
814
815static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
816{
817 struct rt6_info *rt = (struct rt6_info *) dst;
818
819 if (rt) {
820 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700821 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822 else
823 dst_release(dst);
824 }
825 return NULL;
826}
827
828static void ip6_link_failure(struct sk_buff *skb)
829{
830 struct rt6_info *rt;
831
832 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
833
834 rt = (struct rt6_info *) skb->dst;
835 if (rt) {
836 if (rt->rt6i_flags&RTF_CACHE) {
837 dst_set_expires(&rt->u.dst, 0);
838 rt->rt6i_flags |= RTF_EXPIRES;
839 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
840 rt->rt6i_node->fn_sernum = -1;
841 }
842}
843
844static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
845{
846 struct rt6_info *rt6 = (struct rt6_info*)dst;
847
848 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
849 rt6->rt6i_flags |= RTF_MODIFIED;
850 if (mtu < IPV6_MIN_MTU) {
851 mtu = IPV6_MIN_MTU;
852 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
853 }
854 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700855 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856 }
857}
858
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859static int ipv6_get_mtu(struct net_device *dev);
860
861static inline unsigned int ipv6_advmss(unsigned int mtu)
862{
863 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
864
865 if (mtu < ip6_rt_min_advmss)
866 mtu = ip6_rt_min_advmss;
867
868 /*
869 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
870 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
871 * IPV6_MAXPLEN is also valid and means: "any MSS,
872 * rely only on pmtu discovery"
873 */
874 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
875 mtu = IPV6_MAXPLEN;
876 return mtu;
877}
878
/*
 * dst entries handed out by ndisc_dst_alloc(), chained through
 * dst->next and reaped by ndisc_dst_gc().  ndisc_lock protects the list.
 */
static DEFINE_SPINLOCK(ndisc_lock);
static struct dst_entry *ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700881
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
883 struct neighbour *neigh,
884 struct in6_addr *addr,
885 int (*output)(struct sk_buff *))
886{
887 struct rt6_info *rt;
888 struct inet6_dev *idev = in6_dev_get(dev);
889
890 if (unlikely(idev == NULL))
891 return NULL;
892
893 rt = ip6_dst_alloc();
894 if (unlikely(rt == NULL)) {
895 in6_dev_put(idev);
896 goto out;
897 }
898
899 dev_hold(dev);
900 if (neigh)
901 neigh_hold(neigh);
902 else
903 neigh = ndisc_get_neigh(dev, addr);
904
905 rt->rt6i_dev = dev;
906 rt->rt6i_idev = idev;
907 rt->rt6i_nexthop = neigh;
908 atomic_set(&rt->u.dst.__refcnt, 1);
909 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
910 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
911 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
912 rt->u.dst.output = output;
913
914#if 0 /* there's no chance to use these for ndisc */
915 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
916 ? DST_HOST
917 : 0;
918 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
919 rt->rt6i_dst.plen = 128;
920#endif
921
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700922 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 rt->u.dst.next = ndisc_dst_gc_list;
924 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700925 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926
927 fib6_force_start_gc();
928
929out:
930 return (struct dst_entry *)rt;
931}
932
933int ndisc_dst_gc(int *more)
934{
935 struct dst_entry *dst, *next, **pprev;
936 int freed;
937
938 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700939 freed = 0;
940
941 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700943
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 while ((dst = *pprev) != NULL) {
945 if (!atomic_read(&dst->__refcnt)) {
946 *pprev = dst->next;
947 dst_free(dst);
948 freed++;
949 } else {
950 pprev = &dst->next;
951 (*more)++;
952 }
953 }
954
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700955 spin_unlock_bh(&ndisc_lock);
956
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 return freed;
958}
959
960static int ip6_dst_gc(void)
961{
962 static unsigned expire = 30*HZ;
963 static unsigned long last_gc;
964 unsigned long now = jiffies;
965
966 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
967 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
968 goto out;
969
970 expire++;
971 fib6_run_gc(expire);
972 last_gc = now;
973 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
974 expire = ip6_rt_gc_timeout>>1;
975
976out:
977 expire -= expire>>ip6_rt_gc_elasticity;
978 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
979}
980
981/* Clean host part of a prefix. Not necessary in radix tree,
982 but results in cleaner routing tables.
983
984 Remove it only when all the things will work!
985 */
986
987static int ipv6_get_mtu(struct net_device *dev)
988{
989 int mtu = IPV6_MIN_MTU;
990 struct inet6_dev *idev;
991
992 idev = in6_dev_get(dev);
993 if (idev) {
994 mtu = idev->cnf.mtu6;
995 in6_dev_put(idev);
996 }
997 return mtu;
998}
999
1000int ipv6_get_hoplimit(struct net_device *dev)
1001{
1002 int hoplimit = ipv6_devconf.hop_limit;
1003 struct inet6_dev *idev;
1004
1005 idev = in6_dev_get(dev);
1006 if (idev) {
1007 hoplimit = idev->cnf.hop_limit;
1008 in6_dev_put(idev);
1009 }
1010 return hoplimit;
1011}
1012
1013/*
1014 *
1015 */
1016
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001017int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
Thomas Grafc71099a2006-08-04 23:20:06 -07001018 void *_rtattr, struct netlink_skb_parms *req,
1019 u32 table_id)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020{
1021 int err;
1022 struct rtmsg *r;
1023 struct rtattr **rta;
1024 struct rt6_info *rt = NULL;
1025 struct net_device *dev = NULL;
1026 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001027 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028 int addr_type;
1029
1030 rta = (struct rtattr **) _rtattr;
1031
1032 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
1033 return -EINVAL;
1034#ifndef CONFIG_IPV6_SUBTREES
1035 if (rtmsg->rtmsg_src_len)
1036 return -EINVAL;
1037#endif
1038 if (rtmsg->rtmsg_ifindex) {
1039 err = -ENODEV;
1040 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
1041 if (!dev)
1042 goto out;
1043 idev = in6_dev_get(dev);
1044 if (!idev)
1045 goto out;
1046 }
1047
1048 if (rtmsg->rtmsg_metric == 0)
1049 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
1050
Thomas Grafc71099a2006-08-04 23:20:06 -07001051 table = fib6_new_table(table_id);
1052 if (table == NULL) {
1053 err = -ENOBUFS;
1054 goto out;
1055 }
1056
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 rt = ip6_dst_alloc();
1058
1059 if (rt == NULL) {
1060 err = -ENOMEM;
1061 goto out;
1062 }
1063
1064 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki3dd4bc62005-12-19 14:02:45 -08001065 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001066 if (nlh && (r = NLMSG_DATA(nlh))) {
1067 rt->rt6i_protocol = r->rtm_protocol;
1068 } else {
1069 rt->rt6i_protocol = RTPROT_BOOT;
1070 }
1071
1072 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
1073
1074 if (addr_type & IPV6_ADDR_MULTICAST)
1075 rt->u.dst.input = ip6_mc_input;
1076 else
1077 rt->u.dst.input = ip6_forward;
1078
1079 rt->u.dst.output = ip6_output;
1080
1081 ipv6_addr_prefix(&rt->rt6i_dst.addr,
1082 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
1083 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
1084 if (rt->rt6i_dst.plen == 128)
1085 rt->u.dst.flags = DST_HOST;
1086
1087#ifdef CONFIG_IPV6_SUBTREES
1088 ipv6_addr_prefix(&rt->rt6i_src.addr,
1089 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1090 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
1091#endif
1092
1093 rt->rt6i_metric = rtmsg->rtmsg_metric;
1094
1095 /* We cannot add true routes via loopback here,
1096 they would result in kernel looping; promote them to reject routes
1097 */
1098 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
1099 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1100 /* hold loopback dev/idev if we haven't done so. */
1101 if (dev != &loopback_dev) {
1102 if (dev) {
1103 dev_put(dev);
1104 in6_dev_put(idev);
1105 }
1106 dev = &loopback_dev;
1107 dev_hold(dev);
1108 idev = in6_dev_get(dev);
1109 if (!idev) {
1110 err = -ENODEV;
1111 goto out;
1112 }
1113 }
1114 rt->u.dst.output = ip6_pkt_discard_out;
1115 rt->u.dst.input = ip6_pkt_discard;
1116 rt->u.dst.error = -ENETUNREACH;
1117 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1118 goto install_route;
1119 }
1120
1121 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
1122 struct in6_addr *gw_addr;
1123 int gwa_type;
1124
1125 gw_addr = &rtmsg->rtmsg_gateway;
1126 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1127 gwa_type = ipv6_addr_type(gw_addr);
1128
1129 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1130 struct rt6_info *grt;
1131
1132 /* IPv6 strictly inhibits using not link-local
1133 addresses as nexthop address.
1134 Otherwise, router will not able to send redirects.
1135 It is very good, but in some (rare!) circumstances
1136 (SIT, PtP, NBMA NOARP links) it is handy to allow
1137 some exceptions. --ANK
1138 */
1139 err = -EINVAL;
1140 if (!(gwa_type&IPV6_ADDR_UNICAST))
1141 goto out;
1142
1143 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1144
1145 err = -EHOSTUNREACH;
1146 if (grt == NULL)
1147 goto out;
1148 if (dev) {
1149 if (dev != grt->rt6i_dev) {
1150 dst_release(&grt->u.dst);
1151 goto out;
1152 }
1153 } else {
1154 dev = grt->rt6i_dev;
1155 idev = grt->rt6i_idev;
1156 dev_hold(dev);
1157 in6_dev_hold(grt->rt6i_idev);
1158 }
1159 if (!(grt->rt6i_flags&RTF_GATEWAY))
1160 err = 0;
1161 dst_release(&grt->u.dst);
1162
1163 if (err)
1164 goto out;
1165 }
1166 err = -EINVAL;
1167 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1168 goto out;
1169 }
1170
1171 err = -ENODEV;
1172 if (dev == NULL)
1173 goto out;
1174
1175 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1176 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1177 if (IS_ERR(rt->rt6i_nexthop)) {
1178 err = PTR_ERR(rt->rt6i_nexthop);
1179 rt->rt6i_nexthop = NULL;
1180 goto out;
1181 }
1182 }
1183
1184 rt->rt6i_flags = rtmsg->rtmsg_flags;
1185
1186install_route:
1187 if (rta && rta[RTA_METRICS-1]) {
1188 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1189 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1190
1191 while (RTA_OK(attr, attrlen)) {
1192 unsigned flavor = attr->rta_type;
1193 if (flavor) {
1194 if (flavor > RTAX_MAX) {
1195 err = -EINVAL;
1196 goto out;
1197 }
1198 rt->u.dst.metrics[flavor-1] =
1199 *(u32 *)RTA_DATA(attr);
1200 }
1201 attr = RTA_NEXT(attr, attrlen);
1202 }
1203 }
1204
1205 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1206 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1207 if (!rt->u.dst.metrics[RTAX_MTU-1])
1208 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1209 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1210 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1211 rt->u.dst.dev = dev;
1212 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001213 rt->rt6i_table = table;
Thomas Graf40e22e82006-08-22 00:00:45 -07001214 return __ip6_ins_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215
1216out:
1217 if (dev)
1218 dev_put(dev);
1219 if (idev)
1220 in6_dev_put(idev);
1221 if (rt)
1222 dst_free((struct dst_entry *) rt);
1223 return err;
1224}
1225
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001226static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
1227 void *_rtattr, struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228{
1229 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001230 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231
Patrick McHardy6c813a72006-08-06 22:22:47 -07001232 if (rt == &ip6_null_entry)
1233 return -ENOENT;
1234
Thomas Grafc71099a2006-08-04 23:20:06 -07001235 table = rt->rt6i_table;
1236 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001238 err = fib6_del(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 dst_release(&rt->u.dst);
1240
Thomas Grafc71099a2006-08-04 23:20:06 -07001241 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242
1243 return err;
1244}
1245
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001246int ip6_del_rt(struct rt6_info *rt)
1247{
1248 return __ip6_del_rt(rt, NULL, NULL, NULL);
1249}
1250
Thomas Grafc71099a2006-08-04 23:20:06 -07001251static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
1252 void *_rtattr, struct netlink_skb_parms *req,
1253 u32 table_id)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254{
Thomas Grafc71099a2006-08-04 23:20:06 -07001255 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256 struct fib6_node *fn;
1257 struct rt6_info *rt;
1258 int err = -ESRCH;
1259
Thomas Grafc71099a2006-08-04 23:20:06 -07001260 table = fib6_get_table(table_id);
1261 if (table == NULL)
1262 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001263
Thomas Grafc71099a2006-08-04 23:20:06 -07001264 read_lock_bh(&table->tb6_lock);
1265
1266 fn = fib6_locate(&table->tb6_root,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1268 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1269
1270 if (fn) {
1271 for (rt = fn->leaf; rt; rt = rt->u.next) {
1272 if (rtmsg->rtmsg_ifindex &&
1273 (rt->rt6i_dev == NULL ||
1274 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1275 continue;
1276 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1277 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1278 continue;
1279 if (rtmsg->rtmsg_metric &&
1280 rtmsg->rtmsg_metric != rt->rt6i_metric)
1281 continue;
1282 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001283 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001284
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001285 return __ip6_del_rt(rt, nlh, _rtattr, req);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286 }
1287 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001288 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289
1290 return err;
1291}
1292
1293/*
1294 * Handle redirects
1295 */
1296void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1297 struct neighbour *neigh, u8 *lladdr, int on_link)
1298{
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001299 struct rt6_info *rt, *nrt = NULL;
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001300 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001301 struct fib6_table *table;
Tom Tucker8d717402006-07-30 20:43:36 -07001302 struct netevent_redirect netevent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303
Thomas Grafc71099a2006-08-04 23:20:06 -07001304 /* TODO: Very lazy, might need to check all tables */
1305 table = fib6_get_table(RT6_TABLE_MAIN);
1306 if (table == NULL)
1307 return;
1308
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001310 * Get the "current" route for this destination and
1311 * check if the redirect has come from approriate router.
1312 *
1313 * RFC 2461 specifies that redirects should only be
1314 * accepted if they come from the nexthop to the target.
1315 * Due to the way the routes are chosen, this notion
1316 * is a bit fuzzy and one might need to check all possible
1317 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001318 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319
Thomas Grafc71099a2006-08-04 23:20:06 -07001320 read_lock_bh(&table->tb6_lock);
1321 fn = fib6_lookup(&table->tb6_root, dest, NULL);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001322restart:
1323 for (rt = fn->leaf; rt; rt = rt->u.next) {
1324 /*
1325 * Current route is on-link; redirect is always invalid.
1326 *
1327 * Seems, previous statement is not true. It could
1328 * be node, which looks for us as on-link (f.e. proxy ndisc)
1329 * But then router serving it might decide, that we should
1330 * know truth 8)8) --ANK (980726).
1331 */
1332 if (rt6_check_expired(rt))
1333 continue;
1334 if (!(rt->rt6i_flags & RTF_GATEWAY))
1335 continue;
1336 if (neigh->dev != rt->rt6i_dev)
1337 continue;
1338 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1339 continue;
1340 break;
1341 }
1342 if (rt)
1343 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001344 else if (rt6_need_strict(dest)) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001345 while ((fn = fn->parent) != NULL) {
1346 if (fn->fn_flags & RTN_ROOT)
1347 break;
1348 if (fn->fn_flags & RTN_RTINFO)
1349 goto restart;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001351 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001352 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001353
1354 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355 if (net_ratelimit())
1356 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1357 "for redirect target\n");
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001358 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 }
1360
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361 /*
1362 * We have finally decided to accept it.
1363 */
1364
1365 neigh_update(neigh, lladdr, NUD_STALE,
1366 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1367 NEIGH_UPDATE_F_OVERRIDE|
1368 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1369 NEIGH_UPDATE_F_ISROUTER))
1370 );
1371
1372 /*
1373 * Redirect received -> path was valid.
1374 * Look, redirects are sent only in response to data packets,
1375 * so that this nexthop apparently is reachable. --ANK
1376 */
1377 dst_confirm(&rt->u.dst);
1378
1379 /* Duplicate redirect: silently ignore. */
1380 if (neigh == rt->u.dst.neighbour)
1381 goto out;
1382
1383 nrt = ip6_rt_copy(rt);
1384 if (nrt == NULL)
1385 goto out;
1386
1387 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1388 if (on_link)
1389 nrt->rt6i_flags &= ~RTF_GATEWAY;
1390
1391 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1392 nrt->rt6i_dst.plen = 128;
1393 nrt->u.dst.flags |= DST_HOST;
1394
1395 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1396 nrt->rt6i_nexthop = neigh_clone(neigh);
1397 /* Reset pmtu, it may be better */
1398 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1399 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1400
Thomas Graf40e22e82006-08-22 00:00:45 -07001401 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 goto out;
1403
Tom Tucker8d717402006-07-30 20:43:36 -07001404 netevent.old = &rt->u.dst;
1405 netevent.new = &nrt->u.dst;
1406 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1407
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001409 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410 return;
1411 }
1412
1413out:
1414 dst_release(&rt->u.dst);
1415 return;
1416}
1417
1418/*
1419 * Handle ICMP "packet too big" messages
1420 * i.e. Path MTU discovery
1421 */
1422
1423void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1424 struct net_device *dev, u32 pmtu)
1425{
1426 struct rt6_info *rt, *nrt;
1427 int allfrag = 0;
1428
1429 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1430 if (rt == NULL)
1431 return;
1432
1433 if (pmtu >= dst_mtu(&rt->u.dst))
1434 goto out;
1435
1436 if (pmtu < IPV6_MIN_MTU) {
1437 /*
1438 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1439 * MTU (1280) and a fragment header should always be included
1440 * after a node receiving Too Big message reporting PMTU is
1441 * less than the IPv6 Minimum Link MTU.
1442 */
1443 pmtu = IPV6_MIN_MTU;
1444 allfrag = 1;
1445 }
1446
1447 /* New mtu received -> path was valid.
1448 They are sent only in response to data packets,
1449 so that this nexthop apparently is reachable. --ANK
1450 */
1451 dst_confirm(&rt->u.dst);
1452
1453 /* Host route. If it is static, it would be better
1454 not to override it, but add new one, so that
1455 when cache entry will expire old pmtu
1456 would return automatically.
1457 */
1458 if (rt->rt6i_flags & RTF_CACHE) {
1459 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1460 if (allfrag)
1461 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1462 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1463 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1464 goto out;
1465 }
1466
1467 /* Network route.
1468 Two cases are possible:
1469 1. It is connected route. Action: COW
1470 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1471 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001472 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001473 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001474 else
1475 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001476
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001477 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001478 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1479 if (allfrag)
1480 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1481
1482 /* According to RFC 1981, detecting PMTU increase shouldn't be
1483 * happened within 5 mins, the recommended timer is 10 mins.
1484 * Here this route expiration time is set to ip6_rt_mtu_expires
1485 * which is 10 mins. After 10 mins the decreased pmtu is expired
1486 * and detecting PMTU increase will be automatically happened.
1487 */
1488 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1489 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1490
Thomas Graf40e22e82006-08-22 00:00:45 -07001491 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493out:
1494 dst_release(&rt->u.dst);
1495}
1496
1497/*
1498 * Misc support functions
1499 */
1500
1501static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1502{
1503 struct rt6_info *rt = ip6_dst_alloc();
1504
1505 if (rt) {
1506 rt->u.dst.input = ort->u.dst.input;
1507 rt->u.dst.output = ort->u.dst.output;
1508
1509 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1510 rt->u.dst.dev = ort->u.dst.dev;
1511 if (rt->u.dst.dev)
1512 dev_hold(rt->u.dst.dev);
1513 rt->rt6i_idev = ort->rt6i_idev;
1514 if (rt->rt6i_idev)
1515 in6_dev_hold(rt->rt6i_idev);
1516 rt->u.dst.lastuse = jiffies;
1517 rt->rt6i_expires = 0;
1518
1519 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1520 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1521 rt->rt6i_metric = 0;
1522
1523 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1524#ifdef CONFIG_IPV6_SUBTREES
1525 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1526#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001527 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528 }
1529 return rt;
1530}
1531
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001532#ifdef CONFIG_IPV6_ROUTE_INFO
1533static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1534 struct in6_addr *gwaddr, int ifindex)
1535{
1536 struct fib6_node *fn;
1537 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001538 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001539
Thomas Grafc71099a2006-08-04 23:20:06 -07001540 table = fib6_get_table(RT6_TABLE_INFO);
1541 if (table == NULL)
1542 return NULL;
1543
1544 write_lock_bh(&table->tb6_lock);
1545 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001546 if (!fn)
1547 goto out;
1548
1549 for (rt = fn->leaf; rt; rt = rt->u.next) {
1550 if (rt->rt6i_dev->ifindex != ifindex)
1551 continue;
1552 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1553 continue;
1554 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1555 continue;
1556 dst_hold(&rt->u.dst);
1557 break;
1558 }
1559out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001560 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001561 return rt;
1562}
1563
1564static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1565 struct in6_addr *gwaddr, int ifindex,
1566 unsigned pref)
1567{
1568 struct in6_rtmsg rtmsg;
1569
1570 memset(&rtmsg, 0, sizeof(rtmsg));
1571 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1572 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1573 rtmsg.rtmsg_dst_len = prefixlen;
1574 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1575 rtmsg.rtmsg_metric = 1024;
1576 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001577 /* We should treat it as a default route if prefix length is 0. */
1578 if (!prefixlen)
1579 rtmsg.rtmsg_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001580 rtmsg.rtmsg_ifindex = ifindex;
1581
Thomas Grafc71099a2006-08-04 23:20:06 -07001582 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001583
1584 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1585}
1586#endif
1587
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1589{
1590 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001591 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592
Thomas Grafc71099a2006-08-04 23:20:06 -07001593 table = fib6_get_table(RT6_TABLE_DFLT);
1594 if (table == NULL)
1595 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596
Thomas Grafc71099a2006-08-04 23:20:06 -07001597 write_lock_bh(&table->tb6_lock);
1598 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001600 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1602 break;
1603 }
1604 if (rt)
1605 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001606 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607 return rt;
1608}
1609
1610struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001611 struct net_device *dev,
1612 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613{
1614 struct in6_rtmsg rtmsg;
1615
1616 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1617 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1618 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1619 rtmsg.rtmsg_metric = 1024;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001620 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1621 RTF_PREF(pref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622
1623 rtmsg.rtmsg_ifindex = dev->ifindex;
1624
Thomas Grafc71099a2006-08-04 23:20:06 -07001625 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626 return rt6_get_dflt_router(gwaddr, dev);
1627}
1628
1629void rt6_purge_dflt_routers(void)
1630{
1631 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001632 struct fib6_table *table;
1633
1634 /* NOTE: Keep consistent with rt6_get_dflt_router */
1635 table = fib6_get_table(RT6_TABLE_DFLT);
1636 if (table == NULL)
1637 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638
1639restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001640 read_lock_bh(&table->tb6_lock);
1641 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1643 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001644 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001645 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646 goto restart;
1647 }
1648 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001649 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650}
1651
1652int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1653{
1654 struct in6_rtmsg rtmsg;
1655 int err;
1656
1657 switch(cmd) {
1658 case SIOCADDRT: /* Add a route */
1659 case SIOCDELRT: /* Delete a route */
1660 if (!capable(CAP_NET_ADMIN))
1661 return -EPERM;
1662 err = copy_from_user(&rtmsg, arg,
1663 sizeof(struct in6_rtmsg));
1664 if (err)
1665 return -EFAULT;
1666
1667 rtnl_lock();
1668 switch (cmd) {
1669 case SIOCADDRT:
Thomas Grafc71099a2006-08-04 23:20:06 -07001670 err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
1671 RT6_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672 break;
1673 case SIOCDELRT:
Thomas Grafc71099a2006-08-04 23:20:06 -07001674 err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
1675 RT6_TABLE_MAIN);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 break;
1677 default:
1678 err = -EINVAL;
1679 }
1680 rtnl_unlock();
1681
1682 return err;
1683 };
1684
1685 return -EINVAL;
1686}
1687
1688/*
1689 * Drop the packet on the floor
1690 */
1691
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001692static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693{
Lv Liangying76d0cc12006-08-29 00:00:47 -07001694 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1695 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1696 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1697
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1699 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1700 kfree_skb(skb);
1701 return 0;
1702}
1703
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001704static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705{
1706 skb->dev = skb->dst->dev;
1707 return ip6_pkt_discard(skb);
1708}
1709
1710/*
1711 * Allocate a dst for local (unicast / anycast) address.
1712 */
1713
1714struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1715 const struct in6_addr *addr,
1716 int anycast)
1717{
1718 struct rt6_info *rt = ip6_dst_alloc();
1719
1720 if (rt == NULL)
1721 return ERR_PTR(-ENOMEM);
1722
1723 dev_hold(&loopback_dev);
1724 in6_dev_hold(idev);
1725
1726 rt->u.dst.flags = DST_HOST;
1727 rt->u.dst.input = ip6_input;
1728 rt->u.dst.output = ip6_output;
1729 rt->rt6i_dev = &loopback_dev;
1730 rt->rt6i_idev = idev;
1731 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1732 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1733 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1734 rt->u.dst.obsolete = -1;
1735
1736 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001737 if (anycast)
1738 rt->rt6i_flags |= RTF_ANYCAST;
1739 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740 rt->rt6i_flags |= RTF_LOCAL;
1741 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1742 if (rt->rt6i_nexthop == NULL) {
1743 dst_free((struct dst_entry *) rt);
1744 return ERR_PTR(-ENOMEM);
1745 }
1746
1747 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1748 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001749 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750
1751 atomic_set(&rt->u.dst.__refcnt, 1);
1752
1753 return rt;
1754}
1755
1756static int fib6_ifdown(struct rt6_info *rt, void *arg)
1757{
1758 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1759 rt != &ip6_null_entry) {
1760 RT6_TRACE("deleted by ifdown %p\n", rt);
1761 return -1;
1762 }
1763 return 0;
1764}
1765
1766void rt6_ifdown(struct net_device *dev)
1767{
Thomas Grafc71099a2006-08-04 23:20:06 -07001768 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769}
1770
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* its new MTU */
};
1776
1777static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1778{
1779 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1780 struct inet6_dev *idev;
1781
1782 /* In IPv6 pmtu discovery is not optional,
1783 so that RTAX_MTU lock cannot disable it.
1784 We still use this lock to block changes
1785 caused by addrconf/ndisc.
1786 */
1787
1788 idev = __in6_dev_get(arg->dev);
1789 if (idev == NULL)
1790 return 0;
1791
1792 /* For administrative MTU increase, there is no way to discover
1793 IPv6 PMTU increase, so PMTU increase should be updated here.
1794 Since RFC 1981 doesn't include administrative MTU increase
1795 update PMTU increase is a MUST. (i.e. jumbo frame)
1796 */
1797 /*
1798 If new MTU is less than route PMTU, this new MTU will be the
1799 lowest MTU in the path, update the route PMTU to reflect PMTU
1800 decreases; if new MTU is greater than route PMTU, and the
1801 old MTU is the lowest MTU in the path, update the route PMTU
1802 to reflect the increase. In this case if the other nodes' MTU
1803 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1804 PMTU discouvery.
1805 */
1806 if (rt->rt6i_dev == arg->dev &&
1807 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1808 (dst_mtu(&rt->u.dst) > arg->mtu ||
1809 (dst_mtu(&rt->u.dst) < arg->mtu &&
1810 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1811 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1812 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1813 return 0;
1814}
1815
1816void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1817{
Thomas Grafc71099a2006-08-04 23:20:06 -07001818 struct rt6_mtu_change_arg arg = {
1819 .dev = dev,
1820 .mtu = mtu,
1821 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822
Thomas Grafc71099a2006-08-04 23:20:06 -07001823 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824}
1825
1826static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1827 struct in6_rtmsg *rtmsg)
1828{
1829 memset(rtmsg, 0, sizeof(*rtmsg));
1830
1831 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1832 rtmsg->rtmsg_src_len = r->rtm_src_len;
1833 rtmsg->rtmsg_flags = RTF_UP;
1834 if (r->rtm_type == RTN_UNREACHABLE)
1835 rtmsg->rtmsg_flags |= RTF_REJECT;
1836
1837 if (rta[RTA_GATEWAY-1]) {
1838 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1839 return -EINVAL;
1840 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1841 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1842 }
1843 if (rta[RTA_DST-1]) {
1844 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1845 return -EINVAL;
1846 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1847 }
1848 if (rta[RTA_SRC-1]) {
1849 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1850 return -EINVAL;
1851 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1852 }
1853 if (rta[RTA_OIF-1]) {
1854 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1855 return -EINVAL;
1856 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1857 }
1858 if (rta[RTA_PRIORITY-1]) {
1859 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1860 return -EINVAL;
1861 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1862 }
1863 return 0;
1864}
1865
1866int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1867{
1868 struct rtmsg *r = NLMSG_DATA(nlh);
1869 struct in6_rtmsg rtmsg;
1870
1871 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1872 return -EINVAL;
Patrick McHardy9e762a42006-08-10 23:09:48 -07001873 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb),
1874 rtm_get_table(arg, r->rtm_table));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875}
1876
1877int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1878{
1879 struct rtmsg *r = NLMSG_DATA(nlh);
1880 struct in6_rtmsg rtmsg;
1881
1882 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1883 return -EINVAL;
Patrick McHardy9e762a42006-08-10 23:09:48 -07001884 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb),
1885 rtm_get_table(arg, r->rtm_table));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001886}
1887
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001889 struct in6_addr *dst, struct in6_addr *src,
1890 int iif, int type, u32 pid, u32 seq,
1891 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001892{
1893 struct rtmsg *rtm;
1894 struct nlmsghdr *nlh;
1895 unsigned char *b = skb->tail;
1896 struct rta_cacheinfo ci;
Patrick McHardy9e762a42006-08-10 23:09:48 -07001897 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898
1899 if (prefix) { /* user wants prefix routes only */
1900 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1901 /* success since this is not a prefix route */
1902 return 1;
1903 }
1904 }
1905
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001906 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907 rtm = NLMSG_DATA(nlh);
1908 rtm->rtm_family = AF_INET6;
1909 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1910 rtm->rtm_src_len = rt->rt6i_src.plen;
1911 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07001912 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07001913 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07001914 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07001915 table = RT6_TABLE_UNSPEC;
1916 rtm->rtm_table = table;
1917 RTA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918 if (rt->rt6i_flags&RTF_REJECT)
1919 rtm->rtm_type = RTN_UNREACHABLE;
1920 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1921 rtm->rtm_type = RTN_LOCAL;
1922 else
1923 rtm->rtm_type = RTN_UNICAST;
1924 rtm->rtm_flags = 0;
1925 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1926 rtm->rtm_protocol = rt->rt6i_protocol;
1927 if (rt->rt6i_flags&RTF_DYNAMIC)
1928 rtm->rtm_protocol = RTPROT_REDIRECT;
1929 else if (rt->rt6i_flags & RTF_ADDRCONF)
1930 rtm->rtm_protocol = RTPROT_KERNEL;
1931 else if (rt->rt6i_flags&RTF_DEFAULT)
1932 rtm->rtm_protocol = RTPROT_RA;
1933
1934 if (rt->rt6i_flags&RTF_CACHE)
1935 rtm->rtm_flags |= RTM_F_CLONED;
1936
1937 if (dst) {
1938 RTA_PUT(skb, RTA_DST, 16, dst);
1939 rtm->rtm_dst_len = 128;
1940 } else if (rtm->rtm_dst_len)
1941 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1942#ifdef CONFIG_IPV6_SUBTREES
1943 if (src) {
1944 RTA_PUT(skb, RTA_SRC, 16, src);
1945 rtm->rtm_src_len = 128;
1946 } else if (rtm->rtm_src_len)
1947 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1948#endif
1949 if (iif)
1950 RTA_PUT(skb, RTA_IIF, 4, &iif);
1951 else if (dst) {
1952 struct in6_addr saddr_buf;
1953 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1954 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1955 }
1956 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1957 goto rtattr_failure;
1958 if (rt->u.dst.neighbour)
1959 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1960 if (rt->u.dst.dev)
1961 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1962 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1963 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1964 if (rt->rt6i_expires)
1965 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1966 else
1967 ci.rta_expires = 0;
1968 ci.rta_used = rt->u.dst.__use;
1969 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1970 ci.rta_error = rt->u.dst.error;
1971 ci.rta_id = 0;
1972 ci.rta_ts = 0;
1973 ci.rta_tsage = 0;
1974 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1975 nlh->nlmsg_len = skb->tail - b;
1976 return skb->len;
1977
1978nlmsg_failure:
1979rtattr_failure:
1980 skb_trim(skb, b - skb->data);
1981 return -1;
1982}
1983
Patrick McHardy1b43af52006-08-10 23:11:17 -07001984int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985{
1986 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1987 int prefix;
1988
1989 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1990 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1991 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1992 } else
1993 prefix = 0;
1994
1995 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1996 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001997 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998}
1999
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2001{
2002 struct rtattr **rta = arg;
2003 int iif = 0;
2004 int err = -ENOBUFS;
2005 struct sk_buff *skb;
2006 struct flowi fl;
2007 struct rt6_info *rt;
2008
2009 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2010 if (skb == NULL)
2011 goto out;
2012
2013 /* Reserve room for dummy headers, this skb can pass
2014 through good chunk of routing engine.
2015 */
2016 skb->mac.raw = skb->data;
2017 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2018
2019 memset(&fl, 0, sizeof(fl));
2020 if (rta[RTA_SRC-1])
2021 ipv6_addr_copy(&fl.fl6_src,
2022 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
2023 if (rta[RTA_DST-1])
2024 ipv6_addr_copy(&fl.fl6_dst,
2025 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
2026
2027 if (rta[RTA_IIF-1])
2028 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
2029
2030 if (iif) {
2031 struct net_device *dev;
2032 dev = __dev_get_by_index(iif);
2033 if (!dev) {
2034 err = -ENODEV;
2035 goto out_free;
2036 }
2037 }
2038
2039 fl.oif = 0;
2040 if (rta[RTA_OIF-1])
2041 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
2042
2043 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
2044
2045 skb->dst = &rt->u.dst;
2046
2047 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2048 err = rt6_fill_node(skb, rt,
2049 &fl.fl6_dst, &fl.fl6_src,
2050 iif,
2051 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002052 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053 if (err < 0) {
2054 err = -EMSGSIZE;
2055 goto out_free;
2056 }
2057
Thomas Graf2942e902006-08-15 00:30:25 -07002058 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059out:
2060 return err;
2061out_free:
2062 kfree_skb(skb);
2063 goto out;
2064}
2065
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002066void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
2067 struct netlink_skb_parms *req)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068{
2069 struct sk_buff *skb;
Thomas Graf21713eb2006-08-15 00:35:24 -07002070 u32 pid = req ? req->pid : 0;
2071 u32 seq = nlh ? nlh->nlmsg_seq : 0;
2072 int payload = sizeof(struct rtmsg) + 256;
2073 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074
Thomas Graf21713eb2006-08-15 00:35:24 -07002075 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2076 if (skb == NULL)
2077 goto errout;
2078
2079 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2080 if (err < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081 kfree_skb(skb);
Thomas Graf21713eb2006-08-15 00:35:24 -07002082 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083 }
Thomas Graf21713eb2006-08-15 00:35:24 -07002084
2085 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2086errout:
2087 if (err < 0)
2088 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002089}
2090
2091/*
2092 * /proc
2093 */
2094
2095#ifdef CONFIG_PROC_FS
2096
/*
 * Length in bytes of one line of the ipv6_route proc output:
 *   3 * 32  hex digits for the destination, source and next-hop addresses
 *   2 * 4   the " %02x " prefix-length field after destination and source
 *   40+5+1  the 46-character tail " %08x %08x %08x %08x %8s\n"
 */
#define RT6_INFO_LEN (3 * 32 + 2 * 4 + 40 + 5 + 1)
2098
/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer the route lines are written into */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* routes skipped so far to reach the offset */
	int len;	/* bytes written into buffer so far */
};
2107
2108static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2109{
2110 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2111 int i;
2112
2113 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2114 arg->skip++;
2115 return 0;
2116 }
2117
2118 if (arg->len >= arg->length)
2119 return 0;
2120
2121 for (i=0; i<16; i++) {
2122 sprintf(arg->buffer + arg->len, "%02x",
2123 rt->rt6i_dst.addr.s6_addr[i]);
2124 arg->len += 2;
2125 }
2126 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2127 rt->rt6i_dst.plen);
2128
2129#ifdef CONFIG_IPV6_SUBTREES
2130 for (i=0; i<16; i++) {
2131 sprintf(arg->buffer + arg->len, "%02x",
2132 rt->rt6i_src.addr.s6_addr[i]);
2133 arg->len += 2;
2134 }
2135 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2136 rt->rt6i_src.plen);
2137#else
2138 sprintf(arg->buffer + arg->len,
2139 "00000000000000000000000000000000 00 ");
2140 arg->len += 36;
2141#endif
2142
2143 if (rt->rt6i_nexthop) {
2144 for (i=0; i<16; i++) {
2145 sprintf(arg->buffer + arg->len, "%02x",
2146 rt->rt6i_nexthop->primary_key[i]);
2147 arg->len += 2;
2148 }
2149 } else {
2150 sprintf(arg->buffer + arg->len,
2151 "00000000000000000000000000000000");
2152 arg->len += 32;
2153 }
2154 arg->len += sprintf(arg->buffer + arg->len,
2155 " %08x %08x %08x %08x %8s\n",
2156 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2157 rt->u.dst.__use, rt->rt6i_flags,
2158 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2159 return 0;
2160}
2161
2162static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2163{
Thomas Grafc71099a2006-08-04 23:20:06 -07002164 struct rt6_proc_arg arg = {
2165 .buffer = buffer,
2166 .offset = offset,
2167 .length = length,
2168 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169
Thomas Grafc71099a2006-08-04 23:20:06 -07002170 fib6_clean_all(rt6_info_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171
2172 *start = buffer;
2173 if (offset)
2174 *start += offset % RT6_INFO_LEN;
2175
2176 arg.len -= offset % RT6_INFO_LEN;
2177
2178 if (arg.len > length)
2179 arg.len = length;
2180 if (arg.len < 0)
2181 arg.len = 0;
2182
2183 return arg.len;
2184}
2185
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2187{
2188 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2189 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2190 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2191 rt6_stats.fib_rt_cache,
2192 atomic_read(&ip6_dst_ops.entries),
2193 rt6_stats.fib_discarded_routes);
2194
2195 return 0;
2196}
2197
2198static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2199{
2200 return single_open(file, rt6_stats_seq_show, NULL);
2201}
2202
2203static struct file_operations rt6_stats_seq_fops = {
2204 .owner = THIS_MODULE,
2205 .open = rt6_stats_seq_open,
2206 .read = seq_read,
2207 .llseek = seq_lseek,
2208 .release = single_release,
2209};
2210#endif /* CONFIG_PROC_FS */
2211
2212#ifdef CONFIG_SYSCTL
2213
2214static int flush_delay;
2215
2216static
2217int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2218 void __user *buffer, size_t *lenp, loff_t *ppos)
2219{
2220 if (write) {
2221 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2222 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2223 return 0;
2224 } else
2225 return -EINVAL;
2226}
2227
2228ctl_table ipv6_route_table[] = {
2229 {
2230 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2231 .procname = "flush",
2232 .data = &flush_delay,
2233 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002234 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235 .proc_handler = &ipv6_sysctl_rtcache_flush
2236 },
2237 {
2238 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2239 .procname = "gc_thresh",
2240 .data = &ip6_dst_ops.gc_thresh,
2241 .maxlen = sizeof(int),
2242 .mode = 0644,
2243 .proc_handler = &proc_dointvec,
2244 },
2245 {
2246 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2247 .procname = "max_size",
2248 .data = &ip6_rt_max_size,
2249 .maxlen = sizeof(int),
2250 .mode = 0644,
2251 .proc_handler = &proc_dointvec,
2252 },
2253 {
2254 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2255 .procname = "gc_min_interval",
2256 .data = &ip6_rt_gc_min_interval,
2257 .maxlen = sizeof(int),
2258 .mode = 0644,
2259 .proc_handler = &proc_dointvec_jiffies,
2260 .strategy = &sysctl_jiffies,
2261 },
2262 {
2263 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2264 .procname = "gc_timeout",
2265 .data = &ip6_rt_gc_timeout,
2266 .maxlen = sizeof(int),
2267 .mode = 0644,
2268 .proc_handler = &proc_dointvec_jiffies,
2269 .strategy = &sysctl_jiffies,
2270 },
2271 {
2272 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2273 .procname = "gc_interval",
2274 .data = &ip6_rt_gc_interval,
2275 .maxlen = sizeof(int),
2276 .mode = 0644,
2277 .proc_handler = &proc_dointvec_jiffies,
2278 .strategy = &sysctl_jiffies,
2279 },
2280 {
2281 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2282 .procname = "gc_elasticity",
2283 .data = &ip6_rt_gc_elasticity,
2284 .maxlen = sizeof(int),
2285 .mode = 0644,
2286 .proc_handler = &proc_dointvec_jiffies,
2287 .strategy = &sysctl_jiffies,
2288 },
2289 {
2290 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2291 .procname = "mtu_expires",
2292 .data = &ip6_rt_mtu_expires,
2293 .maxlen = sizeof(int),
2294 .mode = 0644,
2295 .proc_handler = &proc_dointvec_jiffies,
2296 .strategy = &sysctl_jiffies,
2297 },
2298 {
2299 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2300 .procname = "min_adv_mss",
2301 .data = &ip6_rt_min_advmss,
2302 .maxlen = sizeof(int),
2303 .mode = 0644,
2304 .proc_handler = &proc_dointvec_jiffies,
2305 .strategy = &sysctl_jiffies,
2306 },
2307 {
2308 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2309 .procname = "gc_min_interval_ms",
2310 .data = &ip6_rt_gc_min_interval,
2311 .maxlen = sizeof(int),
2312 .mode = 0644,
2313 .proc_handler = &proc_dointvec_ms_jiffies,
2314 .strategy = &sysctl_ms_jiffies,
2315 },
2316 { .ctl_name = 0 }
2317};
2318
2319#endif
2320
2321void __init ip6_route_init(void)
2322{
2323 struct proc_dir_entry *p;
2324
2325 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2326 sizeof(struct rt6_info),
2327 0, SLAB_HWCACHE_ALIGN,
2328 NULL, NULL);
2329 if (!ip6_dst_ops.kmem_cachep)
2330 panic("cannot create ip6_dst_cache");
2331
2332 fib6_init();
2333#ifdef CONFIG_PROC_FS
2334 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2335 if (p)
2336 p->owner = THIS_MODULE;
2337
2338 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2339#endif
2340#ifdef CONFIG_XFRM
2341 xfrm6_init();
2342#endif
Thomas Graf101367c2006-08-04 03:39:02 -07002343#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2344 fib6_rules_init();
2345#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346}
2347
2348void ip6_route_cleanup(void)
2349{
Thomas Graf101367c2006-08-04 03:39:02 -07002350#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2351 fib6_rules_cleanup();
2352#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002353#ifdef CONFIG_PROC_FS
2354 proc_net_remove("ipv6_route");
2355 proc_net_remove("rt6_stats");
2356#endif
2357#ifdef CONFIG_XFRM
2358 xfrm6_fini();
2359#endif
2360 rt6_ifdown(NULL);
2361 fib6_gc_cleanup();
2362 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2363}