blob: f4f0c341e5c8999d7828f1eaadf7328e5d42a52a [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020047#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070058#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070059#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070060
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
67/* Set to 3 to get tracing. */
68#define RT6_DEBUG 2
69
70#if RT6_DEBUG >= 3
71#define RDBG(x) printk x
72#define RT6_TRACE(x...) printk(KERN_DEBUG x)
73#else
74#define RDBG(x)
75#define RT6_TRACE(x...) do { ; } while (0)
76#endif
77
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -080078#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070079
80static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
David S. Miller14e50e52007-05-24 18:17:54 -0700123static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
124{
125}
126
127static struct dst_ops ip6_dst_blackhole_ops = {
128 .family = AF_INET6,
129 .protocol = __constant_htons(ETH_P_IPV6),
130 .destroy = ip6_dst_destroy,
131 .check = ip6_dst_check,
132 .update_pmtu = ip6_rt_blackhole_update_pmtu,
133 .entry_size = sizeof(struct rt6_info),
134};
135
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136struct rt6_info ip6_null_entry = {
137 .u = {
138 .dst = {
139 .__refcnt = ATOMIC_INIT(1),
140 .__use = 1,
141 .dev = &loopback_dev,
142 .obsolete = -1,
143 .error = -ENETUNREACH,
144 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
145 .input = ip6_pkt_discard,
146 .output = ip6_pkt_discard_out,
147 .ops = &ip6_dst_ops,
148 .path = (struct dst_entry*)&ip6_null_entry,
149 }
150 },
151 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
152 .rt6i_metric = ~(u32) 0,
153 .rt6i_ref = ATOMIC_INIT(1),
154};
155
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/*
 * Reject route carrying -EACCES; packets are passed to the
 * ip6_pkt_prohibit*() handlers.
 */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.__refcnt = ATOMIC_INIT(1),
			.__use = 1,
			.dev = &loopback_dev,
			.obsolete = -1,
			.error = -EACCES,
			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
			.input = ip6_pkt_prohibit,
			.output = ip6_pkt_prohibit_out,
			.ops = &ip6_dst_ops,
			.path = (struct dst_entry*)&ip6_prohibit_entry,
		}
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

/*
 * Reject route carrying -EINVAL; the same ip6_pkt_blk_hole() handler is
 * used for both the input and the output direction.
 */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.__refcnt = ATOMIC_INIT(1),
			.__use = 1,
			.dev = &loopback_dev,
			.obsolete = -1,
			.error = -EINVAL,
			.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
			.input = ip6_pkt_blk_hole,
			.output = ip6_pkt_blk_hole,
			.ops = &ip6_dst_ops,
			.path = (struct dst_entry*)&ip6_blk_hole_entry,
		}
	},
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#endif
203
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204/* allocate dst with ip6_dst_ops */
205static __inline__ struct rt6_info *ip6_dst_alloc(void)
206{
207 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
208}
209
210static void ip6_dst_destroy(struct dst_entry *dst)
211{
212 struct rt6_info *rt = (struct rt6_info *)dst;
213 struct inet6_dev *idev = rt->rt6i_idev;
214
215 if (idev != NULL) {
216 rt->rt6i_idev = NULL;
217 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900218 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219}
220
221static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
222 int how)
223{
224 struct rt6_info *rt = (struct rt6_info *)dst;
225 struct inet6_dev *idev = rt->rt6i_idev;
226
227 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
228 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
229 if (loopback_idev != NULL) {
230 rt->rt6i_idev = loopback_idev;
231 in6_dev_put(idev);
232 }
233 }
234}
235
236static __inline__ int rt6_check_expired(const struct rt6_info *rt)
237{
238 return (rt->rt6i_flags & RTF_EXPIRES &&
239 time_after(jiffies, rt->rt6i_expires));
240}
241
Thomas Grafc71099a2006-08-04 23:20:06 -0700242static inline int rt6_need_strict(struct in6_addr *daddr)
243{
244 return (ipv6_addr_type(daddr) &
245 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
246}
247
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700249 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700250 */
251
252static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
253 int oif,
254 int strict)
255{
256 struct rt6_info *local = NULL;
257 struct rt6_info *sprt;
258
259 if (oif) {
Eric Dumazet7cc48262007-02-09 16:22:57 -0800260 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 struct net_device *dev = sprt->rt6i_dev;
262 if (dev->ifindex == oif)
263 return sprt;
264 if (dev->flags & IFF_LOOPBACK) {
265 if (sprt->rt6i_idev == NULL ||
266 sprt->rt6i_idev->dev->ifindex != oif) {
267 if (strict && oif)
268 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900269 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700270 local->rt6i_idev->dev->ifindex == oif))
271 continue;
272 }
273 local = sprt;
274 }
275 }
276
277 if (local)
278 return local;
279
280 if (strict)
281 return &ip6_null_entry;
282 }
283 return rt;
284}
285
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * nexthop of @rt when its neighbour entry is not in a NUD_VALID state and
 * the last update is older than the per-device rtr_probe_interval.
 *
 * @rt may be NULL (nothing is done), as may its nexthop.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the lock must be taken for
	 * writing; with only the read side held, concurrent callers could
	 * all pass the rate-limit test and each send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Solicit the neighbour via its solicited-node multicast address. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
321
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800323 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700325static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700328 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700330 if ((dev->flags & IFF_LOOPBACK) &&
331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332 return 1;
333 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334}
335
Dave Jonesb6f99a22007-03-22 12:27:49 -0700336static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800338 struct neighbour *neigh = rt->rt6i_nexthop;
339 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700340 if (rt->rt6i_flags & RTF_NONEXTHOP ||
341 !(rt->rt6i_flags & RTF_GATEWAY))
342 m = 1;
343 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800344 read_lock_bh(&neigh->lock);
345 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700346 m = 2;
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800347 else if (!(neigh->nud_state & NUD_FAILED))
348 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800349 read_unlock_bh(&neigh->lock);
350 }
351 return m;
352}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800354static int rt6_score_route(struct rt6_info *rt, int oif,
355 int strict)
356{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700357 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900358
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700359 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700360 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800361 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800362#ifdef CONFIG_IPV6_ROUTER_PREF
363 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
364#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700365 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800366 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800367 return -1;
368 return m;
369}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370
David S. Millerf11e6652007-03-24 20:36:25 -0700371static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
372 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800373{
David S. Millerf11e6652007-03-24 20:36:25 -0700374 int m;
375
376 if (rt6_check_expired(rt))
377 goto out;
378
379 m = rt6_score_route(rt, oif, strict);
380 if (m < 0)
381 goto out;
382
383 if (m > *mpri) {
384 if (strict & RT6_LOOKUP_F_REACHABLE)
385 rt6_probe(match);
386 *mpri = m;
387 match = rt;
388 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
389 rt6_probe(rt);
390 }
391
392out:
393 return match;
394}
395
396static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
397 struct rt6_info *rr_head,
398 u32 metric, int oif, int strict)
399{
400 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800401 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402
David S. Millerf11e6652007-03-24 20:36:25 -0700403 match = NULL;
404 for (rt = rr_head; rt && rt->rt6i_metric == metric;
405 rt = rt->u.dst.rt6_next)
406 match = find_match(rt, oif, strict, &mpri, match);
407 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
408 rt = rt->u.dst.rt6_next)
409 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800410
David S. Millerf11e6652007-03-24 20:36:25 -0700411 return match;
412}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800413
David S. Millerf11e6652007-03-24 20:36:25 -0700414static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
415{
416 struct rt6_info *match, *rt0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417
David S. Millerf11e6652007-03-24 20:36:25 -0700418 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
419 __FUNCTION__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420
David S. Millerf11e6652007-03-24 20:36:25 -0700421 rt0 = fn->rr_ptr;
422 if (!rt0)
423 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424
David S. Millerf11e6652007-03-24 20:36:25 -0700425 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800427 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700428 (strict & RT6_LOOKUP_F_REACHABLE)) {
429 struct rt6_info *next = rt0->u.dst.rt6_next;
430
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800431 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700432 if (!next || next->rt6i_metric != rt0->rt6i_metric)
433 next = fn->leaf;
434
435 if (next != rt0)
436 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 }
438
David S. Millerf11e6652007-03-24 20:36:25 -0700439 RT6_TRACE("%s() => %p\n",
440 __FUNCTION__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800442 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443}
444
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a Route Information option received on @dev from router @gwaddr.
 *
 * @opt/@len describe the raw option.  Depending on the advertised lifetime
 * the matching route-info route is deleted (lifetime 0), created, or has
 * its preference and expiry refreshed.
 *
 * Returns 0 on success or -EINVAL if the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length.
	 *
	 * length counts 8-octet units including the 8-octet fixed header,
	 * so the option carries (length - 1) * 8 prefix octets.  A prefix
	 * longer than 64 bits therefore needs length 3 and any non-empty
	 * prefix needs at least length 2; accepting less would make the
	 * prefix copy below read past the end of the option.
	 */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* copies only prefix_len bits, validated against length above */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
522
/*
 * BACKTRACK(saddr): if the current selection `rt` is &ip6_null_entry, walk
 * from `fn` towards the root looking for another node that carries route
 * information (RTN_RTINFO).  When the parent has a subtree other than the
 * node we came from, that subtree is searched with @saddr.
 *
 * The macro relies on the enclosing function providing the locals `rt`
 * and `fn` and the labels `restart` (re-run selection on the new `fn`)
 * and `out` (top-level root reached, give up).
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700540
541static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
542 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543{
544 struct fib6_node *fn;
545 struct rt6_info *rt;
546
Thomas Grafc71099a2006-08-04 23:20:06 -0700547 read_lock_bh(&table->tb6_lock);
548 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
549restart:
550 rt = fn->leaf;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700551 rt = rt6_device_match(rt, fl->oif, flags);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700552 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700553out:
YOSHIFUJI Hideaki33cc4892006-08-28 13:19:30 -0700554 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700555 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556
557 rt->u.dst.lastuse = jiffies;
Thomas Grafc71099a2006-08-04 23:20:06 -0700558 rt->u.dst.__use++;
559
560 return rt;
561
562}
563
564struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
565 int oif, int strict)
566{
567 struct flowi fl = {
568 .oif = oif,
569 .nl_u = {
570 .ip6_u = {
571 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700572 },
573 },
574 };
575 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700577
Thomas Grafadaa70b2006-10-13 15:01:03 -0700578 if (saddr) {
579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580 flags |= RT6_LOOKUP_F_HAS_SADDR;
581 }
582
Thomas Grafc71099a2006-08-04 23:20:06 -0700583 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
584 if (dst->error == 0)
585 return (struct rt6_info *) dst;
586
587 dst_release(dst);
588
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589 return NULL;
590}
591
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900592EXPORT_SYMBOL(rt6_lookup);
593
Thomas Grafc71099a2006-08-04 23:20:06 -0700594/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595 It takes new route entry, the addition fails by any reason the
596 route is freed. In any case, if caller does not hold it, it may
597 be destroyed.
598 */
599
Thomas Graf86872cb2006-08-22 00:01:08 -0700600static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601{
602 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700603 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
Thomas Grafc71099a2006-08-04 23:20:06 -0700605 table = rt->rt6i_table;
606 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700607 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700608 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609
610 return err;
611}
612
Thomas Graf40e22e82006-08-22 00:00:45 -0700613int ip6_ins_rt(struct rt6_info *rt)
614{
Thomas Graf86872cb2006-08-22 00:01:08 -0700615 return __ip6_ins_rt(rt, NULL);
Thomas Graf40e22e82006-08-22 00:00:45 -0700616}
617
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800618static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
619 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 struct rt6_info *rt;
622
623 /*
624 * Clone the route.
625 */
626
627 rt = ip6_rt_copy(ort);
628
629 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900630 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
631 if (rt->rt6i_dst.plen != 128 &&
632 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
633 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900635 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900637 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 rt->rt6i_dst.plen = 128;
639 rt->rt6i_flags |= RTF_CACHE;
640 rt->u.dst.flags |= DST_HOST;
641
642#ifdef CONFIG_IPV6_SUBTREES
643 if (rt->rt6i_src.plen && saddr) {
644 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
645 rt->rt6i_src.plen = 128;
646 }
647#endif
648
649 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
650
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800651 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800653 return rt;
654}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800656static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
657{
658 struct rt6_info *rt = ip6_rt_copy(ort);
659 if (rt) {
660 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
661 rt->rt6i_dst.plen = 128;
662 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800663 rt->u.dst.flags |= DST_HOST;
664 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
665 }
666 return rt;
667}
668
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700669static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
670 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671{
672 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800673 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700674 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800676 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800677 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700679 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680
681relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700682 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800684restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700685 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
687restart:
David S. Millerf11e6652007-03-24 20:36:25 -0700688 rt = rt6_select(fn, fl->iif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700689 BACKTRACK(&fl->fl6_src);
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800690 if (rt == &ip6_null_entry ||
691 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800692 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800694 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700695 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800696
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800697 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800698 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800699 else {
700#if CLONE_OFFLINK_ROUTE
701 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
702#else
703 goto out2;
704#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800706
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800707 dst_release(&rt->u.dst);
708 rt = nrt ? : &ip6_null_entry;
709
710 dst_hold(&rt->u.dst);
711 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700712 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800713 if (!err)
714 goto out2;
715 }
716
717 if (--attempts <= 0)
718 goto out2;
719
720 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700721 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800722 * released someone could insert this route. Relookup.
723 */
724 dst_release(&rt->u.dst);
725 goto relookup;
726
727out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800728 if (reachable) {
729 reachable = 0;
730 goto restart_2;
731 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800732 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700733 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734out2:
735 rt->u.dst.lastuse = jiffies;
736 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700737
738 return rt;
739}
740
741void ip6_route_input(struct sk_buff *skb)
742{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700743 struct ipv6hdr *iph = ipv6_hdr(skb);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700744 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700745 struct flowi fl = {
746 .iif = skb->dev->ifindex,
747 .nl_u = {
748 .ip6_u = {
749 .daddr = iph->daddr,
750 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800751 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700752 },
753 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900754 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700755 .proto = iph->nexthdr,
756 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700757
758 if (rt6_need_strict(&iph->daddr))
759 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700760
761 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
762}
763
764static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
765 struct flowi *fl, int flags)
766{
767 struct fib6_node *fn;
768 struct rt6_info *rt, *nrt;
769 int strict = 0;
770 int attempts = 3;
771 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800772 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700773
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700774 strict |= flags & RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700775
776relookup:
777 read_lock_bh(&table->tb6_lock);
778
779restart_2:
780 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
781
782restart:
David S. Millerf11e6652007-03-24 20:36:25 -0700783 rt = rt6_select(fn, fl->oif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700784 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700785 if (rt == &ip6_null_entry ||
786 rt->rt6i_flags & RTF_CACHE)
787 goto out;
788
789 dst_hold(&rt->u.dst);
790 read_unlock_bh(&table->tb6_lock);
791
792 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
793 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
794 else {
795#if CLONE_OFFLINK_ROUTE
796 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
797#else
798 goto out2;
799#endif
800 }
801
802 dst_release(&rt->u.dst);
803 rt = nrt ? : &ip6_null_entry;
804
805 dst_hold(&rt->u.dst);
806 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700807 err = ip6_ins_rt(nrt);
Thomas Grafc71099a2006-08-04 23:20:06 -0700808 if (!err)
809 goto out2;
810 }
811
812 if (--attempts <= 0)
813 goto out2;
814
815 /*
816 * Race condition! In the gap, when table->tb6_lock was
817 * released someone could insert this route. Relookup.
818 */
819 dst_release(&rt->u.dst);
820 goto relookup;
821
822out:
823 if (reachable) {
824 reachable = 0;
825 goto restart_2;
826 }
827 dst_hold(&rt->u.dst);
828 read_unlock_bh(&table->tb6_lock);
829out2:
830 rt->u.dst.lastuse = jiffies;
831 rt->u.dst.__use++;
832 return rt;
833}
834
835struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
836{
837 int flags = 0;
838
839 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700840 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700841
Thomas Grafadaa70b2006-10-13 15:01:03 -0700842 if (!ipv6_addr_any(&fl->fl6_src))
843 flags |= RT6_LOOKUP_F_HAS_SADDR;
844
Thomas Grafc71099a2006-08-04 23:20:06 -0700845 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846}
847
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900848EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849
/* Input/output handler of blackhole routes: free the packet, report success. */
static int ip6_blackhole_output(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
855
856int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
857{
858 struct rt6_info *ort = (struct rt6_info *) *dstp;
859 struct rt6_info *rt = (struct rt6_info *)
860 dst_alloc(&ip6_dst_blackhole_ops);
861 struct dst_entry *new = NULL;
862
863 if (rt) {
864 new = &rt->u.dst;
865
866 atomic_set(&new->__refcnt, 1);
867 new->__use = 1;
868 new->input = ip6_blackhole_output;
869 new->output = ip6_blackhole_output;
870
871 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
872 new->dev = ort->u.dst.dev;
873 if (new->dev)
874 dev_hold(new->dev);
875 rt->rt6i_idev = ort->rt6i_idev;
876 if (rt->rt6i_idev)
877 in6_dev_hold(rt->rt6i_idev);
878 rt->rt6i_expires = 0;
879
880 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
881 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
882 rt->rt6i_metric = 0;
883
884 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
885#ifdef CONFIG_IPV6_SUBTREES
886 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
887#endif
888
889 dst_free(new);
890 }
891
892 dst_release(*dstp);
893 *dstp = new;
894 return (new ? 0 : -ENOMEM);
895}
896EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
897
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898/*
899 * Destination cache support functions
900 */
901
902static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
903{
904 struct rt6_info *rt;
905
906 rt = (struct rt6_info *) dst;
907
908 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
909 return dst;
910
911 return NULL;
912}
913
914static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
915{
916 struct rt6_info *rt = (struct rt6_info *) dst;
917
918 if (rt) {
919 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700920 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 else
922 dst_release(dst);
923 }
924 return NULL;
925}
926
927static void ip6_link_failure(struct sk_buff *skb)
928{
929 struct rt6_info *rt;
930
931 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
932
933 rt = (struct rt6_info *) skb->dst;
934 if (rt) {
935 if (rt->rt6i_flags&RTF_CACHE) {
936 dst_set_expires(&rt->u.dst, 0);
937 rt->rt6i_flags |= RTF_EXPIRES;
938 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
939 rt->rt6i_node->fn_sernum = -1;
940 }
941}
942
943static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
944{
945 struct rt6_info *rt6 = (struct rt6_info*)dst;
946
947 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
948 rt6->rt6i_flags |= RTF_MODIFIED;
949 if (mtu < IPV6_MIN_MTU) {
950 mtu = IPV6_MIN_MTU;
951 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
952 }
953 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700954 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 }
956}
957
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958static int ipv6_get_mtu(struct net_device *dev);
959
960static inline unsigned int ipv6_advmss(unsigned int mtu)
961{
962 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
963
964 if (mtu < ip6_rt_min_advmss)
965 mtu = ip6_rt_min_advmss;
966
967 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900968 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
969 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
970 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 * rely only on pmtu discovery"
972 */
973 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
974 mtu = IPV6_MAXPLEN;
975 return mtu;
976}
977
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700978static struct dst_entry *ndisc_dst_gc_list;
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700979static DEFINE_SPINLOCK(ndisc_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700980
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900981struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 struct neighbour *neigh,
983 struct in6_addr *addr,
984 int (*output)(struct sk_buff *))
985{
986 struct rt6_info *rt;
987 struct inet6_dev *idev = in6_dev_get(dev);
988
989 if (unlikely(idev == NULL))
990 return NULL;
991
992 rt = ip6_dst_alloc();
993 if (unlikely(rt == NULL)) {
994 in6_dev_put(idev);
995 goto out;
996 }
997
998 dev_hold(dev);
999 if (neigh)
1000 neigh_hold(neigh);
1001 else
1002 neigh = ndisc_get_neigh(dev, addr);
1003
1004 rt->rt6i_dev = dev;
1005 rt->rt6i_idev = idev;
1006 rt->rt6i_nexthop = neigh;
1007 atomic_set(&rt->u.dst.__refcnt, 1);
1008 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
1009 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1010 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1011 rt->u.dst.output = output;
1012
1013#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001014 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1015 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016 : 0;
1017 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1018 rt->rt6i_dst.plen = 128;
1019#endif
1020
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001021 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022 rt->u.dst.next = ndisc_dst_gc_list;
1023 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001024 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025
1026 fib6_force_start_gc();
1027
1028out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001029 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030}
1031
1032int ndisc_dst_gc(int *more)
1033{
1034 struct dst_entry *dst, *next, **pprev;
1035 int freed;
1036
1037 next = NULL;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001038 freed = 0;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001039
1040 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001042
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043 while ((dst = *pprev) != NULL) {
1044 if (!atomic_read(&dst->__refcnt)) {
1045 *pprev = dst->next;
1046 dst_free(dst);
1047 freed++;
1048 } else {
1049 pprev = &dst->next;
1050 (*more)++;
1051 }
1052 }
1053
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001054 spin_unlock_bh(&ndisc_lock);
1055
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056 return freed;
1057}
1058
1059static int ip6_dst_gc(void)
1060{
1061 static unsigned expire = 30*HZ;
1062 static unsigned long last_gc;
1063 unsigned long now = jiffies;
1064
1065 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1066 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1067 goto out;
1068
1069 expire++;
1070 fib6_run_gc(expire);
1071 last_gc = now;
1072 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1073 expire = ip6_rt_gc_timeout>>1;
1074
1075out:
1076 expire -= expire>>ip6_rt_gc_elasticity;
1077 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1078}
1079
1080/* Clean host part of a prefix. Not necessary in radix tree,
1081 but results in cleaner routing tables.
1082
1083 Remove it only when all the things will work!
1084 */
1085
1086static int ipv6_get_mtu(struct net_device *dev)
1087{
1088 int mtu = IPV6_MIN_MTU;
1089 struct inet6_dev *idev;
1090
1091 idev = in6_dev_get(dev);
1092 if (idev) {
1093 mtu = idev->cnf.mtu6;
1094 in6_dev_put(idev);
1095 }
1096 return mtu;
1097}
1098
1099int ipv6_get_hoplimit(struct net_device *dev)
1100{
1101 int hoplimit = ipv6_devconf.hop_limit;
1102 struct inet6_dev *idev;
1103
1104 idev = in6_dev_get(dev);
1105 if (idev) {
1106 hoplimit = idev->cnf.hop_limit;
1107 in6_dev_put(idev);
1108 }
1109 return hoplimit;
1110}
1111
1112/*
1113 *
1114 */
1115
Thomas Graf86872cb2006-08-22 00:01:08 -07001116int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117{
1118 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119 struct rt6_info *rt = NULL;
1120 struct net_device *dev = NULL;
1121 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001122 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 int addr_type;
1124
Thomas Graf86872cb2006-08-22 00:01:08 -07001125 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 return -EINVAL;
1127#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001128 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 return -EINVAL;
1130#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001131 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 err = -ENODEV;
Thomas Graf86872cb2006-08-22 00:01:08 -07001133 dev = dev_get_by_index(cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134 if (!dev)
1135 goto out;
1136 idev = in6_dev_get(dev);
1137 if (!idev)
1138 goto out;
1139 }
1140
Thomas Graf86872cb2006-08-22 00:01:08 -07001141 if (cfg->fc_metric == 0)
1142 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143
Thomas Graf86872cb2006-08-22 00:01:08 -07001144 table = fib6_new_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001145 if (table == NULL) {
1146 err = -ENOBUFS;
1147 goto out;
1148 }
1149
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 rt = ip6_dst_alloc();
1151
1152 if (rt == NULL) {
1153 err = -ENOMEM;
1154 goto out;
1155 }
1156
1157 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001158 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159
Thomas Graf86872cb2006-08-22 00:01:08 -07001160 if (cfg->fc_protocol == RTPROT_UNSPEC)
1161 cfg->fc_protocol = RTPROT_BOOT;
1162 rt->rt6i_protocol = cfg->fc_protocol;
1163
1164 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165
1166 if (addr_type & IPV6_ADDR_MULTICAST)
1167 rt->u.dst.input = ip6_mc_input;
1168 else
1169 rt->u.dst.input = ip6_forward;
1170
1171 rt->u.dst.output = ip6_output;
1172
Thomas Graf86872cb2006-08-22 00:01:08 -07001173 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1174 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 if (rt->rt6i_dst.plen == 128)
1176 rt->u.dst.flags = DST_HOST;
1177
1178#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001179 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1180 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001181#endif
1182
Thomas Graf86872cb2006-08-22 00:01:08 -07001183 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184
1185 /* We cannot add true routes via loopback here,
1186 they would result in kernel looping; promote them to reject routes
1187 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001188 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1190 /* hold loopback dev/idev if we haven't done so. */
1191 if (dev != &loopback_dev) {
1192 if (dev) {
1193 dev_put(dev);
1194 in6_dev_put(idev);
1195 }
1196 dev = &loopback_dev;
1197 dev_hold(dev);
1198 idev = in6_dev_get(dev);
1199 if (!idev) {
1200 err = -ENODEV;
1201 goto out;
1202 }
1203 }
1204 rt->u.dst.output = ip6_pkt_discard_out;
1205 rt->u.dst.input = ip6_pkt_discard;
1206 rt->u.dst.error = -ENETUNREACH;
1207 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1208 goto install_route;
1209 }
1210
Thomas Graf86872cb2006-08-22 00:01:08 -07001211 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 struct in6_addr *gw_addr;
1213 int gwa_type;
1214
Thomas Graf86872cb2006-08-22 00:01:08 -07001215 gw_addr = &cfg->fc_gateway;
1216 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217 gwa_type = ipv6_addr_type(gw_addr);
1218
1219 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1220 struct rt6_info *grt;
1221
1222 /* IPv6 strictly inhibits using not link-local
1223 addresses as nexthop address.
1224 Otherwise, router will not able to send redirects.
1225 It is very good, but in some (rare!) circumstances
1226 (SIT, PtP, NBMA NOARP links) it is handy to allow
1227 some exceptions. --ANK
1228 */
1229 err = -EINVAL;
1230 if (!(gwa_type&IPV6_ADDR_UNICAST))
1231 goto out;
1232
Thomas Graf86872cb2006-08-22 00:01:08 -07001233 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234
1235 err = -EHOSTUNREACH;
1236 if (grt == NULL)
1237 goto out;
1238 if (dev) {
1239 if (dev != grt->rt6i_dev) {
1240 dst_release(&grt->u.dst);
1241 goto out;
1242 }
1243 } else {
1244 dev = grt->rt6i_dev;
1245 idev = grt->rt6i_idev;
1246 dev_hold(dev);
1247 in6_dev_hold(grt->rt6i_idev);
1248 }
1249 if (!(grt->rt6i_flags&RTF_GATEWAY))
1250 err = 0;
1251 dst_release(&grt->u.dst);
1252
1253 if (err)
1254 goto out;
1255 }
1256 err = -EINVAL;
1257 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1258 goto out;
1259 }
1260
1261 err = -ENODEV;
1262 if (dev == NULL)
1263 goto out;
1264
Thomas Graf86872cb2006-08-22 00:01:08 -07001265 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1267 if (IS_ERR(rt->rt6i_nexthop)) {
1268 err = PTR_ERR(rt->rt6i_nexthop);
1269 rt->rt6i_nexthop = NULL;
1270 goto out;
1271 }
1272 }
1273
Thomas Graf86872cb2006-08-22 00:01:08 -07001274 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275
1276install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001277 if (cfg->fc_mx) {
1278 struct nlattr *nla;
1279 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280
Thomas Graf86872cb2006-08-22 00:01:08 -07001281 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1282 int type = nla->nla_type;
1283
1284 if (type) {
1285 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286 err = -EINVAL;
1287 goto out;
1288 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001289
1290 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292 }
1293 }
1294
1295 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1296 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1297 if (!rt->u.dst.metrics[RTAX_MTU-1])
1298 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1299 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1300 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1301 rt->u.dst.dev = dev;
1302 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001303 rt->rt6i_table = table;
Thomas Graf86872cb2006-08-22 00:01:08 -07001304 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305
1306out:
1307 if (dev)
1308 dev_put(dev);
1309 if (idev)
1310 in6_dev_put(idev);
1311 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001312 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 return err;
1314}
1315
Thomas Graf86872cb2006-08-22 00:01:08 -07001316static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317{
1318 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001319 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320
Patrick McHardy6c813a72006-08-06 22:22:47 -07001321 if (rt == &ip6_null_entry)
1322 return -ENOENT;
1323
Thomas Grafc71099a2006-08-04 23:20:06 -07001324 table = rt->rt6i_table;
1325 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326
Thomas Graf86872cb2006-08-22 00:01:08 -07001327 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328 dst_release(&rt->u.dst);
1329
Thomas Grafc71099a2006-08-04 23:20:06 -07001330 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331
1332 return err;
1333}
1334
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001335int ip6_del_rt(struct rt6_info *rt)
1336{
Thomas Graf86872cb2006-08-22 00:01:08 -07001337 return __ip6_del_rt(rt, NULL);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001338}
1339
Thomas Graf86872cb2006-08-22 00:01:08 -07001340static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341{
Thomas Grafc71099a2006-08-04 23:20:06 -07001342 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 struct fib6_node *fn;
1344 struct rt6_info *rt;
1345 int err = -ESRCH;
1346
Thomas Graf86872cb2006-08-22 00:01:08 -07001347 table = fib6_get_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001348 if (table == NULL)
1349 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350
Thomas Grafc71099a2006-08-04 23:20:06 -07001351 read_lock_bh(&table->tb6_lock);
1352
1353 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001354 &cfg->fc_dst, cfg->fc_dst_len,
1355 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001356
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001358 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001359 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001361 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001363 if (cfg->fc_flags & RTF_GATEWAY &&
1364 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001366 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367 continue;
1368 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001369 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001370
Thomas Graf86872cb2006-08-22 00:01:08 -07001371 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372 }
1373 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001374 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375
1376 return err;
1377}
1378
1379/*
1380 * Handle redirects
1381 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001382struct ip6rd_flowi {
1383 struct flowi fl;
1384 struct in6_addr gateway;
1385};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001387static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1388 struct flowi *fl,
1389 int flags)
1390{
1391 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1392 struct rt6_info *rt;
1393 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001394
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001396 * Get the "current" route for this destination and
1397 * check if the redirect has come from approriate router.
1398 *
1399 * RFC 2461 specifies that redirects should only be
1400 * accepted if they come from the nexthop to the target.
1401 * Due to the way the routes are chosen, this notion
1402 * is a bit fuzzy and one might need to check all possible
1403 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405
Thomas Grafc71099a2006-08-04 23:20:06 -07001406 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001407 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001408restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001409 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001410 /*
1411 * Current route is on-link; redirect is always invalid.
1412 *
1413 * Seems, previous statement is not true. It could
1414 * be node, which looks for us as on-link (f.e. proxy ndisc)
1415 * But then router serving it might decide, that we should
1416 * know truth 8)8) --ANK (980726).
1417 */
1418 if (rt6_check_expired(rt))
1419 continue;
1420 if (!(rt->rt6i_flags & RTF_GATEWAY))
1421 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001422 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001423 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001424 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001425 continue;
1426 break;
1427 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001428
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001429 if (!rt)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001430 rt = &ip6_null_entry;
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001431 BACKTRACK(&fl->fl6_src);
1432out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001433 dst_hold(&rt->u.dst);
1434
1435 read_unlock_bh(&table->tb6_lock);
1436
1437 return rt;
1438};
1439
1440static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1441 struct in6_addr *src,
1442 struct in6_addr *gateway,
1443 struct net_device *dev)
1444{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001445 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001446 struct ip6rd_flowi rdfl = {
1447 .fl = {
1448 .oif = dev->ifindex,
1449 .nl_u = {
1450 .ip6_u = {
1451 .daddr = *dest,
1452 .saddr = *src,
1453 },
1454 },
1455 },
1456 .gateway = *gateway,
1457 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001458
1459 if (rt6_need_strict(dest))
1460 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001461
1462 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1463}
1464
1465void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1466 struct in6_addr *saddr,
1467 struct neighbour *neigh, u8 *lladdr, int on_link)
1468{
1469 struct rt6_info *rt, *nrt = NULL;
1470 struct netevent_redirect netevent;
1471
1472 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1473
1474 if (rt == &ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475 if (net_ratelimit())
1476 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1477 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001478 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 }
1480
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 /*
1482 * We have finally decided to accept it.
1483 */
1484
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001485 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1487 NEIGH_UPDATE_F_OVERRIDE|
1488 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1489 NEIGH_UPDATE_F_ISROUTER))
1490 );
1491
1492 /*
1493 * Redirect received -> path was valid.
1494 * Look, redirects are sent only in response to data packets,
1495 * so that this nexthop apparently is reachable. --ANK
1496 */
1497 dst_confirm(&rt->u.dst);
1498
1499 /* Duplicate redirect: silently ignore. */
1500 if (neigh == rt->u.dst.neighbour)
1501 goto out;
1502
1503 nrt = ip6_rt_copy(rt);
1504 if (nrt == NULL)
1505 goto out;
1506
1507 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1508 if (on_link)
1509 nrt->rt6i_flags &= ~RTF_GATEWAY;
1510
1511 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1512 nrt->rt6i_dst.plen = 128;
1513 nrt->u.dst.flags |= DST_HOST;
1514
1515 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1516 nrt->rt6i_nexthop = neigh_clone(neigh);
1517 /* Reset pmtu, it may be better */
1518 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1519 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1520
Thomas Graf40e22e82006-08-22 00:00:45 -07001521 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522 goto out;
1523
Tom Tucker8d717402006-07-30 20:43:36 -07001524 netevent.old = &rt->u.dst;
1525 netevent.new = &nrt->u.dst;
1526 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1527
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001529 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530 return;
1531 }
1532
1533out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001534 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535 return;
1536}
1537
1538/*
1539 * Handle ICMP "packet too big" messages
1540 * i.e. Path MTU discovery
1541 */
1542
1543void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1544 struct net_device *dev, u32 pmtu)
1545{
1546 struct rt6_info *rt, *nrt;
1547 int allfrag = 0;
1548
1549 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1550 if (rt == NULL)
1551 return;
1552
1553 if (pmtu >= dst_mtu(&rt->u.dst))
1554 goto out;
1555
1556 if (pmtu < IPV6_MIN_MTU) {
1557 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001558 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559 * MTU (1280) and a fragment header should always be included
1560 * after a node receiving Too Big message reporting PMTU is
1561 * less than the IPv6 Minimum Link MTU.
1562 */
1563 pmtu = IPV6_MIN_MTU;
1564 allfrag = 1;
1565 }
1566
1567 /* New mtu received -> path was valid.
1568 They are sent only in response to data packets,
1569 so that this nexthop apparently is reachable. --ANK
1570 */
1571 dst_confirm(&rt->u.dst);
1572
1573 /* Host route. If it is static, it would be better
1574 not to override it, but add new one, so that
1575 when cache entry will expire old pmtu
1576 would return automatically.
1577 */
1578 if (rt->rt6i_flags & RTF_CACHE) {
1579 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1580 if (allfrag)
1581 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1582 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1583 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1584 goto out;
1585 }
1586
1587 /* Network route.
1588 Two cases are possible:
1589 1. It is connected route. Action: COW
1590 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1591 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001592 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001593 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001594 else
1595 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001596
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001597 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001598 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1599 if (allfrag)
1600 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1601
1602 /* According to RFC 1981, detecting PMTU increase shouldn't be
1603 * happened within 5 mins, the recommended timer is 10 mins.
1604 * Here this route expiration time is set to ip6_rt_mtu_expires
1605 * which is 10 mins. After 10 mins the decreased pmtu is expired
1606 * and detecting PMTU increase will be automatically happened.
1607 */
1608 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1609 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1610
Thomas Graf40e22e82006-08-22 00:00:45 -07001611 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613out:
1614 dst_release(&rt->u.dst);
1615}
1616
1617/*
1618 * Misc support functions
1619 */
1620
1621static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1622{
1623 struct rt6_info *rt = ip6_dst_alloc();
1624
1625 if (rt) {
1626 rt->u.dst.input = ort->u.dst.input;
1627 rt->u.dst.output = ort->u.dst.output;
1628
1629 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001630 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631 rt->u.dst.dev = ort->u.dst.dev;
1632 if (rt->u.dst.dev)
1633 dev_hold(rt->u.dst.dev);
1634 rt->rt6i_idev = ort->rt6i_idev;
1635 if (rt->rt6i_idev)
1636 in6_dev_hold(rt->rt6i_idev);
1637 rt->u.dst.lastuse = jiffies;
1638 rt->rt6i_expires = 0;
1639
1640 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1641 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1642 rt->rt6i_metric = 0;
1643
1644 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1645#ifdef CONFIG_IPV6_SUBTREES
1646 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1647#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001648 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649 }
1650 return rt;
1651}
1652
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001653#ifdef CONFIG_IPV6_ROUTE_INFO
1654static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1655 struct in6_addr *gwaddr, int ifindex)
1656{
1657 struct fib6_node *fn;
1658 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001659 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001660
Thomas Grafc71099a2006-08-04 23:20:06 -07001661 table = fib6_get_table(RT6_TABLE_INFO);
1662 if (table == NULL)
1663 return NULL;
1664
1665 write_lock_bh(&table->tb6_lock);
1666 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001667 if (!fn)
1668 goto out;
1669
Eric Dumazet7cc48262007-02-09 16:22:57 -08001670 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001671 if (rt->rt6i_dev->ifindex != ifindex)
1672 continue;
1673 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1674 continue;
1675 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1676 continue;
1677 dst_hold(&rt->u.dst);
1678 break;
1679 }
1680out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001681 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001682 return rt;
1683}
1684
1685static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1686 struct in6_addr *gwaddr, int ifindex,
1687 unsigned pref)
1688{
Thomas Graf86872cb2006-08-22 00:01:08 -07001689 struct fib6_config cfg = {
1690 .fc_table = RT6_TABLE_INFO,
1691 .fc_metric = 1024,
1692 .fc_ifindex = ifindex,
1693 .fc_dst_len = prefixlen,
1694 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1695 RTF_UP | RTF_PREF(pref),
1696 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001697
Thomas Graf86872cb2006-08-22 00:01:08 -07001698 ipv6_addr_copy(&cfg.fc_dst, prefix);
1699 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1700
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001701 /* We should treat it as a default route if prefix length is 0. */
1702 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001703 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001704
Thomas Graf86872cb2006-08-22 00:01:08 -07001705 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001706
1707 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1708}
1709#endif
1710
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001712{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001714 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715
Thomas Grafc71099a2006-08-04 23:20:06 -07001716 table = fib6_get_table(RT6_TABLE_DFLT);
1717 if (table == NULL)
1718 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719
Thomas Grafc71099a2006-08-04 23:20:06 -07001720 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001721 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001722 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001723 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1725 break;
1726 }
1727 if (rt)
1728 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001729 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 return rt;
1731}
1732
1733struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001734 struct net_device *dev,
1735 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736{
Thomas Graf86872cb2006-08-22 00:01:08 -07001737 struct fib6_config cfg = {
1738 .fc_table = RT6_TABLE_DFLT,
1739 .fc_metric = 1024,
1740 .fc_ifindex = dev->ifindex,
1741 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1742 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1743 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744
Thomas Graf86872cb2006-08-22 00:01:08 -07001745 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746
Thomas Graf86872cb2006-08-22 00:01:08 -07001747 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749 return rt6_get_dflt_router(gwaddr, dev);
1750}
1751
1752void rt6_purge_dflt_routers(void)
1753{
1754 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001755 struct fib6_table *table;
1756
1757 /* NOTE: Keep consistent with rt6_get_dflt_router */
1758 table = fib6_get_table(RT6_TABLE_DFLT);
1759 if (table == NULL)
1760 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761
1762restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001763 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001764 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1766 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001767 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001768 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769 goto restart;
1770 }
1771 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001772 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773}
1774
Thomas Graf86872cb2006-08-22 00:01:08 -07001775static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1776 struct fib6_config *cfg)
1777{
1778 memset(cfg, 0, sizeof(*cfg));
1779
1780 cfg->fc_table = RT6_TABLE_MAIN;
1781 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1782 cfg->fc_metric = rtmsg->rtmsg_metric;
1783 cfg->fc_expires = rtmsg->rtmsg_info;
1784 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1785 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1786 cfg->fc_flags = rtmsg->rtmsg_flags;
1787
1788 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1789 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1790 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1791}
1792
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1794{
Thomas Graf86872cb2006-08-22 00:01:08 -07001795 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796 struct in6_rtmsg rtmsg;
1797 int err;
1798
1799 switch(cmd) {
1800 case SIOCADDRT: /* Add a route */
1801 case SIOCDELRT: /* Delete a route */
1802 if (!capable(CAP_NET_ADMIN))
1803 return -EPERM;
1804 err = copy_from_user(&rtmsg, arg,
1805 sizeof(struct in6_rtmsg));
1806 if (err)
1807 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001808
1809 rtmsg_to_fib6_config(&rtmsg, &cfg);
1810
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811 rtnl_lock();
1812 switch (cmd) {
1813 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001814 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815 break;
1816 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001817 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818 break;
1819 default:
1820 err = -EINVAL;
1821 }
1822 rtnl_unlock();
1823
1824 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001825 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826
1827 return -EINVAL;
1828}
1829
1830/*
1831 * Drop the packet on the floor
1832 */
1833
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001834static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1835 int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001837 int type;
1838 switch (ipstats_mib_noroutes) {
1839 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001840 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001841 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1842 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1843 break;
1844 }
1845 /* FALLTHROUGH */
1846 case IPSTATS_MIB_OUTNOROUTES:
1847 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1848 break;
1849 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001850 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851 kfree_skb(skb);
1852 return 0;
1853}
1854
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001855static int ip6_pkt_discard(struct sk_buff *skb)
1856{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001857 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001858}
1859
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001860static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861{
1862 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001863 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864}
1865
David S. Miller6723ab52006-10-18 21:20:57 -07001866#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1867
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001868static int ip6_pkt_prohibit(struct sk_buff *skb)
1869{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001870 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001871}
1872
1873static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1874{
1875 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001876 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001877}
1878
/*
 *	Silently free @skb: no counter is bumped and no ICMPv6 error is sent.
 *	Always returns 0.
 */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
1884
David S. Miller6723ab52006-10-18 21:20:57 -07001885#endif
1886
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887/*
1888 * Allocate a dst for local (unicast / anycast) address.
1889 */
1890
1891struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1892 const struct in6_addr *addr,
1893 int anycast)
1894{
1895 struct rt6_info *rt = ip6_dst_alloc();
1896
1897 if (rt == NULL)
1898 return ERR_PTR(-ENOMEM);
1899
1900 dev_hold(&loopback_dev);
1901 in6_dev_hold(idev);
1902
1903 rt->u.dst.flags = DST_HOST;
1904 rt->u.dst.input = ip6_input;
1905 rt->u.dst.output = ip6_output;
1906 rt->rt6i_dev = &loopback_dev;
1907 rt->rt6i_idev = idev;
1908 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1909 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1910 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1911 rt->u.dst.obsolete = -1;
1912
1913 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001914 if (anycast)
1915 rt->rt6i_flags |= RTF_ANYCAST;
1916 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001917 rt->rt6i_flags |= RTF_LOCAL;
1918 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1919 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001920 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921 return ERR_PTR(-ENOMEM);
1922 }
1923
1924 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1925 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001926 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927
1928 atomic_set(&rt->u.dst.__refcnt, 1);
1929
1930 return rt;
1931}
1932
1933static int fib6_ifdown(struct rt6_info *rt, void *arg)
1934{
1935 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1936 rt != &ip6_null_entry) {
1937 RT6_TRACE("deleted by ifdown %p\n", rt);
1938 return -1;
1939 }
1940 return 0;
1941}
1942
1943void rt6_ifdown(struct net_device *dev)
1944{
Thomas Grafc71099a2006-08-04 23:20:06 -07001945 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001946}
1947
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1953
1954static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1955{
1956 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1957 struct inet6_dev *idev;
1958
1959 /* In IPv6 pmtu discovery is not optional,
1960 so that RTAX_MTU lock cannot disable it.
1961 We still use this lock to block changes
1962 caused by addrconf/ndisc.
1963 */
1964
1965 idev = __in6_dev_get(arg->dev);
1966 if (idev == NULL)
1967 return 0;
1968
1969 /* For administrative MTU increase, there is no way to discover
1970 IPv6 PMTU increase, so PMTU increase should be updated here.
1971 Since RFC 1981 doesn't include administrative MTU increase
1972 update PMTU increase is a MUST. (i.e. jumbo frame)
1973 */
1974 /*
1975 If new MTU is less than route PMTU, this new MTU will be the
1976 lowest MTU in the path, update the route PMTU to reflect PMTU
1977 decreases; if new MTU is greater than route PMTU, and the
1978 old MTU is the lowest MTU in the path, update the route PMTU
1979 to reflect the increase. In this case if the other nodes' MTU
1980 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1981 PMTU discouvery.
1982 */
1983 if (rt->rt6i_dev == arg->dev &&
1984 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001985 (dst_mtu(&rt->u.dst) > arg->mtu ||
1986 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001987 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Simon Arlott566cfd82007-07-26 00:09:55 -07001989 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1990 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991 return 0;
1992}
1993
1994void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1995{
Thomas Grafc71099a2006-08-04 23:20:06 -07001996 struct rt6_mtu_change_arg arg = {
1997 .dev = dev,
1998 .mtu = mtu,
1999 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000
Thomas Grafc71099a2006-08-04 23:20:06 -07002001 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002}
2003
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002004static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002005 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002006 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002007 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002008 [RTA_PRIORITY] = { .type = NLA_U32 },
2009 [RTA_METRICS] = { .type = NLA_NESTED },
2010};
2011
2012static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2013 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014{
Thomas Graf86872cb2006-08-22 00:01:08 -07002015 struct rtmsg *rtm;
2016 struct nlattr *tb[RTA_MAX+1];
2017 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018
Thomas Graf86872cb2006-08-22 00:01:08 -07002019 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2020 if (err < 0)
2021 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022
Thomas Graf86872cb2006-08-22 00:01:08 -07002023 err = -EINVAL;
2024 rtm = nlmsg_data(nlh);
2025 memset(cfg, 0, sizeof(*cfg));
2026
2027 cfg->fc_table = rtm->rtm_table;
2028 cfg->fc_dst_len = rtm->rtm_dst_len;
2029 cfg->fc_src_len = rtm->rtm_src_len;
2030 cfg->fc_flags = RTF_UP;
2031 cfg->fc_protocol = rtm->rtm_protocol;
2032
2033 if (rtm->rtm_type == RTN_UNREACHABLE)
2034 cfg->fc_flags |= RTF_REJECT;
2035
2036 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2037 cfg->fc_nlinfo.nlh = nlh;
2038
2039 if (tb[RTA_GATEWAY]) {
2040 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2041 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002043
2044 if (tb[RTA_DST]) {
2045 int plen = (rtm->rtm_dst_len + 7) >> 3;
2046
2047 if (nla_len(tb[RTA_DST]) < plen)
2048 goto errout;
2049
2050 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002052
2053 if (tb[RTA_SRC]) {
2054 int plen = (rtm->rtm_src_len + 7) >> 3;
2055
2056 if (nla_len(tb[RTA_SRC]) < plen)
2057 goto errout;
2058
2059 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002061
2062 if (tb[RTA_OIF])
2063 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2064
2065 if (tb[RTA_PRIORITY])
2066 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2067
2068 if (tb[RTA_METRICS]) {
2069 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2070 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002072
2073 if (tb[RTA_TABLE])
2074 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2075
2076 err = 0;
2077errout:
2078 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079}
2080
Thomas Grafc127ea22007-03-22 11:58:32 -07002081static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082{
Thomas Graf86872cb2006-08-22 00:01:08 -07002083 struct fib6_config cfg;
2084 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085
Thomas Graf86872cb2006-08-22 00:01:08 -07002086 err = rtm_to_fib6_config(skb, nlh, &cfg);
2087 if (err < 0)
2088 return err;
2089
2090 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091}
2092
Thomas Grafc127ea22007-03-22 11:58:32 -07002093static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094{
Thomas Graf86872cb2006-08-22 00:01:08 -07002095 struct fib6_config cfg;
2096 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097
Thomas Graf86872cb2006-08-22 00:01:08 -07002098 err = rtm_to_fib6_config(skb, nlh, &cfg);
2099 if (err < 0)
2100 return err;
2101
2102 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103}
2104
Thomas Graf339bf982006-11-10 14:10:15 -08002105static inline size_t rt6_nlmsg_size(void)
2106{
2107 return NLMSG_ALIGN(sizeof(struct rtmsg))
2108 + nla_total_size(16) /* RTA_SRC */
2109 + nla_total_size(16) /* RTA_DST */
2110 + nla_total_size(16) /* RTA_GATEWAY */
2111 + nla_total_size(16) /* RTA_PREFSRC */
2112 + nla_total_size(4) /* RTA_TABLE */
2113 + nla_total_size(4) /* RTA_IIF */
2114 + nla_total_size(4) /* RTA_OIF */
2115 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002116 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002117 + nla_total_size(sizeof(struct rta_cacheinfo));
2118}
2119
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002121 struct in6_addr *dst, struct in6_addr *src,
2122 int iif, int type, u32 pid, u32 seq,
2123 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124{
2125 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002126 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002127 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002128 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129
2130 if (prefix) { /* user wants prefix routes only */
2131 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2132 /* success since this is not a prefix route */
2133 return 1;
2134 }
2135 }
2136
Thomas Graf2d7202b2006-08-22 00:01:27 -07002137 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2138 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002139 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002140
2141 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002142 rtm->rtm_family = AF_INET6;
2143 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2144 rtm->rtm_src_len = rt->rt6i_src.plen;
2145 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002146 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002147 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002148 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002149 table = RT6_TABLE_UNSPEC;
2150 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002151 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152 if (rt->rt6i_flags&RTF_REJECT)
2153 rtm->rtm_type = RTN_UNREACHABLE;
2154 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2155 rtm->rtm_type = RTN_LOCAL;
2156 else
2157 rtm->rtm_type = RTN_UNICAST;
2158 rtm->rtm_flags = 0;
2159 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2160 rtm->rtm_protocol = rt->rt6i_protocol;
2161 if (rt->rt6i_flags&RTF_DYNAMIC)
2162 rtm->rtm_protocol = RTPROT_REDIRECT;
2163 else if (rt->rt6i_flags & RTF_ADDRCONF)
2164 rtm->rtm_protocol = RTPROT_KERNEL;
2165 else if (rt->rt6i_flags&RTF_DEFAULT)
2166 rtm->rtm_protocol = RTPROT_RA;
2167
2168 if (rt->rt6i_flags&RTF_CACHE)
2169 rtm->rtm_flags |= RTM_F_CLONED;
2170
2171 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002172 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002173 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002174 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002175 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002176#ifdef CONFIG_IPV6_SUBTREES
2177 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002178 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002179 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002181 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182#endif
2183 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002184 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002185 else if (dst) {
2186 struct in6_addr saddr_buf;
2187 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002188 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002189 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002190
Linus Torvalds1da177e2005-04-16 15:20:36 -07002191 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002192 goto nla_put_failure;
2193
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002195 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2196
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002198 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2199
2200 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002201
2202 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2203 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2204 expires, rt->u.dst.error) < 0)
2205 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206
Thomas Graf2d7202b2006-08-22 00:01:27 -07002207 return nlmsg_end(skb, nlh);
2208
2209nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002210 nlmsg_cancel(skb, nlh);
2211 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212}
2213
Patrick McHardy1b43af52006-08-10 23:11:17 -07002214int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215{
2216 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2217 int prefix;
2218
Thomas Graf2d7202b2006-08-22 00:01:27 -07002219 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2220 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2222 } else
2223 prefix = 0;
2224
2225 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2226 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002227 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228}
2229
Thomas Grafc127ea22007-03-22 11:58:32 -07002230static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231{
Thomas Grafab364a62006-08-22 00:01:47 -07002232 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002233 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002234 struct sk_buff *skb;
2235 struct rtmsg *rtm;
2236 struct flowi fl;
2237 int err, iif = 0;
2238
2239 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2240 if (err < 0)
2241 goto errout;
2242
2243 err = -EINVAL;
2244 memset(&fl, 0, sizeof(fl));
2245
2246 if (tb[RTA_SRC]) {
2247 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2248 goto errout;
2249
2250 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2251 }
2252
2253 if (tb[RTA_DST]) {
2254 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2255 goto errout;
2256
2257 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2258 }
2259
2260 if (tb[RTA_IIF])
2261 iif = nla_get_u32(tb[RTA_IIF]);
2262
2263 if (tb[RTA_OIF])
2264 fl.oif = nla_get_u32(tb[RTA_OIF]);
2265
2266 if (iif) {
2267 struct net_device *dev;
2268 dev = __dev_get_by_index(iif);
2269 if (!dev) {
2270 err = -ENODEV;
2271 goto errout;
2272 }
2273 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274
2275 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002276 if (skb == NULL) {
2277 err = -ENOBUFS;
2278 goto errout;
2279 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280
2281 /* Reserve room for dummy headers, this skb can pass
2282 through good chunk of routing engine.
2283 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002284 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2286
Thomas Grafab364a62006-08-22 00:01:47 -07002287 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002288 skb->dst = &rt->u.dst;
2289
Thomas Grafab364a62006-08-22 00:01:47 -07002290 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002292 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002294 kfree_skb(skb);
2295 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296 }
2297
Thomas Graf2942e902006-08-15 00:30:25 -07002298 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002299errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301}
2302
Thomas Graf86872cb2006-08-22 00:01:08 -07002303void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002304{
2305 struct sk_buff *skb;
Thomas Graf86872cb2006-08-22 00:01:08 -07002306 u32 pid = 0, seq = 0;
2307 struct nlmsghdr *nlh = NULL;
Thomas Graf21713eb2006-08-15 00:35:24 -07002308 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002309
Thomas Graf86872cb2006-08-22 00:01:08 -07002310 if (info) {
2311 pid = info->pid;
2312 nlh = info->nlh;
2313 if (nlh)
2314 seq = nlh->nlmsg_seq;
2315 }
2316
Thomas Graf339bf982006-11-10 14:10:15 -08002317 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002318 if (skb == NULL)
2319 goto errout;
2320
2321 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002322 if (err < 0) {
2323 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2324 WARN_ON(err == -EMSGSIZE);
2325 kfree_skb(skb);
2326 goto errout;
2327 }
Thomas Graf21713eb2006-08-15 00:35:24 -07002328 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2329errout:
2330 if (err < 0)
2331 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332}
2333
2334/*
2335 * /proc
2336 */
2337
2338#ifdef CONFIG_PROC_FS
2339
/* Record length used by the /proc route listing for skip/offset arithmetic. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* requested read offset */
	int length;	/* requested read length */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
2350
2351static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2352{
2353 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002354
2355 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2356 arg->skip++;
2357 return 0;
2358 }
2359
2360 if (arg->len >= arg->length)
2361 return 0;
2362
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002363 arg->len += sprintf(arg->buffer + arg->len,
2364 NIP6_SEQFMT " %02x ",
2365 NIP6(rt->rt6i_dst.addr),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 rt->rt6i_dst.plen);
2367
2368#ifdef CONFIG_IPV6_SUBTREES
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002369 arg->len += sprintf(arg->buffer + arg->len,
2370 NIP6_SEQFMT " %02x ",
2371 NIP6(rt->rt6i_src.addr),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002372 rt->rt6i_src.plen);
2373#else
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002374 arg->len += sprintf(arg->buffer + arg->len,
2375 "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376#endif
2377
2378 if (rt->rt6i_nexthop) {
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002379 arg->len += sprintf(arg->buffer + arg->len,
2380 NIP6_SEQFMT,
2381 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382 } else {
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002383 arg->len += sprintf(arg->buffer + arg->len,
2384 "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385 }
2386 arg->len += sprintf(arg->buffer + arg->len,
2387 " %08x %08x %08x %08x %8s\n",
2388 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002389 rt->u.dst.__use, rt->rt6i_flags,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002390 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2391 return 0;
2392}
2393
2394static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2395{
Thomas Grafc71099a2006-08-04 23:20:06 -07002396 struct rt6_proc_arg arg = {
2397 .buffer = buffer,
2398 .offset = offset,
2399 .length = length,
2400 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401
Thomas Grafc71099a2006-08-04 23:20:06 -07002402 fib6_clean_all(rt6_info_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403
2404 *start = buffer;
2405 if (offset)
2406 *start += offset % RT6_INFO_LEN;
2407
2408 arg.len -= offset % RT6_INFO_LEN;
2409
2410 if (arg.len > length)
2411 arg.len = length;
2412 if (arg.len < 0)
2413 arg.len = 0;
2414
2415 return arg.len;
2416}
2417
Linus Torvalds1da177e2005-04-16 15:20:36 -07002418static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2419{
2420 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2421 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2422 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2423 rt6_stats.fib_rt_cache,
2424 atomic_read(&ip6_dst_ops.entries),
2425 rt6_stats.fib_discarded_routes);
2426
2427 return 0;
2428}
2429
2430static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2431{
2432 return single_open(file, rt6_stats_seq_show, NULL);
2433}
2434
Arjan van de Ven9a321442007-02-12 00:55:35 -08002435static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436 .owner = THIS_MODULE,
2437 .open = rt6_stats_seq_open,
2438 .read = seq_read,
2439 .llseek = seq_lseek,
2440 .release = single_release,
2441};
2442#endif /* CONFIG_PROC_FS */
2443
2444#ifdef CONFIG_SYSCTL
2445
2446static int flush_delay;
2447
2448static
2449int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2450 void __user *buffer, size_t *lenp, loff_t *ppos)
2451{
2452 if (write) {
2453 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2454 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2455 return 0;
2456 } else
2457 return -EINVAL;
2458}
2459
2460ctl_table ipv6_route_table[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002461 {
2462 .ctl_name = NET_IPV6_ROUTE_FLUSH,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463 .procname = "flush",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002464 .data = &flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002465 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002466 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002467 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468 },
2469 {
2470 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2471 .procname = "gc_thresh",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002472 .data = &ip6_dst_ops.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473 .maxlen = sizeof(int),
2474 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002475 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476 },
2477 {
2478 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2479 .procname = "max_size",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002480 .data = &ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002481 .maxlen = sizeof(int),
2482 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002483 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484 },
2485 {
2486 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2487 .procname = "gc_min_interval",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002488 .data = &ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 .maxlen = sizeof(int),
2490 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002491 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002492 .strategy = &sysctl_jiffies,
2493 },
2494 {
2495 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2496 .procname = "gc_timeout",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002497 .data = &ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 .maxlen = sizeof(int),
2499 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002500 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002501 .strategy = &sysctl_jiffies,
2502 },
2503 {
2504 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2505 .procname = "gc_interval",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002506 .data = &ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002507 .maxlen = sizeof(int),
2508 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002509 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002510 .strategy = &sysctl_jiffies,
2511 },
2512 {
2513 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2514 .procname = "gc_elasticity",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002515 .data = &ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002516 .maxlen = sizeof(int),
2517 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002518 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519 .strategy = &sysctl_jiffies,
2520 },
2521 {
2522 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2523 .procname = "mtu_expires",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002524 .data = &ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002525 .maxlen = sizeof(int),
2526 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002527 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002528 .strategy = &sysctl_jiffies,
2529 },
2530 {
2531 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2532 .procname = "min_adv_mss",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002533 .data = &ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002534 .maxlen = sizeof(int),
2535 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002536 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537 .strategy = &sysctl_jiffies,
2538 },
2539 {
2540 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2541 .procname = "gc_min_interval_ms",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002542 .data = &ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 .maxlen = sizeof(int),
2544 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002545 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002546 .strategy = &sysctl_ms_jiffies,
2547 },
2548 { .ctl_name = 0 }
2549};
2550
2551#endif
2552
2553void __init ip6_route_init(void)
2554{
YOSHIFUJI Hideaki952a10b2007-04-21 20:13:44 +09002555#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07002556 struct proc_dir_entry *p;
YOSHIFUJI Hideaki952a10b2007-04-21 20:13:44 +09002557#endif
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002558 ip6_dst_ops.kmem_cachep =
2559 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09002560 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
David S. Miller14e50e52007-05-24 18:17:54 -07002561 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2562
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 fib6_init();
2564#ifdef CONFIG_PROC_FS
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002565 p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566 if (p)
2567 p->owner = THIS_MODULE;
2568
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002569 proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570#endif
2571#ifdef CONFIG_XFRM
2572 xfrm6_init();
2573#endif
Thomas Graf101367c2006-08-04 03:39:02 -07002574#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2575 fib6_rules_init();
2576#endif
Thomas Grafc127ea22007-03-22 11:58:32 -07002577
2578 __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2579 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2580 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581}
2582
2583void ip6_route_cleanup(void)
2584{
Thomas Graf101367c2006-08-04 03:39:02 -07002585#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2586 fib6_rules_cleanup();
2587#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588#ifdef CONFIG_PROC_FS
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002589 proc_net_remove(&init_net, "ipv6_route");
2590 proc_net_remove(&init_net, "rt6_stats");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002591#endif
2592#ifdef CONFIG_XFRM
2593 xfrm6_fini();
2594#endif
2595 rt6_ifdown(NULL);
2596 fib6_gc_cleanup();
2597 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2598}