blob: 55ea80fac6016929efc6fcbdabfa0289f03f78fa [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
15
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070057#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070058#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070059
60#include <asm/uaccess.h>
61
62#ifdef CONFIG_SYSCTL
63#include <linux/sysctl.h>
64#endif
65
/* Debug verbosity.  Set to 3 (or higher) to compile in the tracing output. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers expand to nothing. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* Tracing enabled: RDBG takes a parenthesised printk argument list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, the policy route lookups clone a selected route that
 * already has a nexthop (via rt6_alloc_clone()) instead of returning it
 * directly.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
79static int ip6_rt_max_size = 4096;
80static int ip6_rt_gc_min_interval = HZ / 2;
81static int ip6_rt_gc_timeout = 60*HZ;
82int ip6_rt_gc_interval = 30*HZ;
83static int ip6_rt_gc_elasticity = 9;
84static int ip6_rt_mtu_expires = 10*60*HZ;
85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90static void ip6_dst_destroy(struct dst_entry *);
91static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93static int ip6_dst_gc(void);
94
95static int ip6_pkt_discard(struct sk_buff *skb);
96static int ip6_pkt_discard_out(struct sk_buff *skb);
97static void ip6_link_failure(struct sk_buff *skb);
98static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800100#ifdef CONFIG_IPV6_ROUTE_INFO
101static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106#endif
107
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
120};
121
David S. Miller14e50e52007-05-24 18:17:54 -0700122static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
123{
124}
125
126static struct dst_ops ip6_dst_blackhole_ops = {
127 .family = AF_INET6,
128 .protocol = __constant_htons(ETH_P_IPV6),
129 .destroy = ip6_dst_destroy,
130 .check = ip6_dst_check,
131 .update_pmtu = ip6_rt_blackhole_update_pmtu,
132 .entry_size = sizeof(struct rt6_info),
133};
134
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135struct rt6_info ip6_null_entry = {
136 .u = {
137 .dst = {
138 .__refcnt = ATOMIC_INIT(1),
139 .__use = 1,
140 .dev = &loopback_dev,
141 .obsolete = -1,
142 .error = -ENETUNREACH,
143 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
144 .input = ip6_pkt_discard,
145 .output = ip6_pkt_discard_out,
146 .ops = &ip6_dst_ops,
147 .path = (struct dst_entry*)&ip6_null_entry,
148 }
149 },
150 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
151 .rt6i_metric = ~(u32) 0,
152 .rt6i_ref = ATOMIC_INIT(1),
153};
154
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Packet handlers for the two extra static entries below. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_blk_hole(struct sk_buff *skb);

/*
 * Static "prohibited" entry: same shape as ip6_null_entry but reports
 * -EACCES and uses the prohibit handlers.
 */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
		}
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

/*
 * Static "blackhole" entry: reports -EINVAL and uses the same handler,
 * ip6_pkt_blk_hole, for both input and output.
 */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_blk_hole,
			.output		= ip6_pkt_blk_hole,
		}
	},
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};

#endif
202
Linus Torvalds1da177e2005-04-16 15:20:36 -0700203/* allocate dst with ip6_dst_ops */
204static __inline__ struct rt6_info *ip6_dst_alloc(void)
205{
206 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
207}
208
209static void ip6_dst_destroy(struct dst_entry *dst)
210{
211 struct rt6_info *rt = (struct rt6_info *)dst;
212 struct inet6_dev *idev = rt->rt6i_idev;
213
214 if (idev != NULL) {
215 rt->rt6i_idev = NULL;
216 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900217 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218}
219
220static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
221 int how)
222{
223 struct rt6_info *rt = (struct rt6_info *)dst;
224 struct inet6_dev *idev = rt->rt6i_idev;
225
226 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
227 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
228 if (loopback_idev != NULL) {
229 rt->rt6i_idev = loopback_idev;
230 in6_dev_put(idev);
231 }
232 }
233}
234
235static __inline__ int rt6_check_expired(const struct rt6_info *rt)
236{
237 return (rt->rt6i_flags & RTF_EXPIRES &&
238 time_after(jiffies, rt->rt6i_expires));
239}
240
Thomas Grafc71099a2006-08-04 23:20:06 -0700241static inline int rt6_need_strict(struct in6_addr *daddr)
242{
243 return (ipv6_addr_type(daddr) &
244 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
245}
246
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700248 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 */
250
251static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
252 int oif,
253 int strict)
254{
255 struct rt6_info *local = NULL;
256 struct rt6_info *sprt;
257
258 if (oif) {
Eric Dumazet7cc48262007-02-09 16:22:57 -0800259 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 struct net_device *dev = sprt->rt6i_dev;
261 if (dev->ifindex == oif)
262 return sprt;
263 if (dev->flags & IFF_LOOPBACK) {
264 if (sprt->rt6i_idev == NULL ||
265 sprt->rt6i_idev->dev->ifindex != oif) {
266 if (strict && oif)
267 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900268 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 local->rt6i_idev->dev->ifindex == oif))
270 continue;
271 }
272 local = sprt;
273 }
274 }
275
276 if (local)
277 return local;
278
279 if (strict)
280 return &ip6_null_entry;
281 }
282 return rt;
283}
284
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * nexthop of @rt when its neighbour entry is not in a NUD_VALID state.
 *
 * Probes MUST be rate-limited; a new one is sent only once the device's
 * rtr_probe_interval has passed since neigh->updated, which is refreshed
 * here before sending.  @rt may be NULL, in which case nothing happens.
 *
 * NOTE(review): neigh->updated is written while holding only the read
 * side of neigh->lock -- confirm whether that is intentional.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* Cheap unlocked test first; re-checked below under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies,
			 neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* Solicit the nexthop through its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing does nothing. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
320
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800322 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700324static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800326 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700327 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800328 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700329 if ((dev->flags & IFF_LOOPBACK) &&
330 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331 return 1;
332 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333}
334
Dave Jonesb6f99a22007-03-22 12:27:49 -0700335static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800337 struct neighbour *neigh = rt->rt6i_nexthop;
338 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700339 if (rt->rt6i_flags & RTF_NONEXTHOP ||
340 !(rt->rt6i_flags & RTF_GATEWAY))
341 m = 1;
342 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800343 read_lock_bh(&neigh->lock);
344 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700345 m = 2;
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800346 else if (!(neigh->nud_state & NUD_FAILED))
347 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800348 read_unlock_bh(&neigh->lock);
349 }
350 return m;
351}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800353static int rt6_score_route(struct rt6_info *rt, int oif,
354 int strict)
355{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700356 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900357
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700358 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700359 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800360 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800361#ifdef CONFIG_IPV6_ROUTER_PREF
362 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
363#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700364 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800365 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800366 return -1;
367 return m;
368}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369
David S. Millerf11e6652007-03-24 20:36:25 -0700370static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
371 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800372{
David S. Millerf11e6652007-03-24 20:36:25 -0700373 int m;
374
375 if (rt6_check_expired(rt))
376 goto out;
377
378 m = rt6_score_route(rt, oif, strict);
379 if (m < 0)
380 goto out;
381
382 if (m > *mpri) {
383 if (strict & RT6_LOOKUP_F_REACHABLE)
384 rt6_probe(match);
385 *mpri = m;
386 match = rt;
387 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
388 rt6_probe(rt);
389 }
390
391out:
392 return match;
393}
394
395static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
396 struct rt6_info *rr_head,
397 u32 metric, int oif, int strict)
398{
399 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800400 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401
David S. Millerf11e6652007-03-24 20:36:25 -0700402 match = NULL;
403 for (rt = rr_head; rt && rt->rt6i_metric == metric;
404 rt = rt->u.dst.rt6_next)
405 match = find_match(rt, oif, strict, &mpri, match);
406 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
407 rt = rt->u.dst.rt6_next)
408 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800409
David S. Millerf11e6652007-03-24 20:36:25 -0700410 return match;
411}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800412
David S. Millerf11e6652007-03-24 20:36:25 -0700413static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
414{
415 struct rt6_info *match, *rt0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416
David S. Millerf11e6652007-03-24 20:36:25 -0700417 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
418 __FUNCTION__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
David S. Millerf11e6652007-03-24 20:36:25 -0700420 rt0 = fn->rr_ptr;
421 if (!rt0)
422 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423
David S. Millerf11e6652007-03-24 20:36:25 -0700424 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800426 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700427 (strict & RT6_LOOKUP_F_REACHABLE)) {
428 struct rt6_info *next = rt0->u.dst.rt6_next;
429
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800430 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700431 if (!next || next->rt6i_metric != rt0->rt6i_metric)
432 next = fn->leaf;
433
434 if (next != rt0)
435 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 }
437
David S. Millerf11e6652007-03-24 20:36:25 -0700438 RT6_TRACE("%s() => %p\n",
439 __FUNCTION__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800441 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442}
443
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process one Route Information option (RFC 4191).
 * @dev:	device the option was received on; its ifindex keys the route
 * @opt:	start of the option, interpreted as struct route_info
 * @len:	number of bytes available at @opt
 * @gwaddr:	gateway address to associate with the route
 *
 * Validates the option and then, for the (prefix, gateway, interface)
 * it describes:
 *   - lifetime 0 and route present:  the route is deleted;
 *   - lifetime != 0 and no route:    a route is added;
 *   - route present:                 preference and expiry are refreshed.
 * A lifetime of 0xffffffff means "never expires".
 *
 * Returns 0 on success, -EINVAL when the option is malformed or has to be
 * ignored.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/*
	 * Compare as signed: against a bare sizeof a negative len would be
	 * converted to a huge unsigned value and pass the check.
	 */
	if (len < 0 || len < (int)sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length.  length counts units of
	 * 8 octets including the 8-octet fixed part, so the prefix field
	 * holds 0, 8 or 16 octets for length 1, 2 or 3.  RFC 4191 2.3:
	 * prefix_len > 64 requires length 3, prefix_len > 0 requires
	 * length 2 or 3.  Anything shorter would make the prefix copy
	 * below read past the end of the option.
	 */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 3)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 2)
			return -EINVAL;
	}

	/* The octets the option claims to have must actually be there. */
	if (len < (rinfo->length << 3))
		return -EINVAL;

	/*
	 * RFC 4191 2.3: an option carrying the reserved preference value
	 * MUST be ignored (it is not to be treated as medium).
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* Drop the reference obtained from the get/add helper. */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
521
/*
 * BACKTRACK(saddr) - climb the fib6 tree after a failed selection.
 *
 * Must be expanded inside a function that has local variables `rt` and
 * `fn` and labels `out` and `restart`.  If rt is &ip6_null_entry, walk
 * from fn towards the root: at each step go to the parent, or, when the
 * parent has a subtree other than fn, look up saddr in that subtree.
 * Jumps to `restart` at the first node flagged RTN_RTINFO and to `out`
 * once the node flagged RTN_TL_ROOT is reached.  Otherwise falls through.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700539
540static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
541 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542{
543 struct fib6_node *fn;
544 struct rt6_info *rt;
545
Thomas Grafc71099a2006-08-04 23:20:06 -0700546 read_lock_bh(&table->tb6_lock);
547 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
548restart:
549 rt = fn->leaf;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700550 rt = rt6_device_match(rt, fl->oif, flags);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700551 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700552out:
YOSHIFUJI Hideaki33cc4892006-08-28 13:19:30 -0700553 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700554 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555
556 rt->u.dst.lastuse = jiffies;
Thomas Grafc71099a2006-08-04 23:20:06 -0700557 rt->u.dst.__use++;
558
559 return rt;
560
561}
562
563struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
564 int oif, int strict)
565{
566 struct flowi fl = {
567 .oif = oif,
568 .nl_u = {
569 .ip6_u = {
570 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700571 },
572 },
573 };
574 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700575 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700576
Thomas Grafadaa70b2006-10-13 15:01:03 -0700577 if (saddr) {
578 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579 flags |= RT6_LOOKUP_F_HAS_SADDR;
580 }
581
Thomas Grafc71099a2006-08-04 23:20:06 -0700582 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
583 if (dst->error == 0)
584 return (struct rt6_info *) dst;
585
586 dst_release(dst);
587
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 return NULL;
589}
590
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900591EXPORT_SYMBOL(rt6_lookup);
592
Thomas Grafc71099a2006-08-04 23:20:06 -0700593/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 It takes new route entry, the addition fails by any reason the
595 route is freed. In any case, if caller does not hold it, it may
596 be destroyed.
597 */
598
Thomas Graf86872cb2006-08-22 00:01:08 -0700599static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600{
601 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700602 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
Thomas Grafc71099a2006-08-04 23:20:06 -0700604 table = rt->rt6i_table;
605 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700606 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700607 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608
609 return err;
610}
611
Thomas Graf40e22e82006-08-22 00:00:45 -0700612int ip6_ins_rt(struct rt6_info *rt)
613{
Thomas Graf86872cb2006-08-22 00:01:08 -0700614 return __ip6_ins_rt(rt, NULL);
Thomas Graf40e22e82006-08-22 00:00:45 -0700615}
616
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800617static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
618 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 struct rt6_info *rt;
621
622 /*
623 * Clone the route.
624 */
625
626 rt = ip6_rt_copy(ort);
627
628 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900629 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
630 if (rt->rt6i_dst.plen != 128 &&
631 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
632 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900634 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900636 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 rt->rt6i_dst.plen = 128;
638 rt->rt6i_flags |= RTF_CACHE;
639 rt->u.dst.flags |= DST_HOST;
640
641#ifdef CONFIG_IPV6_SUBTREES
642 if (rt->rt6i_src.plen && saddr) {
643 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
644 rt->rt6i_src.plen = 128;
645 }
646#endif
647
648 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800650 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800652 return rt;
653}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800655static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
656{
657 struct rt6_info *rt = ip6_rt_copy(ort);
658 if (rt) {
659 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
660 rt->rt6i_dst.plen = 128;
661 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800662 rt->u.dst.flags |= DST_HOST;
663 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
664 }
665 return rt;
666}
667
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700668static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
669 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670{
671 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800672 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700673 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800675 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800676 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700678 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679
680relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700681 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800683restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700684 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685
686restart:
David S. Millerf11e6652007-03-24 20:36:25 -0700687 rt = rt6_select(fn, fl->iif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700688 BACKTRACK(&fl->fl6_src);
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800689 if (rt == &ip6_null_entry ||
690 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800691 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800693 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700694 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800695
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800696 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800697 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800698 else {
699#if CLONE_OFFLINK_ROUTE
700 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
701#else
702 goto out2;
703#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800705
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800706 dst_release(&rt->u.dst);
707 rt = nrt ? : &ip6_null_entry;
708
709 dst_hold(&rt->u.dst);
710 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700711 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800712 if (!err)
713 goto out2;
714 }
715
716 if (--attempts <= 0)
717 goto out2;
718
719 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700720 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800721 * released someone could insert this route. Relookup.
722 */
723 dst_release(&rt->u.dst);
724 goto relookup;
725
726out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800727 if (reachable) {
728 reachable = 0;
729 goto restart_2;
730 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800731 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700732 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733out2:
734 rt->u.dst.lastuse = jiffies;
735 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700736
737 return rt;
738}
739
740void ip6_route_input(struct sk_buff *skb)
741{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700742 struct ipv6hdr *iph = ipv6_hdr(skb);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700743 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700744 struct flowi fl = {
745 .iif = skb->dev->ifindex,
746 .nl_u = {
747 .ip6_u = {
748 .daddr = iph->daddr,
749 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800750 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700751 },
752 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900753 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700754 .proto = iph->nexthdr,
755 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700756
757 if (rt6_need_strict(&iph->daddr))
758 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700759
760 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
761}
762
763static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
764 struct flowi *fl, int flags)
765{
766 struct fib6_node *fn;
767 struct rt6_info *rt, *nrt;
768 int strict = 0;
769 int attempts = 3;
770 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800771 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700772
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700773 strict |= flags & RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700774
775relookup:
776 read_lock_bh(&table->tb6_lock);
777
778restart_2:
779 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
780
781restart:
David S. Millerf11e6652007-03-24 20:36:25 -0700782 rt = rt6_select(fn, fl->oif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700783 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700784 if (rt == &ip6_null_entry ||
785 rt->rt6i_flags & RTF_CACHE)
786 goto out;
787
788 dst_hold(&rt->u.dst);
789 read_unlock_bh(&table->tb6_lock);
790
791 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
792 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
793 else {
794#if CLONE_OFFLINK_ROUTE
795 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
796#else
797 goto out2;
798#endif
799 }
800
801 dst_release(&rt->u.dst);
802 rt = nrt ? : &ip6_null_entry;
803
804 dst_hold(&rt->u.dst);
805 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700806 err = ip6_ins_rt(nrt);
Thomas Grafc71099a2006-08-04 23:20:06 -0700807 if (!err)
808 goto out2;
809 }
810
811 if (--attempts <= 0)
812 goto out2;
813
814 /*
815 * Race condition! In the gap, when table->tb6_lock was
816 * released someone could insert this route. Relookup.
817 */
818 dst_release(&rt->u.dst);
819 goto relookup;
820
821out:
822 if (reachable) {
823 reachable = 0;
824 goto restart_2;
825 }
826 dst_hold(&rt->u.dst);
827 read_unlock_bh(&table->tb6_lock);
828out2:
829 rt->u.dst.lastuse = jiffies;
830 rt->u.dst.__use++;
831 return rt;
832}
833
834struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
835{
836 int flags = 0;
837
838 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700839 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700840
Thomas Grafadaa70b2006-10-13 15:01:03 -0700841 if (!ipv6_addr_any(&fl->fl6_src))
842 flags |= RT6_LOOKUP_F_HAS_SADDR;
843
Thomas Grafc71099a2006-08-04 23:20:06 -0700844 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845}
846
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900847EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848
/*
 * input/output handler of blackhole dst entries: free the packet and
 * report success.
 */
static int ip6_blackhole_output(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
854
855int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
856{
857 struct rt6_info *ort = (struct rt6_info *) *dstp;
858 struct rt6_info *rt = (struct rt6_info *)
859 dst_alloc(&ip6_dst_blackhole_ops);
860 struct dst_entry *new = NULL;
861
862 if (rt) {
863 new = &rt->u.dst;
864
865 atomic_set(&new->__refcnt, 1);
866 new->__use = 1;
867 new->input = ip6_blackhole_output;
868 new->output = ip6_blackhole_output;
869
870 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
871 new->dev = ort->u.dst.dev;
872 if (new->dev)
873 dev_hold(new->dev);
874 rt->rt6i_idev = ort->rt6i_idev;
875 if (rt->rt6i_idev)
876 in6_dev_hold(rt->rt6i_idev);
877 rt->rt6i_expires = 0;
878
879 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
880 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
881 rt->rt6i_metric = 0;
882
883 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
884#ifdef CONFIG_IPV6_SUBTREES
885 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
886#endif
887
888 dst_free(new);
889 }
890
891 dst_release(*dstp);
892 *dstp = new;
893 return (new ? 0 : -ENOMEM);
894}
895EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
896
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897/*
898 * Destination cache support functions
899 */
900
901static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
902{
903 struct rt6_info *rt;
904
905 rt = (struct rt6_info *) dst;
906
907 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
908 return dst;
909
910 return NULL;
911}
912
913static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
914{
915 struct rt6_info *rt = (struct rt6_info *) dst;
916
917 if (rt) {
918 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700919 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920 else
921 dst_release(dst);
922 }
923 return NULL;
924}
925
926static void ip6_link_failure(struct sk_buff *skb)
927{
928 struct rt6_info *rt;
929
930 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
931
932 rt = (struct rt6_info *) skb->dst;
933 if (rt) {
934 if (rt->rt6i_flags&RTF_CACHE) {
935 dst_set_expires(&rt->u.dst, 0);
936 rt->rt6i_flags |= RTF_EXPIRES;
937 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
938 rt->rt6i_node->fn_sernum = -1;
939 }
940}
941
942static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
943{
944 struct rt6_info *rt6 = (struct rt6_info*)dst;
945
946 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
947 rt6->rt6i_flags |= RTF_MODIFIED;
948 if (mtu < IPV6_MIN_MTU) {
949 mtu = IPV6_MIN_MTU;
950 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
951 }
952 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700953 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 }
955}
956
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957static int ipv6_get_mtu(struct net_device *dev);
958
959static inline unsigned int ipv6_advmss(unsigned int mtu)
960{
961 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
962
963 if (mtu < ip6_rt_min_advmss)
964 mtu = ip6_rt_min_advmss;
965
966 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900967 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
968 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
969 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 * rely only on pmtu discovery"
971 */
972 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
973 mtu = IPV6_MAXPLEN;
974 return mtu;
975}
976
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700977static struct dst_entry *ndisc_dst_gc_list;
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700978static DEFINE_SPINLOCK(ndisc_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700979
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900980struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 struct neighbour *neigh,
982 struct in6_addr *addr,
983 int (*output)(struct sk_buff *))
984{
985 struct rt6_info *rt;
986 struct inet6_dev *idev = in6_dev_get(dev);
987
988 if (unlikely(idev == NULL))
989 return NULL;
990
991 rt = ip6_dst_alloc();
992 if (unlikely(rt == NULL)) {
993 in6_dev_put(idev);
994 goto out;
995 }
996
997 dev_hold(dev);
998 if (neigh)
999 neigh_hold(neigh);
1000 else
1001 neigh = ndisc_get_neigh(dev, addr);
1002
1003 rt->rt6i_dev = dev;
1004 rt->rt6i_idev = idev;
1005 rt->rt6i_nexthop = neigh;
1006 atomic_set(&rt->u.dst.__refcnt, 1);
1007 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
1008 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1009 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1010 rt->u.dst.output = output;
1011
1012#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001013 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1014 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015 : 0;
1016 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1017 rt->rt6i_dst.plen = 128;
1018#endif
1019
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001020 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 rt->u.dst.next = ndisc_dst_gc_list;
1022 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001023 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024
1025 fib6_force_start_gc();
1026
1027out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001028 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029}
1030
1031int ndisc_dst_gc(int *more)
1032{
1033 struct dst_entry *dst, *next, **pprev;
1034 int freed;
1035
1036 next = NULL;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001037 freed = 0;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001038
1039 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001041
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042 while ((dst = *pprev) != NULL) {
1043 if (!atomic_read(&dst->__refcnt)) {
1044 *pprev = dst->next;
1045 dst_free(dst);
1046 freed++;
1047 } else {
1048 pprev = &dst->next;
1049 (*more)++;
1050 }
1051 }
1052
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001053 spin_unlock_bh(&ndisc_lock);
1054
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055 return freed;
1056}
1057
1058static int ip6_dst_gc(void)
1059{
1060 static unsigned expire = 30*HZ;
1061 static unsigned long last_gc;
1062 unsigned long now = jiffies;
1063
1064 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1065 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1066 goto out;
1067
1068 expire++;
1069 fib6_run_gc(expire);
1070 last_gc = now;
1071 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1072 expire = ip6_rt_gc_timeout>>1;
1073
1074out:
1075 expire -= expire>>ip6_rt_gc_elasticity;
1076 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1077}
1078
1079/* Clean host part of a prefix. Not necessary in radix tree,
1080 but results in cleaner routing tables.
1081
1082 Remove it only when all the things will work!
1083 */
1084
1085static int ipv6_get_mtu(struct net_device *dev)
1086{
1087 int mtu = IPV6_MIN_MTU;
1088 struct inet6_dev *idev;
1089
1090 idev = in6_dev_get(dev);
1091 if (idev) {
1092 mtu = idev->cnf.mtu6;
1093 in6_dev_put(idev);
1094 }
1095 return mtu;
1096}
1097
1098int ipv6_get_hoplimit(struct net_device *dev)
1099{
1100 int hoplimit = ipv6_devconf.hop_limit;
1101 struct inet6_dev *idev;
1102
1103 idev = in6_dev_get(dev);
1104 if (idev) {
1105 hoplimit = idev->cnf.hop_limit;
1106 in6_dev_put(idev);
1107 }
1108 return hoplimit;
1109}
1110
1111/*
1112 *
1113 */
1114
Thomas Graf86872cb2006-08-22 00:01:08 -07001115int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116{
1117 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 struct rt6_info *rt = NULL;
1119 struct net_device *dev = NULL;
1120 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001121 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122 int addr_type;
1123
Thomas Graf86872cb2006-08-22 00:01:08 -07001124 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125 return -EINVAL;
1126#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001127 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128 return -EINVAL;
1129#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001130 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 err = -ENODEV;
Thomas Graf86872cb2006-08-22 00:01:08 -07001132 dev = dev_get_by_index(cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 if (!dev)
1134 goto out;
1135 idev = in6_dev_get(dev);
1136 if (!idev)
1137 goto out;
1138 }
1139
Thomas Graf86872cb2006-08-22 00:01:08 -07001140 if (cfg->fc_metric == 0)
1141 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001142
Thomas Graf86872cb2006-08-22 00:01:08 -07001143 table = fib6_new_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001144 if (table == NULL) {
1145 err = -ENOBUFS;
1146 goto out;
1147 }
1148
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 rt = ip6_dst_alloc();
1150
1151 if (rt == NULL) {
1152 err = -ENOMEM;
1153 goto out;
1154 }
1155
1156 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001157 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158
Thomas Graf86872cb2006-08-22 00:01:08 -07001159 if (cfg->fc_protocol == RTPROT_UNSPEC)
1160 cfg->fc_protocol = RTPROT_BOOT;
1161 rt->rt6i_protocol = cfg->fc_protocol;
1162
1163 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001164
1165 if (addr_type & IPV6_ADDR_MULTICAST)
1166 rt->u.dst.input = ip6_mc_input;
1167 else
1168 rt->u.dst.input = ip6_forward;
1169
1170 rt->u.dst.output = ip6_output;
1171
Thomas Graf86872cb2006-08-22 00:01:08 -07001172 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1173 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 if (rt->rt6i_dst.plen == 128)
1175 rt->u.dst.flags = DST_HOST;
1176
1177#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001178 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1179 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180#endif
1181
Thomas Graf86872cb2006-08-22 00:01:08 -07001182 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183
1184 /* We cannot add true routes via loopback here,
1185 they would result in kernel looping; promote them to reject routes
1186 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001187 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1189 /* hold loopback dev/idev if we haven't done so. */
1190 if (dev != &loopback_dev) {
1191 if (dev) {
1192 dev_put(dev);
1193 in6_dev_put(idev);
1194 }
1195 dev = &loopback_dev;
1196 dev_hold(dev);
1197 idev = in6_dev_get(dev);
1198 if (!idev) {
1199 err = -ENODEV;
1200 goto out;
1201 }
1202 }
1203 rt->u.dst.output = ip6_pkt_discard_out;
1204 rt->u.dst.input = ip6_pkt_discard;
1205 rt->u.dst.error = -ENETUNREACH;
1206 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1207 goto install_route;
1208 }
1209
Thomas Graf86872cb2006-08-22 00:01:08 -07001210 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211 struct in6_addr *gw_addr;
1212 int gwa_type;
1213
Thomas Graf86872cb2006-08-22 00:01:08 -07001214 gw_addr = &cfg->fc_gateway;
1215 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 gwa_type = ipv6_addr_type(gw_addr);
1217
1218 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1219 struct rt6_info *grt;
1220
1221 /* IPv6 strictly inhibits using not link-local
1222 addresses as nexthop address.
1223 Otherwise, router will not able to send redirects.
1224 It is very good, but in some (rare!) circumstances
1225 (SIT, PtP, NBMA NOARP links) it is handy to allow
1226 some exceptions. --ANK
1227 */
1228 err = -EINVAL;
1229 if (!(gwa_type&IPV6_ADDR_UNICAST))
1230 goto out;
1231
Thomas Graf86872cb2006-08-22 00:01:08 -07001232 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233
1234 err = -EHOSTUNREACH;
1235 if (grt == NULL)
1236 goto out;
1237 if (dev) {
1238 if (dev != grt->rt6i_dev) {
1239 dst_release(&grt->u.dst);
1240 goto out;
1241 }
1242 } else {
1243 dev = grt->rt6i_dev;
1244 idev = grt->rt6i_idev;
1245 dev_hold(dev);
1246 in6_dev_hold(grt->rt6i_idev);
1247 }
1248 if (!(grt->rt6i_flags&RTF_GATEWAY))
1249 err = 0;
1250 dst_release(&grt->u.dst);
1251
1252 if (err)
1253 goto out;
1254 }
1255 err = -EINVAL;
1256 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1257 goto out;
1258 }
1259
1260 err = -ENODEV;
1261 if (dev == NULL)
1262 goto out;
1263
Thomas Graf86872cb2006-08-22 00:01:08 -07001264 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1266 if (IS_ERR(rt->rt6i_nexthop)) {
1267 err = PTR_ERR(rt->rt6i_nexthop);
1268 rt->rt6i_nexthop = NULL;
1269 goto out;
1270 }
1271 }
1272
Thomas Graf86872cb2006-08-22 00:01:08 -07001273 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274
1275install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001276 if (cfg->fc_mx) {
1277 struct nlattr *nla;
1278 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279
Thomas Graf86872cb2006-08-22 00:01:08 -07001280 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1281 int type = nla->nla_type;
1282
1283 if (type) {
1284 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285 err = -EINVAL;
1286 goto out;
1287 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001288
1289 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291 }
1292 }
1293
1294 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1295 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1296 if (!rt->u.dst.metrics[RTAX_MTU-1])
1297 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1298 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1299 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1300 rt->u.dst.dev = dev;
1301 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001302 rt->rt6i_table = table;
Thomas Graf86872cb2006-08-22 00:01:08 -07001303 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304
1305out:
1306 if (dev)
1307 dev_put(dev);
1308 if (idev)
1309 in6_dev_put(idev);
1310 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001311 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312 return err;
1313}
1314
Thomas Graf86872cb2006-08-22 00:01:08 -07001315static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316{
1317 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001318 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319
Patrick McHardy6c813a72006-08-06 22:22:47 -07001320 if (rt == &ip6_null_entry)
1321 return -ENOENT;
1322
Thomas Grafc71099a2006-08-04 23:20:06 -07001323 table = rt->rt6i_table;
1324 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325
Thomas Graf86872cb2006-08-22 00:01:08 -07001326 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327 dst_release(&rt->u.dst);
1328
Thomas Grafc71099a2006-08-04 23:20:06 -07001329 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
1331 return err;
1332}
1333
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001334int ip6_del_rt(struct rt6_info *rt)
1335{
Thomas Graf86872cb2006-08-22 00:01:08 -07001336 return __ip6_del_rt(rt, NULL);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001337}
1338
Thomas Graf86872cb2006-08-22 00:01:08 -07001339static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340{
Thomas Grafc71099a2006-08-04 23:20:06 -07001341 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342 struct fib6_node *fn;
1343 struct rt6_info *rt;
1344 int err = -ESRCH;
1345
Thomas Graf86872cb2006-08-22 00:01:08 -07001346 table = fib6_get_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001347 if (table == NULL)
1348 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349
Thomas Grafc71099a2006-08-04 23:20:06 -07001350 read_lock_bh(&table->tb6_lock);
1351
1352 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001353 &cfg->fc_dst, cfg->fc_dst_len,
1354 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001355
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001357 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001358 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001360 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001362 if (cfg->fc_flags & RTF_GATEWAY &&
1363 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001365 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 continue;
1367 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001368 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369
Thomas Graf86872cb2006-08-22 00:01:08 -07001370 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 }
1372 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001373 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374
1375 return err;
1376}
1377
1378/*
1379 * Handle redirects
1380 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001381struct ip6rd_flowi {
1382 struct flowi fl;
1383 struct in6_addr gateway;
1384};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001386static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1387 struct flowi *fl,
1388 int flags)
1389{
1390 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1391 struct rt6_info *rt;
1392 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001393
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001395 * Get the "current" route for this destination and
1396 * check if the redirect has come from approriate router.
1397 *
1398 * RFC 2461 specifies that redirects should only be
1399 * accepted if they come from the nexthop to the target.
1400 * Due to the way the routes are chosen, this notion
1401 * is a bit fuzzy and one might need to check all possible
1402 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404
Thomas Grafc71099a2006-08-04 23:20:06 -07001405 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001406 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001407restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001408 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001409 /*
1410 * Current route is on-link; redirect is always invalid.
1411 *
1412 * Seems, previous statement is not true. It could
1413 * be node, which looks for us as on-link (f.e. proxy ndisc)
1414 * But then router serving it might decide, that we should
1415 * know truth 8)8) --ANK (980726).
1416 */
1417 if (rt6_check_expired(rt))
1418 continue;
1419 if (!(rt->rt6i_flags & RTF_GATEWAY))
1420 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001421 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001422 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001423 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001424 continue;
1425 break;
1426 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001427
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001428 if (!rt)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001429 rt = &ip6_null_entry;
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001430 BACKTRACK(&fl->fl6_src);
1431out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001432 dst_hold(&rt->u.dst);
1433
1434 read_unlock_bh(&table->tb6_lock);
1435
1436 return rt;
1437};
1438
1439static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1440 struct in6_addr *src,
1441 struct in6_addr *gateway,
1442 struct net_device *dev)
1443{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001444 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001445 struct ip6rd_flowi rdfl = {
1446 .fl = {
1447 .oif = dev->ifindex,
1448 .nl_u = {
1449 .ip6_u = {
1450 .daddr = *dest,
1451 .saddr = *src,
1452 },
1453 },
1454 },
1455 .gateway = *gateway,
1456 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001457
1458 if (rt6_need_strict(dest))
1459 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001460
1461 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1462}
1463
1464void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1465 struct in6_addr *saddr,
1466 struct neighbour *neigh, u8 *lladdr, int on_link)
1467{
1468 struct rt6_info *rt, *nrt = NULL;
1469 struct netevent_redirect netevent;
1470
1471 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1472
1473 if (rt == &ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474 if (net_ratelimit())
1475 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1476 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001477 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 }
1479
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 /*
1481 * We have finally decided to accept it.
1482 */
1483
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001484 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1486 NEIGH_UPDATE_F_OVERRIDE|
1487 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1488 NEIGH_UPDATE_F_ISROUTER))
1489 );
1490
1491 /*
1492 * Redirect received -> path was valid.
1493 * Look, redirects are sent only in response to data packets,
1494 * so that this nexthop apparently is reachable. --ANK
1495 */
1496 dst_confirm(&rt->u.dst);
1497
1498 /* Duplicate redirect: silently ignore. */
1499 if (neigh == rt->u.dst.neighbour)
1500 goto out;
1501
1502 nrt = ip6_rt_copy(rt);
1503 if (nrt == NULL)
1504 goto out;
1505
1506 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1507 if (on_link)
1508 nrt->rt6i_flags &= ~RTF_GATEWAY;
1509
1510 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1511 nrt->rt6i_dst.plen = 128;
1512 nrt->u.dst.flags |= DST_HOST;
1513
1514 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1515 nrt->rt6i_nexthop = neigh_clone(neigh);
1516 /* Reset pmtu, it may be better */
1517 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1518 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1519
Thomas Graf40e22e82006-08-22 00:00:45 -07001520 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 goto out;
1522
Tom Tucker8d717402006-07-30 20:43:36 -07001523 netevent.old = &rt->u.dst;
1524 netevent.new = &nrt->u.dst;
1525 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1526
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001528 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 return;
1530 }
1531
1532out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001533 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 return;
1535}
1536
1537/*
1538 * Handle ICMP "packet too big" messages
1539 * i.e. Path MTU discovery
1540 */
1541
1542void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1543 struct net_device *dev, u32 pmtu)
1544{
1545 struct rt6_info *rt, *nrt;
1546 int allfrag = 0;
1547
1548 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1549 if (rt == NULL)
1550 return;
1551
1552 if (pmtu >= dst_mtu(&rt->u.dst))
1553 goto out;
1554
1555 if (pmtu < IPV6_MIN_MTU) {
1556 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001557 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 * MTU (1280) and a fragment header should always be included
1559 * after a node receiving Too Big message reporting PMTU is
1560 * less than the IPv6 Minimum Link MTU.
1561 */
1562 pmtu = IPV6_MIN_MTU;
1563 allfrag = 1;
1564 }
1565
1566 /* New mtu received -> path was valid.
1567 They are sent only in response to data packets,
1568 so that this nexthop apparently is reachable. --ANK
1569 */
1570 dst_confirm(&rt->u.dst);
1571
1572 /* Host route. If it is static, it would be better
1573 not to override it, but add new one, so that
1574 when cache entry will expire old pmtu
1575 would return automatically.
1576 */
1577 if (rt->rt6i_flags & RTF_CACHE) {
1578 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1579 if (allfrag)
1580 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1581 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1582 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1583 goto out;
1584 }
1585
1586 /* Network route.
1587 Two cases are possible:
1588 1. It is connected route. Action: COW
1589 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1590 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001591 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001592 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001593 else
1594 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001595
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001596 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001597 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1598 if (allfrag)
1599 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1600
1601 /* According to RFC 1981, detecting PMTU increase shouldn't be
1602 * happened within 5 mins, the recommended timer is 10 mins.
1603 * Here this route expiration time is set to ip6_rt_mtu_expires
1604 * which is 10 mins. After 10 mins the decreased pmtu is expired
1605 * and detecting PMTU increase will be automatically happened.
1606 */
1607 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1608 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1609
Thomas Graf40e22e82006-08-22 00:00:45 -07001610 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612out:
1613 dst_release(&rt->u.dst);
1614}
1615
1616/*
1617 * Misc support functions
1618 */
1619
1620static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1621{
1622 struct rt6_info *rt = ip6_dst_alloc();
1623
1624 if (rt) {
1625 rt->u.dst.input = ort->u.dst.input;
1626 rt->u.dst.output = ort->u.dst.output;
1627
1628 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001629 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630 rt->u.dst.dev = ort->u.dst.dev;
1631 if (rt->u.dst.dev)
1632 dev_hold(rt->u.dst.dev);
1633 rt->rt6i_idev = ort->rt6i_idev;
1634 if (rt->rt6i_idev)
1635 in6_dev_hold(rt->rt6i_idev);
1636 rt->u.dst.lastuse = jiffies;
1637 rt->rt6i_expires = 0;
1638
1639 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1640 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1641 rt->rt6i_metric = 0;
1642
1643 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1644#ifdef CONFIG_IPV6_SUBTREES
1645 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1646#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001647 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648 }
1649 return rt;
1650}
1651
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001652#ifdef CONFIG_IPV6_ROUTE_INFO
1653static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1654 struct in6_addr *gwaddr, int ifindex)
1655{
1656 struct fib6_node *fn;
1657 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001658 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001659
Thomas Grafc71099a2006-08-04 23:20:06 -07001660 table = fib6_get_table(RT6_TABLE_INFO);
1661 if (table == NULL)
1662 return NULL;
1663
1664 write_lock_bh(&table->tb6_lock);
1665 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001666 if (!fn)
1667 goto out;
1668
Eric Dumazet7cc48262007-02-09 16:22:57 -08001669 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001670 if (rt->rt6i_dev->ifindex != ifindex)
1671 continue;
1672 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1673 continue;
1674 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1675 continue;
1676 dst_hold(&rt->u.dst);
1677 break;
1678 }
1679out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001680 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001681 return rt;
1682}
1683
1684static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1685 struct in6_addr *gwaddr, int ifindex,
1686 unsigned pref)
1687{
Thomas Graf86872cb2006-08-22 00:01:08 -07001688 struct fib6_config cfg = {
1689 .fc_table = RT6_TABLE_INFO,
1690 .fc_metric = 1024,
1691 .fc_ifindex = ifindex,
1692 .fc_dst_len = prefixlen,
1693 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1694 RTF_UP | RTF_PREF(pref),
1695 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001696
Thomas Graf86872cb2006-08-22 00:01:08 -07001697 ipv6_addr_copy(&cfg.fc_dst, prefix);
1698 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1699
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001700 /* We should treat it as a default route if prefix length is 0. */
1701 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001702 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001703
Thomas Graf86872cb2006-08-22 00:01:08 -07001704 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001705
1706 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1707}
1708#endif
1709
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001711{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001713 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714
Thomas Grafc71099a2006-08-04 23:20:06 -07001715 table = fib6_get_table(RT6_TABLE_DFLT);
1716 if (table == NULL)
1717 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718
Thomas Grafc71099a2006-08-04 23:20:06 -07001719 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001720 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001722 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1724 break;
1725 }
1726 if (rt)
1727 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001728 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729 return rt;
1730}
1731
1732struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001733 struct net_device *dev,
1734 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001735{
Thomas Graf86872cb2006-08-22 00:01:08 -07001736 struct fib6_config cfg = {
1737 .fc_table = RT6_TABLE_DFLT,
1738 .fc_metric = 1024,
1739 .fc_ifindex = dev->ifindex,
1740 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1741 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1742 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743
Thomas Graf86872cb2006-08-22 00:01:08 -07001744 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745
Thomas Graf86872cb2006-08-22 00:01:08 -07001746 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748 return rt6_get_dflt_router(gwaddr, dev);
1749}
1750
1751void rt6_purge_dflt_routers(void)
1752{
1753 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001754 struct fib6_table *table;
1755
1756 /* NOTE: Keep consistent with rt6_get_dflt_router */
1757 table = fib6_get_table(RT6_TABLE_DFLT);
1758 if (table == NULL)
1759 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760
1761restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001762 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001763 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1765 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001766 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001767 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 goto restart;
1769 }
1770 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001771 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772}
1773
Thomas Graf86872cb2006-08-22 00:01:08 -07001774static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1775 struct fib6_config *cfg)
1776{
1777 memset(cfg, 0, sizeof(*cfg));
1778
1779 cfg->fc_table = RT6_TABLE_MAIN;
1780 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1781 cfg->fc_metric = rtmsg->rtmsg_metric;
1782 cfg->fc_expires = rtmsg->rtmsg_info;
1783 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1784 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1785 cfg->fc_flags = rtmsg->rtmsg_flags;
1786
1787 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1788 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1789 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1790}
1791
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1793{
Thomas Graf86872cb2006-08-22 00:01:08 -07001794 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795 struct in6_rtmsg rtmsg;
1796 int err;
1797
1798 switch(cmd) {
1799 case SIOCADDRT: /* Add a route */
1800 case SIOCDELRT: /* Delete a route */
1801 if (!capable(CAP_NET_ADMIN))
1802 return -EPERM;
1803 err = copy_from_user(&rtmsg, arg,
1804 sizeof(struct in6_rtmsg));
1805 if (err)
1806 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001807
1808 rtmsg_to_fib6_config(&rtmsg, &cfg);
1809
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810 rtnl_lock();
1811 switch (cmd) {
1812 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001813 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 break;
1815 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001816 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817 break;
1818 default:
1819 err = -EINVAL;
1820 }
1821 rtnl_unlock();
1822
1823 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001824 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825
1826 return -EINVAL;
1827}
1828
1829/*
1830 * Drop the packet on the floor
1831 */
1832
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001833static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1834 int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001836 int type;
1837 switch (ipstats_mib_noroutes) {
1838 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001839 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001840 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1841 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1842 break;
1843 }
1844 /* FALLTHROUGH */
1845 case IPSTATS_MIB_OUTNOROUTES:
1846 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1847 break;
1848 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001849 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850 kfree_skb(skb);
1851 return 0;
1852}
1853
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001854static int ip6_pkt_discard(struct sk_buff *skb)
1855{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001856 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001857}
1858
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001859static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860{
1861 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001862 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863}
1864
David S. Miller6723ab52006-10-18 21:20:57 -07001865#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1866
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001867static int ip6_pkt_prohibit(struct sk_buff *skb)
1868{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001869 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001870}
1871
1872static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1873{
1874 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001875 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001876}
1877
/* Free @skb without sending any ICMPv6 error or touching counters; returns 0. */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	int ret = 0;

	kfree_skb(skb);
	return ret;
}
1883
David S. Miller6723ab52006-10-18 21:20:57 -07001884#endif
1885
Linus Torvalds1da177e2005-04-16 15:20:36 -07001886/*
1887 * Allocate a dst for local (unicast / anycast) address.
1888 */
1889
1890struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1891 const struct in6_addr *addr,
1892 int anycast)
1893{
1894 struct rt6_info *rt = ip6_dst_alloc();
1895
1896 if (rt == NULL)
1897 return ERR_PTR(-ENOMEM);
1898
1899 dev_hold(&loopback_dev);
1900 in6_dev_hold(idev);
1901
1902 rt->u.dst.flags = DST_HOST;
1903 rt->u.dst.input = ip6_input;
1904 rt->u.dst.output = ip6_output;
1905 rt->rt6i_dev = &loopback_dev;
1906 rt->rt6i_idev = idev;
1907 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1908 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1909 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1910 rt->u.dst.obsolete = -1;
1911
1912 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001913 if (anycast)
1914 rt->rt6i_flags |= RTF_ANYCAST;
1915 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001916 rt->rt6i_flags |= RTF_LOCAL;
1917 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1918 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001919 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920 return ERR_PTR(-ENOMEM);
1921 }
1922
1923 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1924 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001925 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001926
1927 atomic_set(&rt->u.dst.__refcnt, 1);
1928
1929 return rt;
1930}
1931
1932static int fib6_ifdown(struct rt6_info *rt, void *arg)
1933{
1934 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1935 rt != &ip6_null_entry) {
1936 RT6_TRACE("deleted by ifdown %p\n", rt);
1937 return -1;
1938 }
1939 return 0;
1940}
1941
1942void rt6_ifdown(struct net_device *dev)
1943{
Thomas Grafc71099a2006-08-04 23:20:06 -07001944 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945}
1946
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1952
1953static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1954{
1955 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1956 struct inet6_dev *idev;
1957
1958 /* In IPv6 pmtu discovery is not optional,
1959 so that RTAX_MTU lock cannot disable it.
1960 We still use this lock to block changes
1961 caused by addrconf/ndisc.
1962 */
1963
1964 idev = __in6_dev_get(arg->dev);
1965 if (idev == NULL)
1966 return 0;
1967
1968 /* For administrative MTU increase, there is no way to discover
1969 IPv6 PMTU increase, so PMTU increase should be updated here.
1970 Since RFC 1981 doesn't include administrative MTU increase
1971 update PMTU increase is a MUST. (i.e. jumbo frame)
1972 */
1973 /*
1974 If new MTU is less than route PMTU, this new MTU will be the
1975 lowest MTU in the path, update the route PMTU to reflect PMTU
1976 decreases; if new MTU is greater than route PMTU, and the
1977 old MTU is the lowest MTU in the path, update the route PMTU
1978 to reflect the increase. In this case if the other nodes' MTU
1979 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1980 PMTU discouvery.
1981 */
1982 if (rt->rt6i_dev == arg->dev &&
1983 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001984 (dst_mtu(&rt->u.dst) > arg->mtu ||
1985 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001986 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Simon Arlott566cfd82007-07-26 00:09:55 -07001988 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1989 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990 return 0;
1991}
1992
1993void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1994{
Thomas Grafc71099a2006-08-04 23:20:06 -07001995 struct rt6_mtu_change_arg arg = {
1996 .dev = dev,
1997 .mtu = mtu,
1998 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999
Thomas Grafc71099a2006-08-04 23:20:06 -07002000 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002001}
2002
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002003static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002004 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002005 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002006 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002007 [RTA_PRIORITY] = { .type = NLA_U32 },
2008 [RTA_METRICS] = { .type = NLA_NESTED },
2009};
2010
2011static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2012 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013{
Thomas Graf86872cb2006-08-22 00:01:08 -07002014 struct rtmsg *rtm;
2015 struct nlattr *tb[RTA_MAX+1];
2016 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002017
Thomas Graf86872cb2006-08-22 00:01:08 -07002018 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2019 if (err < 0)
2020 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002021
Thomas Graf86872cb2006-08-22 00:01:08 -07002022 err = -EINVAL;
2023 rtm = nlmsg_data(nlh);
2024 memset(cfg, 0, sizeof(*cfg));
2025
2026 cfg->fc_table = rtm->rtm_table;
2027 cfg->fc_dst_len = rtm->rtm_dst_len;
2028 cfg->fc_src_len = rtm->rtm_src_len;
2029 cfg->fc_flags = RTF_UP;
2030 cfg->fc_protocol = rtm->rtm_protocol;
2031
2032 if (rtm->rtm_type == RTN_UNREACHABLE)
2033 cfg->fc_flags |= RTF_REJECT;
2034
2035 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2036 cfg->fc_nlinfo.nlh = nlh;
2037
2038 if (tb[RTA_GATEWAY]) {
2039 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2040 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002042
2043 if (tb[RTA_DST]) {
2044 int plen = (rtm->rtm_dst_len + 7) >> 3;
2045
2046 if (nla_len(tb[RTA_DST]) < plen)
2047 goto errout;
2048
2049 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002050 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002051
2052 if (tb[RTA_SRC]) {
2053 int plen = (rtm->rtm_src_len + 7) >> 3;
2054
2055 if (nla_len(tb[RTA_SRC]) < plen)
2056 goto errout;
2057
2058 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002060
2061 if (tb[RTA_OIF])
2062 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2063
2064 if (tb[RTA_PRIORITY])
2065 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2066
2067 if (tb[RTA_METRICS]) {
2068 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2069 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002071
2072 if (tb[RTA_TABLE])
2073 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2074
2075 err = 0;
2076errout:
2077 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002078}
2079
Thomas Grafc127ea22007-03-22 11:58:32 -07002080static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081{
Thomas Graf86872cb2006-08-22 00:01:08 -07002082 struct fib6_config cfg;
2083 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084
Thomas Graf86872cb2006-08-22 00:01:08 -07002085 err = rtm_to_fib6_config(skb, nlh, &cfg);
2086 if (err < 0)
2087 return err;
2088
2089 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090}
2091
Thomas Grafc127ea22007-03-22 11:58:32 -07002092static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093{
Thomas Graf86872cb2006-08-22 00:01:08 -07002094 struct fib6_config cfg;
2095 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096
Thomas Graf86872cb2006-08-22 00:01:08 -07002097 err = rtm_to_fib6_config(skb, nlh, &cfg);
2098 if (err < 0)
2099 return err;
2100
2101 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002102}
2103
Thomas Graf339bf982006-11-10 14:10:15 -08002104static inline size_t rt6_nlmsg_size(void)
2105{
2106 return NLMSG_ALIGN(sizeof(struct rtmsg))
2107 + nla_total_size(16) /* RTA_SRC */
2108 + nla_total_size(16) /* RTA_DST */
2109 + nla_total_size(16) /* RTA_GATEWAY */
2110 + nla_total_size(16) /* RTA_PREFSRC */
2111 + nla_total_size(4) /* RTA_TABLE */
2112 + nla_total_size(4) /* RTA_IIF */
2113 + nla_total_size(4) /* RTA_OIF */
2114 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002115 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002116 + nla_total_size(sizeof(struct rta_cacheinfo));
2117}
2118
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002120 struct in6_addr *dst, struct in6_addr *src,
2121 int iif, int type, u32 pid, u32 seq,
2122 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123{
2124 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002125 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002126 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002127 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128
2129 if (prefix) { /* user wants prefix routes only */
2130 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2131 /* success since this is not a prefix route */
2132 return 1;
2133 }
2134 }
2135
Thomas Graf2d7202b2006-08-22 00:01:27 -07002136 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2137 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002138 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002139
2140 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002141 rtm->rtm_family = AF_INET6;
2142 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2143 rtm->rtm_src_len = rt->rt6i_src.plen;
2144 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002145 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002146 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002147 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002148 table = RT6_TABLE_UNSPEC;
2149 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002150 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151 if (rt->rt6i_flags&RTF_REJECT)
2152 rtm->rtm_type = RTN_UNREACHABLE;
2153 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2154 rtm->rtm_type = RTN_LOCAL;
2155 else
2156 rtm->rtm_type = RTN_UNICAST;
2157 rtm->rtm_flags = 0;
2158 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2159 rtm->rtm_protocol = rt->rt6i_protocol;
2160 if (rt->rt6i_flags&RTF_DYNAMIC)
2161 rtm->rtm_protocol = RTPROT_REDIRECT;
2162 else if (rt->rt6i_flags & RTF_ADDRCONF)
2163 rtm->rtm_protocol = RTPROT_KERNEL;
2164 else if (rt->rt6i_flags&RTF_DEFAULT)
2165 rtm->rtm_protocol = RTPROT_RA;
2166
2167 if (rt->rt6i_flags&RTF_CACHE)
2168 rtm->rtm_flags |= RTM_F_CLONED;
2169
2170 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002171 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002172 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002174 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002175#ifdef CONFIG_IPV6_SUBTREES
2176 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002177 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002178 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002180 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002181#endif
2182 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002183 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184 else if (dst) {
2185 struct in6_addr saddr_buf;
2186 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002187 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002189
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002191 goto nla_put_failure;
2192
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002194 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2195
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002197 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2198
2199 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002200
2201 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2202 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2203 expires, rt->u.dst.error) < 0)
2204 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205
Thomas Graf2d7202b2006-08-22 00:01:27 -07002206 return nlmsg_end(skb, nlh);
2207
2208nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002209 nlmsg_cancel(skb, nlh);
2210 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211}
2212
Patrick McHardy1b43af52006-08-10 23:11:17 -07002213int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214{
2215 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2216 int prefix;
2217
Thomas Graf2d7202b2006-08-22 00:01:27 -07002218 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2219 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2221 } else
2222 prefix = 0;
2223
2224 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2225 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002226 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227}
2228
Thomas Grafc127ea22007-03-22 11:58:32 -07002229static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230{
Thomas Grafab364a62006-08-22 00:01:47 -07002231 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002233 struct sk_buff *skb;
2234 struct rtmsg *rtm;
2235 struct flowi fl;
2236 int err, iif = 0;
2237
2238 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2239 if (err < 0)
2240 goto errout;
2241
2242 err = -EINVAL;
2243 memset(&fl, 0, sizeof(fl));
2244
2245 if (tb[RTA_SRC]) {
2246 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2247 goto errout;
2248
2249 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2250 }
2251
2252 if (tb[RTA_DST]) {
2253 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2254 goto errout;
2255
2256 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2257 }
2258
2259 if (tb[RTA_IIF])
2260 iif = nla_get_u32(tb[RTA_IIF]);
2261
2262 if (tb[RTA_OIF])
2263 fl.oif = nla_get_u32(tb[RTA_OIF]);
2264
2265 if (iif) {
2266 struct net_device *dev;
2267 dev = __dev_get_by_index(iif);
2268 if (!dev) {
2269 err = -ENODEV;
2270 goto errout;
2271 }
2272 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002273
2274 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002275 if (skb == NULL) {
2276 err = -ENOBUFS;
2277 goto errout;
2278 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279
2280 /* Reserve room for dummy headers, this skb can pass
2281 through good chunk of routing engine.
2282 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002283 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002284 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2285
Thomas Grafab364a62006-08-22 00:01:47 -07002286 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287 skb->dst = &rt->u.dst;
2288
Thomas Grafab364a62006-08-22 00:01:47 -07002289 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002291 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002293 kfree_skb(skb);
2294 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002295 }
2296
Thomas Graf2942e902006-08-15 00:30:25 -07002297 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002298errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300}
2301
Thomas Graf86872cb2006-08-22 00:01:08 -07002302void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303{
2304 struct sk_buff *skb;
Thomas Graf86872cb2006-08-22 00:01:08 -07002305 u32 pid = 0, seq = 0;
2306 struct nlmsghdr *nlh = NULL;
Thomas Graf21713eb2006-08-15 00:35:24 -07002307 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002308
Thomas Graf86872cb2006-08-22 00:01:08 -07002309 if (info) {
2310 pid = info->pid;
2311 nlh = info->nlh;
2312 if (nlh)
2313 seq = nlh->nlmsg_seq;
2314 }
2315
Thomas Graf339bf982006-11-10 14:10:15 -08002316 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002317 if (skb == NULL)
2318 goto errout;
2319
2320 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002321 if (err < 0) {
2322 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2323 WARN_ON(err == -EMSGSIZE);
2324 kfree_skb(skb);
2325 goto errout;
2326 }
Thomas Graf21713eb2006-08-15 00:35:24 -07002327 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2328errout:
2329 if (err < 0)
2330 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331}
2332
2333/*
2334 * /proc
2335 */
2336
2337#ifdef CONFIG_PROC_FS
2338
/*
 * Record length used by the /proc route listing: rt6_info_route() and
 * rt6_proc_info() convert the read offset to a record index with it.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and its rt6_info_route() callback. */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* bytes requested by the caller */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
2349
2350static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2351{
2352 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002353
2354 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2355 arg->skip++;
2356 return 0;
2357 }
2358
2359 if (arg->len >= arg->length)
2360 return 0;
2361
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002362 arg->len += sprintf(arg->buffer + arg->len,
2363 NIP6_SEQFMT " %02x ",
2364 NIP6(rt->rt6i_dst.addr),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365 rt->rt6i_dst.plen);
2366
2367#ifdef CONFIG_IPV6_SUBTREES
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002368 arg->len += sprintf(arg->buffer + arg->len,
2369 NIP6_SEQFMT " %02x ",
2370 NIP6(rt->rt6i_src.addr),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371 rt->rt6i_src.plen);
2372#else
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002373 arg->len += sprintf(arg->buffer + arg->len,
2374 "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002375#endif
2376
2377 if (rt->rt6i_nexthop) {
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002378 arg->len += sprintf(arg->buffer + arg->len,
2379 NIP6_SEQFMT,
2380 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 } else {
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002382 arg->len += sprintf(arg->buffer + arg->len,
2383 "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384 }
2385 arg->len += sprintf(arg->buffer + arg->len,
2386 " %08x %08x %08x %08x %8s\n",
2387 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002388 rt->u.dst.__use, rt->rt6i_flags,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2390 return 0;
2391}
2392
2393static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2394{
Thomas Grafc71099a2006-08-04 23:20:06 -07002395 struct rt6_proc_arg arg = {
2396 .buffer = buffer,
2397 .offset = offset,
2398 .length = length,
2399 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400
Thomas Grafc71099a2006-08-04 23:20:06 -07002401 fib6_clean_all(rt6_info_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402
2403 *start = buffer;
2404 if (offset)
2405 *start += offset % RT6_INFO_LEN;
2406
2407 arg.len -= offset % RT6_INFO_LEN;
2408
2409 if (arg.len > length)
2410 arg.len = length;
2411 if (arg.len < 0)
2412 arg.len = 0;
2413
2414 return arg.len;
2415}
2416
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2418{
2419 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2420 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2421 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2422 rt6_stats.fib_rt_cache,
2423 atomic_read(&ip6_dst_ops.entries),
2424 rt6_stats.fib_discarded_routes);
2425
2426 return 0;
2427}
2428
2429static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2430{
2431 return single_open(file, rt6_stats_seq_show, NULL);
2432}
2433
Arjan van de Ven9a321442007-02-12 00:55:35 -08002434static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435 .owner = THIS_MODULE,
2436 .open = rt6_stats_seq_open,
2437 .read = seq_read,
2438 .llseek = seq_lseek,
2439 .release = single_release,
2440};
2441#endif /* CONFIG_PROC_FS */
2442
2443#ifdef CONFIG_SYSCTL
2444
2445static int flush_delay;
2446
2447static
2448int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2449 void __user *buffer, size_t *lenp, loff_t *ppos)
2450{
2451 if (write) {
2452 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2453 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2454 return 0;
2455 } else
2456 return -EINVAL;
2457}
2458
2459ctl_table ipv6_route_table[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002460 {
2461 .ctl_name = NET_IPV6_ROUTE_FLUSH,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462 .procname = "flush",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002463 .data = &flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002464 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002465 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002466 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002467 },
2468 {
2469 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2470 .procname = "gc_thresh",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002471 .data = &ip6_dst_ops.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002472 .maxlen = sizeof(int),
2473 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002474 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002475 },
2476 {
2477 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2478 .procname = "max_size",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002479 .data = &ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480 .maxlen = sizeof(int),
2481 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002482 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483 },
2484 {
2485 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2486 .procname = "gc_min_interval",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002487 .data = &ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488 .maxlen = sizeof(int),
2489 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002490 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491 .strategy = &sysctl_jiffies,
2492 },
2493 {
2494 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2495 .procname = "gc_timeout",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002496 .data = &ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002497 .maxlen = sizeof(int),
2498 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002499 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500 .strategy = &sysctl_jiffies,
2501 },
2502 {
2503 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2504 .procname = "gc_interval",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002505 .data = &ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002506 .maxlen = sizeof(int),
2507 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002508 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002509 .strategy = &sysctl_jiffies,
2510 },
2511 {
2512 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2513 .procname = "gc_elasticity",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002514 .data = &ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002515 .maxlen = sizeof(int),
2516 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002517 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002518 .strategy = &sysctl_jiffies,
2519 },
2520 {
2521 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2522 .procname = "mtu_expires",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002523 .data = &ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002524 .maxlen = sizeof(int),
2525 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002526 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002527 .strategy = &sysctl_jiffies,
2528 },
2529 {
2530 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2531 .procname = "min_adv_mss",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002532 .data = &ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533 .maxlen = sizeof(int),
2534 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002535 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536 .strategy = &sysctl_jiffies,
2537 },
2538 {
2539 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2540 .procname = "gc_min_interval_ms",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002541 .data = &ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002542 .maxlen = sizeof(int),
2543 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002544 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002545 .strategy = &sysctl_ms_jiffies,
2546 },
2547 { .ctl_name = 0 }
2548};
2549
2550#endif
2551
2552void __init ip6_route_init(void)
2553{
YOSHIFUJI Hideaki952a10b2007-04-21 20:13:44 +09002554#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555 struct proc_dir_entry *p;
YOSHIFUJI Hideaki952a10b2007-04-21 20:13:44 +09002556#endif
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002557 ip6_dst_ops.kmem_cachep =
2558 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09002559 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
David S. Miller14e50e52007-05-24 18:17:54 -07002560 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2561
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 fib6_init();
2563#ifdef CONFIG_PROC_FS
2564 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2565 if (p)
2566 p->owner = THIS_MODULE;
2567
2568 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2569#endif
2570#ifdef CONFIG_XFRM
2571 xfrm6_init();
2572#endif
Thomas Graf101367c2006-08-04 03:39:02 -07002573#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2574 fib6_rules_init();
2575#endif
Thomas Grafc127ea22007-03-22 11:58:32 -07002576
2577 __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2578 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2579 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002580}
2581
2582void ip6_route_cleanup(void)
2583{
Thomas Graf101367c2006-08-04 03:39:02 -07002584#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2585 fib6_rules_cleanup();
2586#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002587#ifdef CONFIG_PROC_FS
2588 proc_net_remove("ipv6_route");
2589 proc_net_remove("rt6_stats");
2590#endif
2591#ifdef CONFIG_XFRM
2592 xfrm6_fini();
2593#endif
2594 rt6_ifdown(NULL);
2595 fib6_gc_cleanup();
2596 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2597}