blob: ac70e2d3b10c76bb0a1a21cc1f39127d63d56642 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
15
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020043#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070054#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070055#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() expand to printk() calls; below that both compile to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
73
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -080074#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
76static int ip6_rt_max_size = 4096;
77static int ip6_rt_gc_min_interval = HZ / 2;
78static int ip6_rt_gc_timeout = 60*HZ;
79int ip6_rt_gc_interval = 30*HZ;
80static int ip6_rt_gc_elasticity = 9;
81static int ip6_rt_mtu_expires = 10*60*HZ;
82static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
84static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87static void ip6_dst_destroy(struct dst_entry *);
88static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
90static int ip6_dst_gc(void);
91
92static int ip6_pkt_discard(struct sk_buff *skb);
93static int ip6_pkt_discard_out(struct sk_buff *skb);
94static void ip6_link_failure(struct sk_buff *skb);
95static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080097#ifdef CONFIG_IPV6_ROUTE_INFO
98static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
99 struct in6_addr *gwaddr, int ifindex,
100 unsigned pref);
101static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex);
103#endif
104
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105static struct dst_ops ip6_dst_ops = {
106 .family = AF_INET6,
107 .protocol = __constant_htons(ETH_P_IPV6),
108 .gc = ip6_dst_gc,
109 .gc_thresh = 1024,
110 .check = ip6_dst_check,
111 .destroy = ip6_dst_destroy,
112 .ifdown = ip6_dst_ifdown,
113 .negative_advice = ip6_negative_advice,
114 .link_failure = ip6_link_failure,
115 .update_pmtu = ip6_rt_update_pmtu,
116 .entry_size = sizeof(struct rt6_info),
117};
118
David S. Miller14e50e52007-05-24 18:17:54 -0700119static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
120{
121}
122
123static struct dst_ops ip6_dst_blackhole_ops = {
124 .family = AF_INET6,
125 .protocol = __constant_htons(ETH_P_IPV6),
126 .destroy = ip6_dst_destroy,
127 .check = ip6_dst_check,
128 .update_pmtu = ip6_rt_blackhole_update_pmtu,
129 .entry_size = sizeof(struct rt6_info),
130};
131
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132struct rt6_info ip6_null_entry = {
133 .u = {
134 .dst = {
135 .__refcnt = ATOMIC_INIT(1),
136 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137 .obsolete = -1,
138 .error = -ENETUNREACH,
139 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
140 .input = ip6_pkt_discard,
141 .output = ip6_pkt_discard_out,
142 .ops = &ip6_dst_ops,
143 .path = (struct dst_entry*)&ip6_null_entry,
144 }
145 },
146 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
147 .rt6i_metric = ~(u32) 0,
148 .rt6i_ref = ATOMIC_INIT(1),
149};
150
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Sentinel entry reporting -EACCES; packets are handed to the
 * ip6_pkt_prohibit*() handlers.
 */
struct rt6_info ip6_prohibit_entry = {
	.u.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
		.ops		= &ip6_dst_ops,
		.path		= (struct dst_entry*)&ip6_prohibit_entry,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Sentinel entry reporting -EINVAL; packets in both directions go to
 * dst_discard().
 */
struct rt6_info ip6_blk_hole_entry = {
	.u.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= dst_discard,
		.output		= dst_discard,
		.ops		= &ip6_dst_ops,
		.path		= (struct dst_entry*)&ip6_blk_hole_entry,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
195
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196/* allocate dst with ip6_dst_ops */
197static __inline__ struct rt6_info *ip6_dst_alloc(void)
198{
199 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
200}
201
202static void ip6_dst_destroy(struct dst_entry *dst)
203{
204 struct rt6_info *rt = (struct rt6_info *)dst;
205 struct inet6_dev *idev = rt->rt6i_idev;
206
207 if (idev != NULL) {
208 rt->rt6i_idev = NULL;
209 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900210 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211}
212
213static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
214 int how)
215{
216 struct rt6_info *rt = (struct rt6_info *)dst;
217 struct inet6_dev *idev = rt->rt6i_idev;
218
Eric W. Biederman2774c7a2007-09-26 22:10:56 -0700219 if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) {
220 struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221 if (loopback_idev != NULL) {
222 rt->rt6i_idev = loopback_idev;
223 in6_dev_put(idev);
224 }
225 }
226}
227
228static __inline__ int rt6_check_expired(const struct rt6_info *rt)
229{
230 return (rt->rt6i_flags & RTF_EXPIRES &&
231 time_after(jiffies, rt->rt6i_expires));
232}
233
Thomas Grafc71099a2006-08-04 23:20:06 -0700234static inline int rt6_need_strict(struct in6_addr *daddr)
235{
236 return (ipv6_addr_type(daddr) &
237 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
238}
239
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700241 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 */
243
244static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
245 int oif,
246 int strict)
247{
248 struct rt6_info *local = NULL;
249 struct rt6_info *sprt;
250
251 if (oif) {
Eric Dumazet7cc48262007-02-09 16:22:57 -0800252 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253 struct net_device *dev = sprt->rt6i_dev;
254 if (dev->ifindex == oif)
255 return sprt;
256 if (dev->flags & IFF_LOOPBACK) {
257 if (sprt->rt6i_idev == NULL ||
258 sprt->rt6i_idev->dev->ifindex != oif) {
259 if (strict && oif)
260 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900261 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 local->rt6i_idev->dev->ifindex == oif))
263 continue;
264 }
265 local = sprt;
266 }
267 }
268
269 if (local)
270 return local;
271
272 if (strict)
273 return &ip6_null_entry;
274 }
275 return rt;
276}
277
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a next-hop neighbour that is not in a NUD_VALID state and the
 * per-device rtr_probe_interval has elapsed since the neighbour was last
 * updated, stamp the neighbour with the current time and send a Neighbour
 * Solicitation to its solicited-node multicast address.  The interval check
 * provides the rate limiting (probes MUST be limited to no more than one
 * per minute).
 *
 * A NULL @rt, or a route without a next hop, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	int due;

	/* cheap unlocked check first */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (due) {
		struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
		struct in6_addr mcaddr;

		/* the solicitation is sent after the lock has been dropped */
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	}
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
313
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800315 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700317static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800319 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700320 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800321 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700322 if ((dev->flags & IFF_LOOPBACK) &&
323 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
324 return 1;
325 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326}
327
Dave Jonesb6f99a22007-03-22 12:27:49 -0700328static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800330 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800331 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700332 if (rt->rt6i_flags & RTF_NONEXTHOP ||
333 !(rt->rt6i_flags & RTF_GATEWAY))
334 m = 1;
335 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800336 read_lock_bh(&neigh->lock);
337 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700338 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800339#ifdef CONFIG_IPV6_ROUTER_PREF
340 else if (neigh->nud_state & NUD_FAILED)
341 m = 0;
342#endif
343 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800344 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800345 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800346 } else
347 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800348 return m;
349}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800351static int rt6_score_route(struct rt6_info *rt, int oif,
352 int strict)
353{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700354 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900355
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700356 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700357 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800358 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800359#ifdef CONFIG_IPV6_ROUTER_PREF
360 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700362 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800363 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800364 return -1;
365 return m;
366}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
David S. Millerf11e6652007-03-24 20:36:25 -0700368static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800370{
David S. Millerf11e6652007-03-24 20:36:25 -0700371 int m;
372
373 if (rt6_check_expired(rt))
374 goto out;
375
376 m = rt6_score_route(rt, oif, strict);
377 if (m < 0)
378 goto out;
379
380 if (m > *mpri) {
381 if (strict & RT6_LOOKUP_F_REACHABLE)
382 rt6_probe(match);
383 *mpri = m;
384 match = rt;
385 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
386 rt6_probe(rt);
387 }
388
389out:
390 return match;
391}
392
393static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394 struct rt6_info *rr_head,
395 u32 metric, int oif, int strict)
396{
397 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800398 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399
David S. Millerf11e6652007-03-24 20:36:25 -0700400 match = NULL;
401 for (rt = rr_head; rt && rt->rt6i_metric == metric;
402 rt = rt->u.dst.rt6_next)
403 match = find_match(rt, oif, strict, &mpri, match);
404 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
405 rt = rt->u.dst.rt6_next)
406 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800407
David S. Millerf11e6652007-03-24 20:36:25 -0700408 return match;
409}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800410
David S. Millerf11e6652007-03-24 20:36:25 -0700411static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412{
413 struct rt6_info *match, *rt0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414
David S. Millerf11e6652007-03-24 20:36:25 -0700415 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
416 __FUNCTION__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417
David S. Millerf11e6652007-03-24 20:36:25 -0700418 rt0 = fn->rr_ptr;
419 if (!rt0)
420 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
David S. Millerf11e6652007-03-24 20:36:25 -0700422 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800424 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700425 (strict & RT6_LOOKUP_F_REACHABLE)) {
426 struct rt6_info *next = rt0->u.dst.rt6_next;
427
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800428 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700429 if (!next || next->rt6i_metric != rt0->rt6i_metric)
430 next = fn->leaf;
431
432 if (next != rt0)
433 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434 }
435
David S. Millerf11e6652007-03-24 20:36:25 -0700436 RT6_TRACE("%s() => %p\n",
437 __FUNCTION__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800439 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440}
441
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option.
 *
 * @dev:    device the option arrived on (its ifindex keys the route)
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address for the route
 *
 * A lifetime of zero deletes an existing route, a non-zero lifetime adds a
 * missing one or refreshes the preference of an existing one, and
 * 0xffffffff means the route never expires.
 *
 * Returns 0 on success (including when no route was added) and -EINVAL
 * when the option is too short or its length/prefix_len fields are
 * inconsistent.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	struct rt6_info *rt;
	unsigned int pref;
	u32 lifetime;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length: at most 3 length units,
	 * at most 128 prefix bits, and enough units to carry the prefix.
	 */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	/* an invalid preference value is treated as medium */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* 0xffffffff is infinity; anything else is clamped to avoid overflow */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime != 0xffffffff && lifetime > 0x7fffffff/HZ)
		lifetime = 0x7fffffff/HZ - 1;

	if (rinfo->length == 3) {
		/* full 128-bit prefix present: use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (lifetime == 0xffffffff) {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	} else {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
519
/*
 * BACKTRACK(saddr) - climb the fib tree after a failed selection.
 *
 * Expands in place inside a lookup function and relies on the caller's
 * locals `rt' and `fn' and on its labels `restart' and `out'.  When `rt' is
 * &ip6_null_entry it walks towards the root: at each step it moves to the
 * parent or, when the parent has a subtree other than the node we came
 * from, looks @saddr up in that subtree.  It jumps to `restart' at the
 * first node carrying RTN_RTINFO and to `out' when a node flagged
 * RTN_TL_ROOT is reached.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700537
538static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
539 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540{
541 struct fib6_node *fn;
542 struct rt6_info *rt;
543
Thomas Grafc71099a2006-08-04 23:20:06 -0700544 read_lock_bh(&table->tb6_lock);
545 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
546restart:
547 rt = fn->leaf;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700548 rt = rt6_device_match(rt, fl->oif, flags);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700549 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700550out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800551 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700552 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700553 return rt;
554
555}
556
557struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
558 int oif, int strict)
559{
560 struct flowi fl = {
561 .oif = oif,
562 .nl_u = {
563 .ip6_u = {
564 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700565 },
566 },
567 };
568 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700569 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700570
Thomas Grafadaa70b2006-10-13 15:01:03 -0700571 if (saddr) {
572 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
573 flags |= RT6_LOOKUP_F_HAS_SADDR;
574 }
575
Thomas Grafc71099a2006-08-04 23:20:06 -0700576 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
577 if (dst->error == 0)
578 return (struct rt6_info *) dst;
579
580 dst_release(dst);
581
Linus Torvalds1da177e2005-04-16 15:20:36 -0700582 return NULL;
583}
584
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900585EXPORT_SYMBOL(rt6_lookup);
586
Thomas Grafc71099a2006-08-04 23:20:06 -0700587/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 It takes new route entry, the addition fails by any reason the
589 route is freed. In any case, if caller does not hold it, it may
590 be destroyed.
591 */
592
Thomas Graf86872cb2006-08-22 00:01:08 -0700593static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594{
595 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700596 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597
Thomas Grafc71099a2006-08-04 23:20:06 -0700598 table = rt->rt6i_table;
599 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700600 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700601 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602
603 return err;
604}
605
Thomas Graf40e22e82006-08-22 00:00:45 -0700606int ip6_ins_rt(struct rt6_info *rt)
607{
Thomas Graf86872cb2006-08-22 00:01:08 -0700608 return __ip6_ins_rt(rt, NULL);
Thomas Graf40e22e82006-08-22 00:00:45 -0700609}
610
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800611static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
612 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700614 struct rt6_info *rt;
615
616 /*
617 * Clone the route.
618 */
619
620 rt = ip6_rt_copy(ort);
621
622 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900623 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
624 if (rt->rt6i_dst.plen != 128 &&
625 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
626 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900628 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900630 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 rt->rt6i_dst.plen = 128;
632 rt->rt6i_flags |= RTF_CACHE;
633 rt->u.dst.flags |= DST_HOST;
634
635#ifdef CONFIG_IPV6_SUBTREES
636 if (rt->rt6i_src.plen && saddr) {
637 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
638 rt->rt6i_src.plen = 128;
639 }
640#endif
641
642 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
643
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800644 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800646 return rt;
647}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800649static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
650{
651 struct rt6_info *rt = ip6_rt_copy(ort);
652 if (rt) {
653 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
654 rt->rt6i_dst.plen = 128;
655 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800656 rt->u.dst.flags |= DST_HOST;
657 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
658 }
659 return rt;
660}
661
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700662static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700663 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664{
665 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800666 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700667 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800669 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800670 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700672 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673
674relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700675 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800677restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700678 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679
680restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700681 rt = rt6_select(fn, oif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700682 BACKTRACK(&fl->fl6_src);
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800683 if (rt == &ip6_null_entry ||
684 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800685 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800687 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700688 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800689
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800690 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800691 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800692 else {
693#if CLONE_OFFLINK_ROUTE
694 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
695#else
696 goto out2;
697#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800699
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800700 dst_release(&rt->u.dst);
701 rt = nrt ? : &ip6_null_entry;
702
703 dst_hold(&rt->u.dst);
704 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700705 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800706 if (!err)
707 goto out2;
708 }
709
710 if (--attempts <= 0)
711 goto out2;
712
713 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700714 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800715 * released someone could insert this route. Relookup.
716 */
717 dst_release(&rt->u.dst);
718 goto relookup;
719
720out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800721 if (reachable) {
722 reachable = 0;
723 goto restart_2;
724 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800725 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700726 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727out2:
728 rt->u.dst.lastuse = jiffies;
729 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700730
731 return rt;
732}
733
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700734static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
735 struct flowi *fl, int flags)
736{
737 return ip6_pol_route(table, fl->iif, fl, flags);
738}
739
Thomas Grafc71099a2006-08-04 23:20:06 -0700740void ip6_route_input(struct sk_buff *skb)
741{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700742 struct ipv6hdr *iph = ipv6_hdr(skb);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700743 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700744 struct flowi fl = {
745 .iif = skb->dev->ifindex,
746 .nl_u = {
747 .ip6_u = {
748 .daddr = iph->daddr,
749 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800750 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700751 },
752 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900753 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700754 .proto = iph->nexthdr,
755 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700756
757 if (rt6_need_strict(&iph->daddr))
758 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700759
760 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
761}
762
763static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
764 struct flowi *fl, int flags)
765{
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700766 return ip6_pol_route(table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700767}
768
769struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
770{
771 int flags = 0;
772
773 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700774 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700775
Thomas Grafadaa70b2006-10-13 15:01:03 -0700776 if (!ipv6_addr_any(&fl->fl6_src))
777 flags |= RT6_LOOKUP_F_HAS_SADDR;
778
Thomas Grafc71099a2006-08-04 23:20:06 -0700779 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700780}
781
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900782EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783
David S. Miller14e50e52007-05-24 18:17:54 -0700784int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
785{
786 struct rt6_info *ort = (struct rt6_info *) *dstp;
787 struct rt6_info *rt = (struct rt6_info *)
788 dst_alloc(&ip6_dst_blackhole_ops);
789 struct dst_entry *new = NULL;
790
791 if (rt) {
792 new = &rt->u.dst;
793
794 atomic_set(&new->__refcnt, 1);
795 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800796 new->input = dst_discard;
797 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700798
799 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
800 new->dev = ort->u.dst.dev;
801 if (new->dev)
802 dev_hold(new->dev);
803 rt->rt6i_idev = ort->rt6i_idev;
804 if (rt->rt6i_idev)
805 in6_dev_hold(rt->rt6i_idev);
806 rt->rt6i_expires = 0;
807
808 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
809 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
810 rt->rt6i_metric = 0;
811
812 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
813#ifdef CONFIG_IPV6_SUBTREES
814 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
815#endif
816
817 dst_free(new);
818 }
819
820 dst_release(*dstp);
821 *dstp = new;
822 return (new ? 0 : -ENOMEM);
823}
824EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
825
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826/*
827 * Destination cache support functions
828 */
829
830static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
831{
832 struct rt6_info *rt;
833
834 rt = (struct rt6_info *) dst;
835
836 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
837 return dst;
838
839 return NULL;
840}
841
842static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
843{
844 struct rt6_info *rt = (struct rt6_info *) dst;
845
846 if (rt) {
847 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700848 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 else
850 dst_release(dst);
851 }
852 return NULL;
853}
854
855static void ip6_link_failure(struct sk_buff *skb)
856{
857 struct rt6_info *rt;
858
859 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
860
861 rt = (struct rt6_info *) skb->dst;
862 if (rt) {
863 if (rt->rt6i_flags&RTF_CACHE) {
864 dst_set_expires(&rt->u.dst, 0);
865 rt->rt6i_flags |= RTF_EXPIRES;
866 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
867 rt->rt6i_node->fn_sernum = -1;
868 }
869}
870
871static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
872{
873 struct rt6_info *rt6 = (struct rt6_info*)dst;
874
875 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
876 rt6->rt6i_flags |= RTF_MODIFIED;
877 if (mtu < IPV6_MIN_MTU) {
878 mtu = IPV6_MIN_MTU;
879 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
880 }
881 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700882 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 }
884}
885
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886static int ipv6_get_mtu(struct net_device *dev);
887
888static inline unsigned int ipv6_advmss(unsigned int mtu)
889{
890 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
891
892 if (mtu < ip6_rt_min_advmss)
893 mtu = ip6_rt_min_advmss;
894
895 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900896 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
897 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
898 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899 * rely only on pmtu discovery"
900 */
901 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
902 mtu = IPV6_MAXPLEN;
903 return mtu;
904}
905
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700906static struct dst_entry *ndisc_dst_gc_list;
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700907static DEFINE_SPINLOCK(ndisc_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700908
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900909struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 struct neighbour *neigh,
911 struct in6_addr *addr,
912 int (*output)(struct sk_buff *))
913{
914 struct rt6_info *rt;
915 struct inet6_dev *idev = in6_dev_get(dev);
916
917 if (unlikely(idev == NULL))
918 return NULL;
919
920 rt = ip6_dst_alloc();
921 if (unlikely(rt == NULL)) {
922 in6_dev_put(idev);
923 goto out;
924 }
925
926 dev_hold(dev);
927 if (neigh)
928 neigh_hold(neigh);
929 else
930 neigh = ndisc_get_neigh(dev, addr);
931
932 rt->rt6i_dev = dev;
933 rt->rt6i_idev = idev;
934 rt->rt6i_nexthop = neigh;
935 atomic_set(&rt->u.dst.__refcnt, 1);
936 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
937 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
938 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
939 rt->u.dst.output = output;
940
941#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900942 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
943 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944 : 0;
945 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
946 rt->rt6i_dst.plen = 128;
947#endif
948
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700949 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 rt->u.dst.next = ndisc_dst_gc_list;
951 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700952 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953
954 fib6_force_start_gc();
955
956out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +0900957 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958}
959
960int ndisc_dst_gc(int *more)
961{
962 struct dst_entry *dst, *next, **pprev;
963 int freed;
964
965 next = NULL;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900966 freed = 0;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700967
968 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700970
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 while ((dst = *pprev) != NULL) {
972 if (!atomic_read(&dst->__refcnt)) {
973 *pprev = dst->next;
974 dst_free(dst);
975 freed++;
976 } else {
977 pprev = &dst->next;
978 (*more)++;
979 }
980 }
981
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700982 spin_unlock_bh(&ndisc_lock);
983
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984 return freed;
985}
986
987static int ip6_dst_gc(void)
988{
989 static unsigned expire = 30*HZ;
990 static unsigned long last_gc;
991 unsigned long now = jiffies;
992
993 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
994 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
995 goto out;
996
997 expire++;
998 fib6_run_gc(expire);
999 last_gc = now;
1000 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1001 expire = ip6_rt_gc_timeout>>1;
1002
1003out:
1004 expire -= expire>>ip6_rt_gc_elasticity;
1005 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1006}
1007
1008/* Clean host part of a prefix. Not necessary in radix tree,
1009 but results in cleaner routing tables.
1010
1011 Remove it only when all the things will work!
1012 */
1013
1014static int ipv6_get_mtu(struct net_device *dev)
1015{
1016 int mtu = IPV6_MIN_MTU;
1017 struct inet6_dev *idev;
1018
1019 idev = in6_dev_get(dev);
1020 if (idev) {
1021 mtu = idev->cnf.mtu6;
1022 in6_dev_put(idev);
1023 }
1024 return mtu;
1025}
1026
1027int ipv6_get_hoplimit(struct net_device *dev)
1028{
1029 int hoplimit = ipv6_devconf.hop_limit;
1030 struct inet6_dev *idev;
1031
1032 idev = in6_dev_get(dev);
1033 if (idev) {
1034 hoplimit = idev->cnf.hop_limit;
1035 in6_dev_put(idev);
1036 }
1037 return hoplimit;
1038}
1039
1040/*
1041 *
1042 */
1043
Thomas Graf86872cb2006-08-22 00:01:08 -07001044int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001045{
1046 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 struct rt6_info *rt = NULL;
1048 struct net_device *dev = NULL;
1049 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001050 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051 int addr_type;
1052
Thomas Graf86872cb2006-08-22 00:01:08 -07001053 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054 return -EINVAL;
1055#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001056 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 return -EINVAL;
1058#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001059 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 err = -ENODEV;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001061 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 if (!dev)
1063 goto out;
1064 idev = in6_dev_get(dev);
1065 if (!idev)
1066 goto out;
1067 }
1068
Thomas Graf86872cb2006-08-22 00:01:08 -07001069 if (cfg->fc_metric == 0)
1070 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071
Thomas Graf86872cb2006-08-22 00:01:08 -07001072 table = fib6_new_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001073 if (table == NULL) {
1074 err = -ENOBUFS;
1075 goto out;
1076 }
1077
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 rt = ip6_dst_alloc();
1079
1080 if (rt == NULL) {
1081 err = -ENOMEM;
1082 goto out;
1083 }
1084
1085 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001086 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087
Thomas Graf86872cb2006-08-22 00:01:08 -07001088 if (cfg->fc_protocol == RTPROT_UNSPEC)
1089 cfg->fc_protocol = RTPROT_BOOT;
1090 rt->rt6i_protocol = cfg->fc_protocol;
1091
1092 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093
1094 if (addr_type & IPV6_ADDR_MULTICAST)
1095 rt->u.dst.input = ip6_mc_input;
1096 else
1097 rt->u.dst.input = ip6_forward;
1098
1099 rt->u.dst.output = ip6_output;
1100
Thomas Graf86872cb2006-08-22 00:01:08 -07001101 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1102 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 if (rt->rt6i_dst.plen == 128)
1104 rt->u.dst.flags = DST_HOST;
1105
1106#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001107 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1108 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109#endif
1110
Thomas Graf86872cb2006-08-22 00:01:08 -07001111 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112
1113 /* We cannot add true routes via loopback here,
1114 they would result in kernel looping; promote them to reject routes
1115 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001116 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1118 /* hold loopback dev/idev if we haven't done so. */
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001119 if (dev != init_net.loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120 if (dev) {
1121 dev_put(dev);
1122 in6_dev_put(idev);
1123 }
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001124 dev = init_net.loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001125 dev_hold(dev);
1126 idev = in6_dev_get(dev);
1127 if (!idev) {
1128 err = -ENODEV;
1129 goto out;
1130 }
1131 }
1132 rt->u.dst.output = ip6_pkt_discard_out;
1133 rt->u.dst.input = ip6_pkt_discard;
1134 rt->u.dst.error = -ENETUNREACH;
1135 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1136 goto install_route;
1137 }
1138
Thomas Graf86872cb2006-08-22 00:01:08 -07001139 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 struct in6_addr *gw_addr;
1141 int gwa_type;
1142
Thomas Graf86872cb2006-08-22 00:01:08 -07001143 gw_addr = &cfg->fc_gateway;
1144 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 gwa_type = ipv6_addr_type(gw_addr);
1146
1147 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1148 struct rt6_info *grt;
1149
1150 /* IPv6 strictly inhibits using not link-local
1151 addresses as nexthop address.
1152 Otherwise, router will not able to send redirects.
1153 It is very good, but in some (rare!) circumstances
1154 (SIT, PtP, NBMA NOARP links) it is handy to allow
1155 some exceptions. --ANK
1156 */
1157 err = -EINVAL;
1158 if (!(gwa_type&IPV6_ADDR_UNICAST))
1159 goto out;
1160
Thomas Graf86872cb2006-08-22 00:01:08 -07001161 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162
1163 err = -EHOSTUNREACH;
1164 if (grt == NULL)
1165 goto out;
1166 if (dev) {
1167 if (dev != grt->rt6i_dev) {
1168 dst_release(&grt->u.dst);
1169 goto out;
1170 }
1171 } else {
1172 dev = grt->rt6i_dev;
1173 idev = grt->rt6i_idev;
1174 dev_hold(dev);
1175 in6_dev_hold(grt->rt6i_idev);
1176 }
1177 if (!(grt->rt6i_flags&RTF_GATEWAY))
1178 err = 0;
1179 dst_release(&grt->u.dst);
1180
1181 if (err)
1182 goto out;
1183 }
1184 err = -EINVAL;
1185 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1186 goto out;
1187 }
1188
1189 err = -ENODEV;
1190 if (dev == NULL)
1191 goto out;
1192
Thomas Graf86872cb2006-08-22 00:01:08 -07001193 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1195 if (IS_ERR(rt->rt6i_nexthop)) {
1196 err = PTR_ERR(rt->rt6i_nexthop);
1197 rt->rt6i_nexthop = NULL;
1198 goto out;
1199 }
1200 }
1201
Thomas Graf86872cb2006-08-22 00:01:08 -07001202 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001203
1204install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001205 if (cfg->fc_mx) {
1206 struct nlattr *nla;
1207 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208
Thomas Graf86872cb2006-08-22 00:01:08 -07001209 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001210 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001211
1212 if (type) {
1213 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 err = -EINVAL;
1215 goto out;
1216 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001217
1218 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 }
1221 }
1222
1223 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1224 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1225 if (!rt->u.dst.metrics[RTAX_MTU-1])
1226 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1227 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1228 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1229 rt->u.dst.dev = dev;
1230 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001231 rt->rt6i_table = table;
Thomas Graf86872cb2006-08-22 00:01:08 -07001232 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233
1234out:
1235 if (dev)
1236 dev_put(dev);
1237 if (idev)
1238 in6_dev_put(idev);
1239 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001240 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 return err;
1242}
1243
Thomas Graf86872cb2006-08-22 00:01:08 -07001244static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245{
1246 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001247 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248
Patrick McHardy6c813a72006-08-06 22:22:47 -07001249 if (rt == &ip6_null_entry)
1250 return -ENOENT;
1251
Thomas Grafc71099a2006-08-04 23:20:06 -07001252 table = rt->rt6i_table;
1253 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254
Thomas Graf86872cb2006-08-22 00:01:08 -07001255 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256 dst_release(&rt->u.dst);
1257
Thomas Grafc71099a2006-08-04 23:20:06 -07001258 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259
1260 return err;
1261}
1262
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001263int ip6_del_rt(struct rt6_info *rt)
1264{
Thomas Graf86872cb2006-08-22 00:01:08 -07001265 return __ip6_del_rt(rt, NULL);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001266}
1267
Thomas Graf86872cb2006-08-22 00:01:08 -07001268static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269{
Thomas Grafc71099a2006-08-04 23:20:06 -07001270 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271 struct fib6_node *fn;
1272 struct rt6_info *rt;
1273 int err = -ESRCH;
1274
Thomas Graf86872cb2006-08-22 00:01:08 -07001275 table = fib6_get_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001276 if (table == NULL)
1277 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001278
Thomas Grafc71099a2006-08-04 23:20:06 -07001279 read_lock_bh(&table->tb6_lock);
1280
1281 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001282 &cfg->fc_dst, cfg->fc_dst_len,
1283 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001284
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001286 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001287 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001289 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001291 if (cfg->fc_flags & RTF_GATEWAY &&
1292 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001294 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 continue;
1296 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001297 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298
Thomas Graf86872cb2006-08-22 00:01:08 -07001299 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 }
1301 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001302 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303
1304 return err;
1305}
1306
1307/*
1308 * Handle redirects
1309 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001310struct ip6rd_flowi {
1311 struct flowi fl;
1312 struct in6_addr gateway;
1313};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001315static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1316 struct flowi *fl,
1317 int flags)
1318{
1319 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1320 struct rt6_info *rt;
1321 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001322
Linus Torvalds1da177e2005-04-16 15:20:36 -07001323 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001324 * Get the "current" route for this destination and
1325 * check if the redirect has come from approriate router.
1326 *
1327 * RFC 2461 specifies that redirects should only be
1328 * accepted if they come from the nexthop to the target.
1329 * Due to the way the routes are chosen, this notion
1330 * is a bit fuzzy and one might need to check all possible
1331 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333
Thomas Grafc71099a2006-08-04 23:20:06 -07001334 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001335 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001336restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001337 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001338 /*
1339 * Current route is on-link; redirect is always invalid.
1340 *
1341 * Seems, previous statement is not true. It could
1342 * be node, which looks for us as on-link (f.e. proxy ndisc)
1343 * But then router serving it might decide, that we should
1344 * know truth 8)8) --ANK (980726).
1345 */
1346 if (rt6_check_expired(rt))
1347 continue;
1348 if (!(rt->rt6i_flags & RTF_GATEWAY))
1349 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001350 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001351 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001352 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001353 continue;
1354 break;
1355 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001356
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001357 if (!rt)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001358 rt = &ip6_null_entry;
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001359 BACKTRACK(&fl->fl6_src);
1360out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001361 dst_hold(&rt->u.dst);
1362
1363 read_unlock_bh(&table->tb6_lock);
1364
1365 return rt;
1366};
1367
1368static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1369 struct in6_addr *src,
1370 struct in6_addr *gateway,
1371 struct net_device *dev)
1372{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001373 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001374 struct ip6rd_flowi rdfl = {
1375 .fl = {
1376 .oif = dev->ifindex,
1377 .nl_u = {
1378 .ip6_u = {
1379 .daddr = *dest,
1380 .saddr = *src,
1381 },
1382 },
1383 },
1384 .gateway = *gateway,
1385 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001386
1387 if (rt6_need_strict(dest))
1388 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001389
1390 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1391}
1392
1393void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1394 struct in6_addr *saddr,
1395 struct neighbour *neigh, u8 *lladdr, int on_link)
1396{
1397 struct rt6_info *rt, *nrt = NULL;
1398 struct netevent_redirect netevent;
1399
1400 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1401
1402 if (rt == &ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 if (net_ratelimit())
1404 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1405 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001406 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407 }
1408
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409 /*
1410 * We have finally decided to accept it.
1411 */
1412
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001413 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1415 NEIGH_UPDATE_F_OVERRIDE|
1416 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1417 NEIGH_UPDATE_F_ISROUTER))
1418 );
1419
1420 /*
1421 * Redirect received -> path was valid.
1422 * Look, redirects are sent only in response to data packets,
1423 * so that this nexthop apparently is reachable. --ANK
1424 */
1425 dst_confirm(&rt->u.dst);
1426
1427 /* Duplicate redirect: silently ignore. */
1428 if (neigh == rt->u.dst.neighbour)
1429 goto out;
1430
1431 nrt = ip6_rt_copy(rt);
1432 if (nrt == NULL)
1433 goto out;
1434
1435 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1436 if (on_link)
1437 nrt->rt6i_flags &= ~RTF_GATEWAY;
1438
1439 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1440 nrt->rt6i_dst.plen = 128;
1441 nrt->u.dst.flags |= DST_HOST;
1442
1443 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1444 nrt->rt6i_nexthop = neigh_clone(neigh);
1445 /* Reset pmtu, it may be better */
1446 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1447 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1448
Thomas Graf40e22e82006-08-22 00:00:45 -07001449 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 goto out;
1451
Tom Tucker8d717402006-07-30 20:43:36 -07001452 netevent.old = &rt->u.dst;
1453 netevent.new = &nrt->u.dst;
1454 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1455
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001457 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458 return;
1459 }
1460
1461out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001462 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463 return;
1464}
1465
1466/*
1467 * Handle ICMP "packet too big" messages
1468 * i.e. Path MTU discovery
1469 */
1470
1471void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1472 struct net_device *dev, u32 pmtu)
1473{
1474 struct rt6_info *rt, *nrt;
1475 int allfrag = 0;
1476
1477 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1478 if (rt == NULL)
1479 return;
1480
1481 if (pmtu >= dst_mtu(&rt->u.dst))
1482 goto out;
1483
1484 if (pmtu < IPV6_MIN_MTU) {
1485 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001486 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487 * MTU (1280) and a fragment header should always be included
1488 * after a node receiving Too Big message reporting PMTU is
1489 * less than the IPv6 Minimum Link MTU.
1490 */
1491 pmtu = IPV6_MIN_MTU;
1492 allfrag = 1;
1493 }
1494
1495 /* New mtu received -> path was valid.
1496 They are sent only in response to data packets,
1497 so that this nexthop apparently is reachable. --ANK
1498 */
1499 dst_confirm(&rt->u.dst);
1500
1501 /* Host route. If it is static, it would be better
1502 not to override it, but add new one, so that
1503 when cache entry will expire old pmtu
1504 would return automatically.
1505 */
1506 if (rt->rt6i_flags & RTF_CACHE) {
1507 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1508 if (allfrag)
1509 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1510 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1511 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1512 goto out;
1513 }
1514
1515 /* Network route.
1516 Two cases are possible:
1517 1. It is connected route. Action: COW
1518 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1519 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001520 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001521 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001522 else
1523 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001524
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001525 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001526 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1527 if (allfrag)
1528 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1529
1530 /* According to RFC 1981, detecting PMTU increase shouldn't be
1531 * happened within 5 mins, the recommended timer is 10 mins.
1532 * Here this route expiration time is set to ip6_rt_mtu_expires
1533 * which is 10 mins. After 10 mins the decreased pmtu is expired
1534 * and detecting PMTU increase will be automatically happened.
1535 */
1536 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1537 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1538
Thomas Graf40e22e82006-08-22 00:00:45 -07001539 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001541out:
1542 dst_release(&rt->u.dst);
1543}
1544
1545/*
1546 * Misc support functions
1547 */
1548
1549static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1550{
1551 struct rt6_info *rt = ip6_dst_alloc();
1552
1553 if (rt) {
1554 rt->u.dst.input = ort->u.dst.input;
1555 rt->u.dst.output = ort->u.dst.output;
1556
1557 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001558 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001559 rt->u.dst.dev = ort->u.dst.dev;
1560 if (rt->u.dst.dev)
1561 dev_hold(rt->u.dst.dev);
1562 rt->rt6i_idev = ort->rt6i_idev;
1563 if (rt->rt6i_idev)
1564 in6_dev_hold(rt->rt6i_idev);
1565 rt->u.dst.lastuse = jiffies;
1566 rt->rt6i_expires = 0;
1567
1568 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1569 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1570 rt->rt6i_metric = 0;
1571
1572 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1573#ifdef CONFIG_IPV6_SUBTREES
1574 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1575#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001576 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577 }
1578 return rt;
1579}
1580
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001581#ifdef CONFIG_IPV6_ROUTE_INFO
1582static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1583 struct in6_addr *gwaddr, int ifindex)
1584{
1585 struct fib6_node *fn;
1586 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001587 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001588
Thomas Grafc71099a2006-08-04 23:20:06 -07001589 table = fib6_get_table(RT6_TABLE_INFO);
1590 if (table == NULL)
1591 return NULL;
1592
1593 write_lock_bh(&table->tb6_lock);
1594 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001595 if (!fn)
1596 goto out;
1597
Eric Dumazet7cc48262007-02-09 16:22:57 -08001598 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001599 if (rt->rt6i_dev->ifindex != ifindex)
1600 continue;
1601 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1602 continue;
1603 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1604 continue;
1605 dst_hold(&rt->u.dst);
1606 break;
1607 }
1608out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001609 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001610 return rt;
1611}
1612
1613static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1614 struct in6_addr *gwaddr, int ifindex,
1615 unsigned pref)
1616{
Thomas Graf86872cb2006-08-22 00:01:08 -07001617 struct fib6_config cfg = {
1618 .fc_table = RT6_TABLE_INFO,
1619 .fc_metric = 1024,
1620 .fc_ifindex = ifindex,
1621 .fc_dst_len = prefixlen,
1622 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1623 RTF_UP | RTF_PREF(pref),
1624 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001625
Thomas Graf86872cb2006-08-22 00:01:08 -07001626 ipv6_addr_copy(&cfg.fc_dst, prefix);
1627 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1628
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001629 /* We should treat it as a default route if prefix length is 0. */
1630 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001631 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001632
Thomas Graf86872cb2006-08-22 00:01:08 -07001633 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001634
1635 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1636}
1637#endif
1638
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001640{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001642 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643
Thomas Grafc71099a2006-08-04 23:20:06 -07001644 table = fib6_get_table(RT6_TABLE_DFLT);
1645 if (table == NULL)
1646 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647
Thomas Grafc71099a2006-08-04 23:20:06 -07001648 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001649 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001651 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1653 break;
1654 }
1655 if (rt)
1656 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001657 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658 return rt;
1659}
1660
1661struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001662 struct net_device *dev,
1663 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664{
Thomas Graf86872cb2006-08-22 00:01:08 -07001665 struct fib6_config cfg = {
1666 .fc_table = RT6_TABLE_DFLT,
1667 .fc_metric = 1024,
1668 .fc_ifindex = dev->ifindex,
1669 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1670 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1671 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672
Thomas Graf86872cb2006-08-22 00:01:08 -07001673 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674
Thomas Graf86872cb2006-08-22 00:01:08 -07001675 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677 return rt6_get_dflt_router(gwaddr, dev);
1678}
1679
1680void rt6_purge_dflt_routers(void)
1681{
1682 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001683 struct fib6_table *table;
1684
1685 /* NOTE: Keep consistent with rt6_get_dflt_router */
1686 table = fib6_get_table(RT6_TABLE_DFLT);
1687 if (table == NULL)
1688 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689
1690restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001691 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001692 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1694 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001695 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001696 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 goto restart;
1698 }
1699 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001700 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701}
1702
Thomas Graf86872cb2006-08-22 00:01:08 -07001703static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1704 struct fib6_config *cfg)
1705{
1706 memset(cfg, 0, sizeof(*cfg));
1707
1708 cfg->fc_table = RT6_TABLE_MAIN;
1709 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1710 cfg->fc_metric = rtmsg->rtmsg_metric;
1711 cfg->fc_expires = rtmsg->rtmsg_info;
1712 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1713 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1714 cfg->fc_flags = rtmsg->rtmsg_flags;
1715
1716 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1717 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1718 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1719}
1720
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1722{
Thomas Graf86872cb2006-08-22 00:01:08 -07001723 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724 struct in6_rtmsg rtmsg;
1725 int err;
1726
1727 switch(cmd) {
1728 case SIOCADDRT: /* Add a route */
1729 case SIOCDELRT: /* Delete a route */
1730 if (!capable(CAP_NET_ADMIN))
1731 return -EPERM;
1732 err = copy_from_user(&rtmsg, arg,
1733 sizeof(struct in6_rtmsg));
1734 if (err)
1735 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001736
1737 rtmsg_to_fib6_config(&rtmsg, &cfg);
1738
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 rtnl_lock();
1740 switch (cmd) {
1741 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001742 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743 break;
1744 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001745 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 break;
1747 default:
1748 err = -EINVAL;
1749 }
1750 rtnl_unlock();
1751
1752 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001753 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754
1755 return -EINVAL;
1756}
1757
1758/*
1759 * Drop the packet on the floor
1760 */
1761
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001762static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1763 int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001765 int type;
1766 switch (ipstats_mib_noroutes) {
1767 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001768 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001769 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1770 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1771 break;
1772 }
1773 /* FALLTHROUGH */
1774 case IPSTATS_MIB_OUTNOROUTES:
1775 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1776 break;
1777 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001778 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 kfree_skb(skb);
1780 return 0;
1781}
1782
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001783static int ip6_pkt_discard(struct sk_buff *skb)
1784{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001785 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001786}
1787
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001788static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789{
1790 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001791 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792}
1793
David S. Miller6723ab52006-10-18 21:20:57 -07001794#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1795
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001796static int ip6_pkt_prohibit(struct sk_buff *skb)
1797{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001798 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001799}
1800
1801static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1802{
1803 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001804 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001805}
1806
David S. Miller6723ab52006-10-18 21:20:57 -07001807#endif
1808
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809/*
1810 * Allocate a dst for local (unicast / anycast) address.
1811 */
1812
1813struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1814 const struct in6_addr *addr,
1815 int anycast)
1816{
1817 struct rt6_info *rt = ip6_dst_alloc();
1818
1819 if (rt == NULL)
1820 return ERR_PTR(-ENOMEM);
1821
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001822 dev_hold(init_net.loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823 in6_dev_hold(idev);
1824
1825 rt->u.dst.flags = DST_HOST;
1826 rt->u.dst.input = ip6_input;
1827 rt->u.dst.output = ip6_output;
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001828 rt->rt6i_dev = init_net.loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829 rt->rt6i_idev = idev;
1830 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1831 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1832 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1833 rt->u.dst.obsolete = -1;
1834
1835 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001836 if (anycast)
1837 rt->rt6i_flags |= RTF_ANYCAST;
1838 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001839 rt->rt6i_flags |= RTF_LOCAL;
1840 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1841 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001842 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843 return ERR_PTR(-ENOMEM);
1844 }
1845
1846 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1847 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001848 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849
1850 atomic_set(&rt->u.dst.__refcnt, 1);
1851
1852 return rt;
1853}
1854
1855static int fib6_ifdown(struct rt6_info *rt, void *arg)
1856{
1857 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1858 rt != &ip6_null_entry) {
1859 RT6_TRACE("deleted by ifdown %p\n", rt);
1860 return -1;
1861 }
1862 return 0;
1863}
1864
1865void rt6_ifdown(struct net_device *dev)
1866{
Thomas Grafc71099a2006-08-04 23:20:06 -07001867 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868}
1869
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1875
1876static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1877{
1878 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1879 struct inet6_dev *idev;
1880
1881 /* In IPv6 pmtu discovery is not optional,
1882 so that RTAX_MTU lock cannot disable it.
1883 We still use this lock to block changes
1884 caused by addrconf/ndisc.
1885 */
1886
1887 idev = __in6_dev_get(arg->dev);
1888 if (idev == NULL)
1889 return 0;
1890
1891 /* For administrative MTU increase, there is no way to discover
1892 IPv6 PMTU increase, so PMTU increase should be updated here.
1893 Since RFC 1981 doesn't include administrative MTU increase
1894 update PMTU increase is a MUST. (i.e. jumbo frame)
1895 */
1896 /*
1897 If new MTU is less than route PMTU, this new MTU will be the
1898 lowest MTU in the path, update the route PMTU to reflect PMTU
1899 decreases; if new MTU is greater than route PMTU, and the
1900 old MTU is the lowest MTU in the path, update the route PMTU
1901 to reflect the increase. In this case if the other nodes' MTU
1902 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1903 PMTU discouvery.
1904 */
1905 if (rt->rt6i_dev == arg->dev &&
1906 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001907 (dst_mtu(&rt->u.dst) > arg->mtu ||
1908 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001909 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Simon Arlott566cfd82007-07-26 00:09:55 -07001911 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1912 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 return 0;
1914}
1915
1916void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1917{
Thomas Grafc71099a2006-08-04 23:20:06 -07001918 struct rt6_mtu_change_arg arg = {
1919 .dev = dev,
1920 .mtu = mtu,
1921 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922
Thomas Grafc71099a2006-08-04 23:20:06 -07001923 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924}
1925
Patrick McHardyef7c79e2007-06-05 12:38:30 -07001926static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07001927 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07001928 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07001929 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07001930 [RTA_PRIORITY] = { .type = NLA_U32 },
1931 [RTA_METRICS] = { .type = NLA_NESTED },
1932};
1933
1934static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1935 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936{
Thomas Graf86872cb2006-08-22 00:01:08 -07001937 struct rtmsg *rtm;
1938 struct nlattr *tb[RTA_MAX+1];
1939 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940
Thomas Graf86872cb2006-08-22 00:01:08 -07001941 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1942 if (err < 0)
1943 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001944
Thomas Graf86872cb2006-08-22 00:01:08 -07001945 err = -EINVAL;
1946 rtm = nlmsg_data(nlh);
1947 memset(cfg, 0, sizeof(*cfg));
1948
1949 cfg->fc_table = rtm->rtm_table;
1950 cfg->fc_dst_len = rtm->rtm_dst_len;
1951 cfg->fc_src_len = rtm->rtm_src_len;
1952 cfg->fc_flags = RTF_UP;
1953 cfg->fc_protocol = rtm->rtm_protocol;
1954
1955 if (rtm->rtm_type == RTN_UNREACHABLE)
1956 cfg->fc_flags |= RTF_REJECT;
1957
1958 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1959 cfg->fc_nlinfo.nlh = nlh;
1960
1961 if (tb[RTA_GATEWAY]) {
1962 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1963 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001965
1966 if (tb[RTA_DST]) {
1967 int plen = (rtm->rtm_dst_len + 7) >> 3;
1968
1969 if (nla_len(tb[RTA_DST]) < plen)
1970 goto errout;
1971
1972 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001974
1975 if (tb[RTA_SRC]) {
1976 int plen = (rtm->rtm_src_len + 7) >> 3;
1977
1978 if (nla_len(tb[RTA_SRC]) < plen)
1979 goto errout;
1980
1981 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001982 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001983
1984 if (tb[RTA_OIF])
1985 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1986
1987 if (tb[RTA_PRIORITY])
1988 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1989
1990 if (tb[RTA_METRICS]) {
1991 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1992 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001994
1995 if (tb[RTA_TABLE])
1996 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1997
1998 err = 0;
1999errout:
2000 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002001}
2002
Thomas Grafc127ea22007-03-22 11:58:32 -07002003static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004{
Thomas Graf86872cb2006-08-22 00:01:08 -07002005 struct fib6_config cfg;
2006 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002007
Thomas Graf86872cb2006-08-22 00:01:08 -07002008 err = rtm_to_fib6_config(skb, nlh, &cfg);
2009 if (err < 0)
2010 return err;
2011
2012 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013}
2014
Thomas Grafc127ea22007-03-22 11:58:32 -07002015static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016{
Thomas Graf86872cb2006-08-22 00:01:08 -07002017 struct fib6_config cfg;
2018 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019
Thomas Graf86872cb2006-08-22 00:01:08 -07002020 err = rtm_to_fib6_config(skb, nlh, &cfg);
2021 if (err < 0)
2022 return err;
2023
2024 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025}
2026
Thomas Graf339bf982006-11-10 14:10:15 -08002027static inline size_t rt6_nlmsg_size(void)
2028{
2029 return NLMSG_ALIGN(sizeof(struct rtmsg))
2030 + nla_total_size(16) /* RTA_SRC */
2031 + nla_total_size(16) /* RTA_DST */
2032 + nla_total_size(16) /* RTA_GATEWAY */
2033 + nla_total_size(16) /* RTA_PREFSRC */
2034 + nla_total_size(4) /* RTA_TABLE */
2035 + nla_total_size(4) /* RTA_IIF */
2036 + nla_total_size(4) /* RTA_OIF */
2037 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002038 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002039 + nla_total_size(sizeof(struct rta_cacheinfo));
2040}
2041
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002043 struct in6_addr *dst, struct in6_addr *src,
2044 int iif, int type, u32 pid, u32 seq,
2045 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046{
2047 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002048 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002049 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002050 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051
2052 if (prefix) { /* user wants prefix routes only */
2053 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2054 /* success since this is not a prefix route */
2055 return 1;
2056 }
2057 }
2058
Thomas Graf2d7202b2006-08-22 00:01:27 -07002059 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2060 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002061 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002062
2063 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064 rtm->rtm_family = AF_INET6;
2065 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2066 rtm->rtm_src_len = rt->rt6i_src.plen;
2067 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002068 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002069 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002070 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002071 table = RT6_TABLE_UNSPEC;
2072 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002073 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074 if (rt->rt6i_flags&RTF_REJECT)
2075 rtm->rtm_type = RTN_UNREACHABLE;
2076 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2077 rtm->rtm_type = RTN_LOCAL;
2078 else
2079 rtm->rtm_type = RTN_UNICAST;
2080 rtm->rtm_flags = 0;
2081 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2082 rtm->rtm_protocol = rt->rt6i_protocol;
2083 if (rt->rt6i_flags&RTF_DYNAMIC)
2084 rtm->rtm_protocol = RTPROT_REDIRECT;
2085 else if (rt->rt6i_flags & RTF_ADDRCONF)
2086 rtm->rtm_protocol = RTPROT_KERNEL;
2087 else if (rt->rt6i_flags&RTF_DEFAULT)
2088 rtm->rtm_protocol = RTPROT_RA;
2089
2090 if (rt->rt6i_flags&RTF_CACHE)
2091 rtm->rtm_flags |= RTM_F_CLONED;
2092
2093 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002094 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002095 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002097 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098#ifdef CONFIG_IPV6_SUBTREES
2099 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002100 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002101 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002102 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002103 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104#endif
2105 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002106 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107 else if (dst) {
2108 struct in6_addr saddr_buf;
2109 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002110 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002112
Linus Torvalds1da177e2005-04-16 15:20:36 -07002113 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002114 goto nla_put_failure;
2115
Linus Torvalds1da177e2005-04-16 15:20:36 -07002116 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002117 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2118
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002120 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2121
2122 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002123
2124 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2125 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2126 expires, rt->u.dst.error) < 0)
2127 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128
Thomas Graf2d7202b2006-08-22 00:01:27 -07002129 return nlmsg_end(skb, nlh);
2130
2131nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002132 nlmsg_cancel(skb, nlh);
2133 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134}
2135
Patrick McHardy1b43af52006-08-10 23:11:17 -07002136int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137{
2138 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2139 int prefix;
2140
Thomas Graf2d7202b2006-08-22 00:01:27 -07002141 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2142 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2144 } else
2145 prefix = 0;
2146
2147 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2148 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002149 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002150}
2151
Thomas Grafc127ea22007-03-22 11:58:32 -07002152static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153{
Thomas Grafab364a62006-08-22 00:01:47 -07002154 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002156 struct sk_buff *skb;
2157 struct rtmsg *rtm;
2158 struct flowi fl;
2159 int err, iif = 0;
2160
2161 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2162 if (err < 0)
2163 goto errout;
2164
2165 err = -EINVAL;
2166 memset(&fl, 0, sizeof(fl));
2167
2168 if (tb[RTA_SRC]) {
2169 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2170 goto errout;
2171
2172 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2173 }
2174
2175 if (tb[RTA_DST]) {
2176 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2177 goto errout;
2178
2179 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2180 }
2181
2182 if (tb[RTA_IIF])
2183 iif = nla_get_u32(tb[RTA_IIF]);
2184
2185 if (tb[RTA_OIF])
2186 fl.oif = nla_get_u32(tb[RTA_OIF]);
2187
2188 if (iif) {
2189 struct net_device *dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002190 dev = __dev_get_by_index(&init_net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002191 if (!dev) {
2192 err = -ENODEV;
2193 goto errout;
2194 }
2195 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196
2197 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002198 if (skb == NULL) {
2199 err = -ENOBUFS;
2200 goto errout;
2201 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002202
2203 /* Reserve room for dummy headers, this skb can pass
2204 through good chunk of routing engine.
2205 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002206 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2208
Thomas Grafab364a62006-08-22 00:01:47 -07002209 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210 skb->dst = &rt->u.dst;
2211
Thomas Grafab364a62006-08-22 00:01:47 -07002212 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002214 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002216 kfree_skb(skb);
2217 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218 }
2219
Thomas Graf2942e902006-08-15 00:30:25 -07002220 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002221errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223}
2224
Thomas Graf86872cb2006-08-22 00:01:08 -07002225void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226{
2227 struct sk_buff *skb;
Thomas Graf86872cb2006-08-22 00:01:08 -07002228 u32 pid = 0, seq = 0;
2229 struct nlmsghdr *nlh = NULL;
Thomas Graf21713eb2006-08-15 00:35:24 -07002230 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231
Thomas Graf86872cb2006-08-22 00:01:08 -07002232 if (info) {
2233 pid = info->pid;
2234 nlh = info->nlh;
2235 if (nlh)
2236 seq = nlh->nlmsg_seq;
2237 }
2238
Thomas Graf339bf982006-11-10 14:10:15 -08002239 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002240 if (skb == NULL)
2241 goto errout;
2242
2243 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002244 if (err < 0) {
2245 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2246 WARN_ON(err == -EMSGSIZE);
2247 kfree_skb(skb);
2248 goto errout;
2249 }
Thomas Graf21713eb2006-08-15 00:35:24 -07002250 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2251errout:
2252 if (err < 0)
2253 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254}
2255
2256/*
2257 * /proc
2258 */
2259
2260#ifdef CONFIG_PROC_FS
2261
/*
 * NOTE(review): presumably the byte length of one formatted route line
 * and the cursor state of a buffer-based /proc reader. No user of either
 * definition is visible in this part of the file; confirm before removing.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2272
2273static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2274{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002275 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002277 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2278 rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279
2280#ifdef CONFIG_IPV6_SUBTREES
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002281 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2282 rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002284 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285#endif
2286
2287 if (rt->rt6i_nexthop) {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002288 seq_printf(m, NIP6_SEQFMT,
2289 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002291 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002293 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2294 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2295 rt->u.dst.__use, rt->rt6i_flags,
2296 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297 return 0;
2298}
2299
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002300static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002302 fib6_clean_all(rt6_info_route, 0, m);
2303 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002304}
2305
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002306static int ipv6_route_open(struct inode *inode, struct file *file)
2307{
2308 return single_open(file, ipv6_route_show, NULL);
2309}
2310
2311static const struct file_operations ipv6_route_proc_fops = {
2312 .owner = THIS_MODULE,
2313 .open = ipv6_route_open,
2314 .read = seq_read,
2315 .llseek = seq_lseek,
2316 .release = single_release,
2317};
2318
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2320{
2321 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2322 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2323 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2324 rt6_stats.fib_rt_cache,
2325 atomic_read(&ip6_dst_ops.entries),
2326 rt6_stats.fib_discarded_routes);
2327
2328 return 0;
2329}
2330
2331static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2332{
2333 return single_open(file, rt6_stats_seq_show, NULL);
2334}
2335
Arjan van de Ven9a321442007-02-12 00:55:35 -08002336static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337 .owner = THIS_MODULE,
2338 .open = rt6_stats_seq_open,
2339 .read = seq_read,
2340 .llseek = seq_lseek,
2341 .release = single_release,
2342};
2343#endif /* CONFIG_PROC_FS */
2344
2345#ifdef CONFIG_SYSCTL
2346
2347static int flush_delay;
2348
2349static
2350int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2351 void __user *buffer, size_t *lenp, loff_t *ppos)
2352{
2353 if (write) {
2354 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2355 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2356 return 0;
2357 } else
2358 return -EINVAL;
2359}
2360
2361ctl_table ipv6_route_table[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002362 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363 .procname = "flush",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002364 .data = &flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002366 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002367 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368 },
2369 {
2370 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2371 .procname = "gc_thresh",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002372 .data = &ip6_dst_ops.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002373 .maxlen = sizeof(int),
2374 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002375 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376 },
2377 {
2378 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2379 .procname = "max_size",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002380 .data = &ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381 .maxlen = sizeof(int),
2382 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002383 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002384 },
2385 {
2386 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2387 .procname = "gc_min_interval",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002388 .data = &ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389 .maxlen = sizeof(int),
2390 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002391 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002392 .strategy = &sysctl_jiffies,
2393 },
2394 {
2395 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2396 .procname = "gc_timeout",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002397 .data = &ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398 .maxlen = sizeof(int),
2399 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002400 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401 .strategy = &sysctl_jiffies,
2402 },
2403 {
2404 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2405 .procname = "gc_interval",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002406 .data = &ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407 .maxlen = sizeof(int),
2408 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002409 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410 .strategy = &sysctl_jiffies,
2411 },
2412 {
2413 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2414 .procname = "gc_elasticity",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002415 .data = &ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416 .maxlen = sizeof(int),
2417 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002418 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419 .strategy = &sysctl_jiffies,
2420 },
2421 {
2422 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2423 .procname = "mtu_expires",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002424 .data = &ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002425 .maxlen = sizeof(int),
2426 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002427 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428 .strategy = &sysctl_jiffies,
2429 },
2430 {
2431 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2432 .procname = "min_adv_mss",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002433 .data = &ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002434 .maxlen = sizeof(int),
2435 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002436 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437 .strategy = &sysctl_jiffies,
2438 },
2439 {
2440 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2441 .procname = "gc_min_interval_ms",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002442 .data = &ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002443 .maxlen = sizeof(int),
2444 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002445 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002446 .strategy = &sysctl_ms_jiffies,
2447 },
2448 { .ctl_name = 0 }
2449};
2450
2451#endif
2452
2453void __init ip6_route_init(void)
2454{
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002455 ip6_dst_ops.kmem_cachep =
2456 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09002457 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
David S. Miller14e50e52007-05-24 18:17:54 -07002458 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2459
Linus Torvalds1da177e2005-04-16 15:20:36 -07002460 fib6_init();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002461 proc_net_fops_create(&init_net, "ipv6_route", 0, &ipv6_route_proc_fops);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002462 proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463#ifdef CONFIG_XFRM
2464 xfrm6_init();
2465#endif
Thomas Graf101367c2006-08-04 03:39:02 -07002466#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2467 fib6_rules_init();
2468#endif
Thomas Grafc127ea22007-03-22 11:58:32 -07002469
2470 __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2471 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2472 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473}
2474
2475void ip6_route_cleanup(void)
2476{
Thomas Graf101367c2006-08-04 03:39:02 -07002477#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2478 fib6_rules_cleanup();
2479#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480#ifdef CONFIG_PROC_FS
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002481 proc_net_remove(&init_net, "ipv6_route");
2482 proc_net_remove(&init_net, "rt6_stats");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483#endif
2484#ifdef CONFIG_XFRM
2485 xfrm6_fini();
2486#endif
2487 rt6_ifdown(NULL);
2488 fib6_gc_cleanup();
2489 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2490}