blob: 6ff19f9eb9ee43c85633ab391fa45cdb42ff5ccf [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020047#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070058#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070059#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070060
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
/*
 * Debug verbosity for this file.  RDBG() and RT6_TRACE() only produce
 * output when RT6_DEBUG is 3 or higher; below that they expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/*
 * Compile-time switch for the input/output lookup paths: when non-zero,
 * routes that already have a nexthop are duplicated with rt6_alloc_clone();
 * when zero the matched route is returned without cloning.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070079
80static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
David S. Miller14e50e52007-05-24 18:17:54 -0700123static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
124{
125}
126
127static struct dst_ops ip6_dst_blackhole_ops = {
128 .family = AF_INET6,
129 .protocol = __constant_htons(ETH_P_IPV6),
130 .destroy = ip6_dst_destroy,
131 .check = ip6_dst_check,
132 .update_pmtu = ip6_rt_blackhole_update_pmtu,
133 .entry_size = sizeof(struct rt6_info),
134};
135
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136struct rt6_info ip6_null_entry = {
137 .u = {
138 .dst = {
139 .__refcnt = ATOMIC_INIT(1),
140 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 .obsolete = -1,
142 .error = -ENETUNREACH,
143 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
144 .input = ip6_pkt_discard,
145 .output = ip6_pkt_discard_out,
146 .ops = &ip6_dst_ops,
147 .path = (struct dst_entry*)&ip6_null_entry,
148 }
149 },
150 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
151 .rt6i_metric = ~(u32) 0,
152 .rt6i_ref = ATOMIC_INIT(1),
153};
154
Thomas Graf101367c2006-08-04 03:39:02 -0700155#ifdef CONFIG_IPV6_MULTIPLE_TABLES
156
David S. Miller6723ab52006-10-18 21:20:57 -0700157static int ip6_pkt_prohibit(struct sk_buff *skb);
158static int ip6_pkt_prohibit_out(struct sk_buff *skb);
159static int ip6_pkt_blk_hole(struct sk_buff *skb);
160
Thomas Graf101367c2006-08-04 03:39:02 -0700161struct rt6_info ip6_prohibit_entry = {
162 .u = {
163 .dst = {
164 .__refcnt = ATOMIC_INIT(1),
165 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700166 .obsolete = -1,
167 .error = -EACCES,
168 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Thomas Graf9ce8ade2006-10-18 20:46:54 -0700169 .input = ip6_pkt_prohibit,
170 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700171 .ops = &ip6_dst_ops,
172 .path = (struct dst_entry*)&ip6_prohibit_entry,
173 }
174 },
175 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
176 .rt6i_metric = ~(u32) 0,
177 .rt6i_ref = ATOMIC_INIT(1),
178};
179
180struct rt6_info ip6_blk_hole_entry = {
181 .u = {
182 .dst = {
183 .__refcnt = ATOMIC_INIT(1),
184 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700185 .obsolete = -1,
186 .error = -EINVAL,
187 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Thomas Graf9ce8ade2006-10-18 20:46:54 -0700188 .input = ip6_pkt_blk_hole,
189 .output = ip6_pkt_blk_hole,
Thomas Graf101367c2006-08-04 03:39:02 -0700190 .ops = &ip6_dst_ops,
191 .path = (struct dst_entry*)&ip6_blk_hole_entry,
192 }
193 },
194 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
195 .rt6i_metric = ~(u32) 0,
196 .rt6i_ref = ATOMIC_INIT(1),
197};
198
199#endif
200
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201/* allocate dst with ip6_dst_ops */
202static __inline__ struct rt6_info *ip6_dst_alloc(void)
203{
204 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
205}
206
207static void ip6_dst_destroy(struct dst_entry *dst)
208{
209 struct rt6_info *rt = (struct rt6_info *)dst;
210 struct inet6_dev *idev = rt->rt6i_idev;
211
212 if (idev != NULL) {
213 rt->rt6i_idev = NULL;
214 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900215 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216}
217
218static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
219 int how)
220{
221 struct rt6_info *rt = (struct rt6_info *)dst;
222 struct inet6_dev *idev = rt->rt6i_idev;
223
Eric W. Biederman2774c7a2007-09-26 22:10:56 -0700224 if (dev != init_net.loopback_dev && idev != NULL && idev->dev == dev) {
225 struct inet6_dev *loopback_idev = in6_dev_get(init_net.loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 if (loopback_idev != NULL) {
227 rt->rt6i_idev = loopback_idev;
228 in6_dev_put(idev);
229 }
230 }
231}
232
233static __inline__ int rt6_check_expired(const struct rt6_info *rt)
234{
235 return (rt->rt6i_flags & RTF_EXPIRES &&
236 time_after(jiffies, rt->rt6i_expires));
237}
238
Thomas Grafc71099a2006-08-04 23:20:06 -0700239static inline int rt6_need_strict(struct in6_addr *daddr)
240{
241 return (ipv6_addr_type(daddr) &
242 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
243}
244
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700246 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247 */
248
249static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
250 int oif,
251 int strict)
252{
253 struct rt6_info *local = NULL;
254 struct rt6_info *sprt;
255
256 if (oif) {
Eric Dumazet7cc48262007-02-09 16:22:57 -0800257 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 struct net_device *dev = sprt->rt6i_dev;
259 if (dev->ifindex == oif)
260 return sprt;
261 if (dev->flags & IFF_LOOPBACK) {
262 if (sprt->rt6i_idev == NULL ||
263 sprt->rt6i_idev->dev->ifindex != oif) {
264 if (strict && oif)
265 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900266 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 local->rt6i_idev->dev->ifindex == oif))
268 continue;
269 }
270 local = sprt;
271 }
272 }
273
274 if (local)
275 return local;
276
277 if (strict)
278 return &ip6_null_entry;
279 }
280 return rt;
281}
282
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation to the nexthop
 * of @rt when its neighbour entry is not in a NUD_VALID state and the last
 * update is older than the interface's rtr_probe_interval.
 *
 * @rt may be NULL, in which case nothing happens.
 *
 * neigh->updated is the rate-limit timestamp and is modified here, so the
 * test-and-set is done under the write side of neigh->lock.  Holding only
 * the read side would let concurrent callers both pass the time check and
 * both send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	/* Cheap unlocked pre-check; repeated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Solicit the nexthop via its solicited-node multicast address. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
318
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800320 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700322static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800324 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700325 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800326 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700327 if ((dev->flags & IFF_LOOPBACK) &&
328 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
329 return 1;
330 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331}
332
Dave Jonesb6f99a22007-03-22 12:27:49 -0700333static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335 struct neighbour *neigh = rt->rt6i_nexthop;
336 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700337 if (rt->rt6i_flags & RTF_NONEXTHOP ||
338 !(rt->rt6i_flags & RTF_GATEWAY))
339 m = 1;
340 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800341 read_lock_bh(&neigh->lock);
342 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700343 m = 2;
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800344 else if (!(neigh->nud_state & NUD_FAILED))
345 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800346 read_unlock_bh(&neigh->lock);
347 }
348 return m;
349}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800351static int rt6_score_route(struct rt6_info *rt, int oif,
352 int strict)
353{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700354 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900355
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700356 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700357 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800358 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800359#ifdef CONFIG_IPV6_ROUTER_PREF
360 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700362 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800363 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800364 return -1;
365 return m;
366}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
David S. Millerf11e6652007-03-24 20:36:25 -0700368static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800370{
David S. Millerf11e6652007-03-24 20:36:25 -0700371 int m;
372
373 if (rt6_check_expired(rt))
374 goto out;
375
376 m = rt6_score_route(rt, oif, strict);
377 if (m < 0)
378 goto out;
379
380 if (m > *mpri) {
381 if (strict & RT6_LOOKUP_F_REACHABLE)
382 rt6_probe(match);
383 *mpri = m;
384 match = rt;
385 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
386 rt6_probe(rt);
387 }
388
389out:
390 return match;
391}
392
393static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394 struct rt6_info *rr_head,
395 u32 metric, int oif, int strict)
396{
397 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800398 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399
David S. Millerf11e6652007-03-24 20:36:25 -0700400 match = NULL;
401 for (rt = rr_head; rt && rt->rt6i_metric == metric;
402 rt = rt->u.dst.rt6_next)
403 match = find_match(rt, oif, strict, &mpri, match);
404 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
405 rt = rt->u.dst.rt6_next)
406 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800407
David S. Millerf11e6652007-03-24 20:36:25 -0700408 return match;
409}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800410
David S. Millerf11e6652007-03-24 20:36:25 -0700411static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412{
413 struct rt6_info *match, *rt0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414
David S. Millerf11e6652007-03-24 20:36:25 -0700415 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
416 __FUNCTION__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417
David S. Millerf11e6652007-03-24 20:36:25 -0700418 rt0 = fn->rr_ptr;
419 if (!rt0)
420 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
David S. Millerf11e6652007-03-24 20:36:25 -0700422 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800424 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700425 (strict & RT6_LOOKUP_F_REACHABLE)) {
426 struct rt6_info *next = rt0->u.dst.rt6_next;
427
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800428 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700429 if (!next || next->rt6i_metric != rt0->rt6i_metric)
430 next = fn->leaf;
431
432 if (next != rt0)
433 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434 }
435
David S. Millerf11e6652007-03-24 20:36:25 -0700436 RT6_TRACE("%s() => %p\n",
437 __FUNCTION__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800439 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700440}
441
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev.
 *
 * @opt:    start of the option, interpreted as struct route_info.
 * @len:    number of bytes available at @opt.
 * @gwaddr: address of the advertising router, used as gateway.
 *
 * A lifetime of zero deletes an existing matching route; a non-zero
 * lifetime creates the route if missing or refreshes its preference.
 * A lifetime of 0xffffffff means "no expiry".
 *
 * Returns 0 on success or -EINVAL when the option fails validation.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr masked, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* Clamp finite lifetimes so that HZ * lifetime cannot overflow. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime != 0xffffffff && lifetime > 0x7fffffff/HZ)
		lifetime = 0x7fffffff/HZ - 1;

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/*
		 * Build the prefix in a local buffer via ipv6_addr_prefix()
		 * (the original comment calls this "safe"; presumably it
		 * only reads prefix_len bits of the source -- confirm).
		 */
		ipv6_addr_prefix(&masked,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &masked;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	}

	if (!rt)
		return 0;

	if (lifetime == 0xffffffff) {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	} else {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
519
/*
 * BACKTRACK(saddr) - climb the fib6 tree after a failed match.
 *
 * Expects the locals 'rt' and 'fn' and the labels 'out' and 'restart' to
 * exist at the expansion site.  Does nothing unless rt is &ip6_null_entry.
 * Otherwise it walks towards the root: at each step it moves to the parent,
 * or, when the parent has a subtree other than the current node, looks
 * saddr up in that subtree.  It jumps to 'restart' at the first node that
 * carries RTN_RTINFO and to 'out' when a node flagged RTN_TL_ROOT is hit.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *parent; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			parent = fn->parent; \
			if (FIB6_SUBTREE(parent) && FIB6_SUBTREE(parent) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(parent), NULL, saddr); \
			else \
				fn = parent; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700537
538static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
539 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540{
541 struct fib6_node *fn;
542 struct rt6_info *rt;
543
Thomas Grafc71099a2006-08-04 23:20:06 -0700544 read_lock_bh(&table->tb6_lock);
545 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
546restart:
547 rt = fn->leaf;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700548 rt = rt6_device_match(rt, fl->oif, flags);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700549 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700550out:
YOSHIFUJI Hideaki33cc4892006-08-28 13:19:30 -0700551 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700552 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553
554 rt->u.dst.lastuse = jiffies;
Thomas Grafc71099a2006-08-04 23:20:06 -0700555 rt->u.dst.__use++;
556
557 return rt;
558
559}
560
561struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
562 int oif, int strict)
563{
564 struct flowi fl = {
565 .oif = oif,
566 .nl_u = {
567 .ip6_u = {
568 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700569 },
570 },
571 };
572 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700573 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700574
Thomas Grafadaa70b2006-10-13 15:01:03 -0700575 if (saddr) {
576 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577 flags |= RT6_LOOKUP_F_HAS_SADDR;
578 }
579
Thomas Grafc71099a2006-08-04 23:20:06 -0700580 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
581 if (dst->error == 0)
582 return (struct rt6_info *) dst;
583
584 dst_release(dst);
585
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 return NULL;
587}
588
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900589EXPORT_SYMBOL(rt6_lookup);
590
Thomas Grafc71099a2006-08-04 23:20:06 -0700591/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592 It takes new route entry, the addition fails by any reason the
593 route is freed. In any case, if caller does not hold it, it may
594 be destroyed.
595 */
596
Thomas Graf86872cb2006-08-22 00:01:08 -0700597static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598{
599 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700600 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601
Thomas Grafc71099a2006-08-04 23:20:06 -0700602 table = rt->rt6i_table;
603 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700604 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700605 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606
607 return err;
608}
609
Thomas Graf40e22e82006-08-22 00:00:45 -0700610int ip6_ins_rt(struct rt6_info *rt)
611{
Thomas Graf86872cb2006-08-22 00:01:08 -0700612 return __ip6_ins_rt(rt, NULL);
Thomas Graf40e22e82006-08-22 00:00:45 -0700613}
614
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800615static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
616 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700617{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618 struct rt6_info *rt;
619
620 /*
621 * Clone the route.
622 */
623
624 rt = ip6_rt_copy(ort);
625
626 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900627 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
628 if (rt->rt6i_dst.plen != 128 &&
629 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
630 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900632 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900634 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635 rt->rt6i_dst.plen = 128;
636 rt->rt6i_flags |= RTF_CACHE;
637 rt->u.dst.flags |= DST_HOST;
638
639#ifdef CONFIG_IPV6_SUBTREES
640 if (rt->rt6i_src.plen && saddr) {
641 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
642 rt->rt6i_src.plen = 128;
643 }
644#endif
645
646 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
647
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800648 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800650 return rt;
651}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800653static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
654{
655 struct rt6_info *rt = ip6_rt_copy(ort);
656 if (rt) {
657 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
658 rt->rt6i_dst.plen = 128;
659 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800660 rt->u.dst.flags |= DST_HOST;
661 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
662 }
663 return rt;
664}
665
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700666static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
667 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668{
669 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800670 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700671 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800673 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800674 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700676 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
678relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700679 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800681restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700682 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683
684restart:
David S. Millerf11e6652007-03-24 20:36:25 -0700685 rt = rt6_select(fn, fl->iif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700686 BACKTRACK(&fl->fl6_src);
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800687 if (rt == &ip6_null_entry ||
688 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800689 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800691 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700692 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800693
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800694 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800695 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800696 else {
697#if CLONE_OFFLINK_ROUTE
698 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
699#else
700 goto out2;
701#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800703
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800704 dst_release(&rt->u.dst);
705 rt = nrt ? : &ip6_null_entry;
706
707 dst_hold(&rt->u.dst);
708 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700709 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800710 if (!err)
711 goto out2;
712 }
713
714 if (--attempts <= 0)
715 goto out2;
716
717 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700718 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800719 * released someone could insert this route. Relookup.
720 */
721 dst_release(&rt->u.dst);
722 goto relookup;
723
724out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800725 if (reachable) {
726 reachable = 0;
727 goto restart_2;
728 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800729 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700730 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731out2:
732 rt->u.dst.lastuse = jiffies;
733 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700734
735 return rt;
736}
737
738void ip6_route_input(struct sk_buff *skb)
739{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700740 struct ipv6hdr *iph = ipv6_hdr(skb);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700741 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700742 struct flowi fl = {
743 .iif = skb->dev->ifindex,
744 .nl_u = {
745 .ip6_u = {
746 .daddr = iph->daddr,
747 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800748 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700749 },
750 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900751 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700752 .proto = iph->nexthdr,
753 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700754
755 if (rt6_need_strict(&iph->daddr))
756 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700757
758 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
759}
760
761static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
762 struct flowi *fl, int flags)
763{
764 struct fib6_node *fn;
765 struct rt6_info *rt, *nrt;
766 int strict = 0;
767 int attempts = 3;
768 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800769 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700770
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700771 strict |= flags & RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700772
773relookup:
774 read_lock_bh(&table->tb6_lock);
775
776restart_2:
777 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
778
779restart:
David S. Millerf11e6652007-03-24 20:36:25 -0700780 rt = rt6_select(fn, fl->oif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700781 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700782 if (rt == &ip6_null_entry ||
783 rt->rt6i_flags & RTF_CACHE)
784 goto out;
785
786 dst_hold(&rt->u.dst);
787 read_unlock_bh(&table->tb6_lock);
788
789 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
790 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
791 else {
792#if CLONE_OFFLINK_ROUTE
793 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
794#else
795 goto out2;
796#endif
797 }
798
799 dst_release(&rt->u.dst);
800 rt = nrt ? : &ip6_null_entry;
801
802 dst_hold(&rt->u.dst);
803 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700804 err = ip6_ins_rt(nrt);
Thomas Grafc71099a2006-08-04 23:20:06 -0700805 if (!err)
806 goto out2;
807 }
808
809 if (--attempts <= 0)
810 goto out2;
811
812 /*
813 * Race condition! In the gap, when table->tb6_lock was
814 * released someone could insert this route. Relookup.
815 */
816 dst_release(&rt->u.dst);
817 goto relookup;
818
819out:
820 if (reachable) {
821 reachable = 0;
822 goto restart_2;
823 }
824 dst_hold(&rt->u.dst);
825 read_unlock_bh(&table->tb6_lock);
826out2:
827 rt->u.dst.lastuse = jiffies;
828 rt->u.dst.__use++;
829 return rt;
830}
831
832struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
833{
834 int flags = 0;
835
836 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700837 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700838
Thomas Grafadaa70b2006-10-13 15:01:03 -0700839 if (!ipv6_addr_any(&fl->fl6_src))
840 flags |= RT6_LOOKUP_F_HAS_SADDR;
841
Thomas Grafc71099a2006-08-04 23:20:06 -0700842 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843}
844
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900845EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846
/*
 * input/output handler installed on blackhole dst entries: free the
 * packet and report success.
 */
static int ip6_blackhole_output(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
852
853int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
854{
855 struct rt6_info *ort = (struct rt6_info *) *dstp;
856 struct rt6_info *rt = (struct rt6_info *)
857 dst_alloc(&ip6_dst_blackhole_ops);
858 struct dst_entry *new = NULL;
859
860 if (rt) {
861 new = &rt->u.dst;
862
863 atomic_set(&new->__refcnt, 1);
864 new->__use = 1;
865 new->input = ip6_blackhole_output;
866 new->output = ip6_blackhole_output;
867
868 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
869 new->dev = ort->u.dst.dev;
870 if (new->dev)
871 dev_hold(new->dev);
872 rt->rt6i_idev = ort->rt6i_idev;
873 if (rt->rt6i_idev)
874 in6_dev_hold(rt->rt6i_idev);
875 rt->rt6i_expires = 0;
876
877 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
878 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
879 rt->rt6i_metric = 0;
880
881 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
882#ifdef CONFIG_IPV6_SUBTREES
883 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
884#endif
885
886 dst_free(new);
887 }
888
889 dst_release(*dstp);
890 *dstp = new;
891 return (new ? 0 : -ENOMEM);
892}
893EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
894
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895/*
896 * Destination cache support functions
897 */
898
899static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
900{
901 struct rt6_info *rt;
902
903 rt = (struct rt6_info *) dst;
904
905 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
906 return dst;
907
908 return NULL;
909}
910
911static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
912{
913 struct rt6_info *rt = (struct rt6_info *) dst;
914
915 if (rt) {
916 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700917 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 else
919 dst_release(dst);
920 }
921 return NULL;
922}
923
924static void ip6_link_failure(struct sk_buff *skb)
925{
926 struct rt6_info *rt;
927
928 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
929
930 rt = (struct rt6_info *) skb->dst;
931 if (rt) {
932 if (rt->rt6i_flags&RTF_CACHE) {
933 dst_set_expires(&rt->u.dst, 0);
934 rt->rt6i_flags |= RTF_EXPIRES;
935 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
936 rt->rt6i_node->fn_sernum = -1;
937 }
938}
939
940static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
941{
942 struct rt6_info *rt6 = (struct rt6_info*)dst;
943
944 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
945 rt6->rt6i_flags |= RTF_MODIFIED;
946 if (mtu < IPV6_MIN_MTU) {
947 mtu = IPV6_MIN_MTU;
948 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
949 }
950 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700951 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 }
953}
954
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955static int ipv6_get_mtu(struct net_device *dev);
956
957static inline unsigned int ipv6_advmss(unsigned int mtu)
958{
959 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
960
961 if (mtu < ip6_rt_min_advmss)
962 mtu = ip6_rt_min_advmss;
963
964 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900965 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
966 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
967 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 * rely only on pmtu discovery"
969 */
970 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
971 mtu = IPV6_MAXPLEN;
972 return mtu;
973}
974
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700975static struct dst_entry *ndisc_dst_gc_list;
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700976static DEFINE_SPINLOCK(ndisc_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700977
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900978struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 struct neighbour *neigh,
980 struct in6_addr *addr,
981 int (*output)(struct sk_buff *))
982{
983 struct rt6_info *rt;
984 struct inet6_dev *idev = in6_dev_get(dev);
985
986 if (unlikely(idev == NULL))
987 return NULL;
988
989 rt = ip6_dst_alloc();
990 if (unlikely(rt == NULL)) {
991 in6_dev_put(idev);
992 goto out;
993 }
994
995 dev_hold(dev);
996 if (neigh)
997 neigh_hold(neigh);
998 else
999 neigh = ndisc_get_neigh(dev, addr);
1000
1001 rt->rt6i_dev = dev;
1002 rt->rt6i_idev = idev;
1003 rt->rt6i_nexthop = neigh;
1004 atomic_set(&rt->u.dst.__refcnt, 1);
1005 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
1006 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1007 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1008 rt->u.dst.output = output;
1009
1010#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001011 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1012 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 : 0;
1014 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1015 rt->rt6i_dst.plen = 128;
1016#endif
1017
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001018 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 rt->u.dst.next = ndisc_dst_gc_list;
1020 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001021 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022
1023 fib6_force_start_gc();
1024
1025out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001026 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027}
1028
1029int ndisc_dst_gc(int *more)
1030{
1031 struct dst_entry *dst, *next, **pprev;
1032 int freed;
1033
1034 next = NULL;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001035 freed = 0;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001036
1037 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001039
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040 while ((dst = *pprev) != NULL) {
1041 if (!atomic_read(&dst->__refcnt)) {
1042 *pprev = dst->next;
1043 dst_free(dst);
1044 freed++;
1045 } else {
1046 pprev = &dst->next;
1047 (*more)++;
1048 }
1049 }
1050
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001051 spin_unlock_bh(&ndisc_lock);
1052
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053 return freed;
1054}
1055
1056static int ip6_dst_gc(void)
1057{
1058 static unsigned expire = 30*HZ;
1059 static unsigned long last_gc;
1060 unsigned long now = jiffies;
1061
1062 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
1063 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
1064 goto out;
1065
1066 expire++;
1067 fib6_run_gc(expire);
1068 last_gc = now;
1069 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1070 expire = ip6_rt_gc_timeout>>1;
1071
1072out:
1073 expire -= expire>>ip6_rt_gc_elasticity;
1074 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1075}
1076
1077/* Clean host part of a prefix. Not necessary in radix tree,
1078 but results in cleaner routing tables.
1079
1080 Remove it only when all the things will work!
1081 */
1082
1083static int ipv6_get_mtu(struct net_device *dev)
1084{
1085 int mtu = IPV6_MIN_MTU;
1086 struct inet6_dev *idev;
1087
1088 idev = in6_dev_get(dev);
1089 if (idev) {
1090 mtu = idev->cnf.mtu6;
1091 in6_dev_put(idev);
1092 }
1093 return mtu;
1094}
1095
1096int ipv6_get_hoplimit(struct net_device *dev)
1097{
1098 int hoplimit = ipv6_devconf.hop_limit;
1099 struct inet6_dev *idev;
1100
1101 idev = in6_dev_get(dev);
1102 if (idev) {
1103 hoplimit = idev->cnf.hop_limit;
1104 in6_dev_put(idev);
1105 }
1106 return hoplimit;
1107}
1108
1109/*
1110 *
1111 */
1112
/*
 *	Build a route from @cfg and insert it into its FIB table.
 *
 *	@cfg is also written to: a zero fc_metric is replaced with
 *	IP6_RT_PRIO_USER and an RTPROT_UNSPEC fc_protocol with RTPROT_BOOT.
 *
 *	Reference handling: the device and inet6_dev references acquired
 *	here are stored in the new route on the success path (they are not
 *	dropped before __ip6_ins_rt() is called).  On every error path that
 *	goes through "out", the device, the inet6_dev and the half-built
 *	route are released.
 *
 *	Returns the result of __ip6_ins_rt() on success, or a negative errno:
 *	  -EINVAL       bad prefix length, source prefix without
 *			CONFIG_IPV6_SUBTREES, non-unicast gateway, gateway
 *			without a usable (non-loopback) device, or a metric
 *			attribute type above RTAX_MAX
 *	  -ENODEV       unknown ifindex, device without inet6_dev, or no
 *			device could be determined
 *	  -ENOBUFS      the FIB table could not be obtained
 *	  -ENOMEM       the route could not be allocated
 *	  -EHOSTUNREACH the gateway is not reachable via a suitable route
 *	or the error reported by the neighbour lookup.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	int err;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		return -EINVAL;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-specific routes need subtree support. */
	if (cfg->fc_src_len)
		return -EINVAL;
#endif
	if (cfg->fc_ifindex) {
		/* Explicit interface: take references on dev and its inet6_dev. */
		err = -ENODEV;
		dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	table = fib6_new_table(cfg->fc_table);
	if (table == NULL) {
		err = -ENOBUFS;
		goto out;
	}

	rt = ip6_dst_alloc();

	if (rt == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt->u.dst.obsolete = -1;
	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Default handlers; a reject route overrides them below. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->u.dst.input = ip6_mc_input;
	else
		rt->u.dst.input = ip6_forward;

	rt->u.dst.output = ip6_output;

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->u.dst.flags = DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here: they would result
	   in kernel looping.  Promote them to reject routes instead.
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != init_net.loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = init_net.loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->u.dst.output = ip6_pkt_discard_out;
		rt->u.dst.input = ip6_pkt_discard;
		rt->u.dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly forbids using non-link-local
			   addresses as the nexthop address; otherwise the
			   router would not be able to send redirects.
			   That is a good rule, but in some (rare!)
			   circumstances (SIT, PtP, NBMA NOARP links) it is
			   handy to allow exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* Find the route that reaches the gateway itself. */
			grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				/* The gateway must be reached via the requested device. */
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->u.dst);
					goto out;
				}
			} else {
				/* No device given: adopt the gateway route's device. */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* err stays -EHOSTUNREACH if the gateway route is itself gatewayed. */
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->u.dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		/* Resolve the nexthop neighbour; failure is reported as an ERR_PTR. */
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		/* Copy caller-supplied metrics; attribute type N goes to metrics[N-1]. */
		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
			}
		}
	}

	/* Fill in defaults for metrics the caller left at zero. */
	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	if (!rt->u.dst.metrics[RTAX_MTU-1])
		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
	rt->u.dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;
	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	/* Error unwind: drop whatever was acquired so far. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->u.dst);
	return err;
}
1312
Thomas Graf86872cb2006-08-22 00:01:08 -07001313static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314{
1315 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001316 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317
Patrick McHardy6c813a72006-08-06 22:22:47 -07001318 if (rt == &ip6_null_entry)
1319 return -ENOENT;
1320
Thomas Grafc71099a2006-08-04 23:20:06 -07001321 table = rt->rt6i_table;
1322 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001323
Thomas Graf86872cb2006-08-22 00:01:08 -07001324 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001325 dst_release(&rt->u.dst);
1326
Thomas Grafc71099a2006-08-04 23:20:06 -07001327 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328
1329 return err;
1330}
1331
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001332int ip6_del_rt(struct rt6_info *rt)
1333{
Thomas Graf86872cb2006-08-22 00:01:08 -07001334 return __ip6_del_rt(rt, NULL);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001335}
1336
Thomas Graf86872cb2006-08-22 00:01:08 -07001337static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338{
Thomas Grafc71099a2006-08-04 23:20:06 -07001339 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 struct fib6_node *fn;
1341 struct rt6_info *rt;
1342 int err = -ESRCH;
1343
Thomas Graf86872cb2006-08-22 00:01:08 -07001344 table = fib6_get_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001345 if (table == NULL)
1346 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347
Thomas Grafc71099a2006-08-04 23:20:06 -07001348 read_lock_bh(&table->tb6_lock);
1349
1350 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001351 &cfg->fc_dst, cfg->fc_dst_len,
1352 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001353
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001355 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001356 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001358 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001360 if (cfg->fc_flags & RTF_GATEWAY &&
1361 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001363 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364 continue;
1365 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001366 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367
Thomas Graf86872cb2006-08-22 00:01:08 -07001368 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 }
1370 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001371 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372
1373 return err;
1374}
1375
1376/*
1377 * Handle redirects
1378 */
/*
 * Flow key extended with the address of the router a redirect came from.
 * "fl" must stay the first member: ip6_route_redirect() passes a pointer
 * to this structure as a struct flowi *, and __ip6_route_redirect() casts
 * it back to reach "gateway".
 */
struct ip6rd_flowi {
	struct flowi fl;		/* ordinary flow key used for the FIB lookup */
	struct in6_addr gateway;	/* compared against rt6i_gateway of candidates */
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001383
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001384static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1385 struct flowi *fl,
1386 int flags)
1387{
1388 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1389 struct rt6_info *rt;
1390 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001391
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001393 * Get the "current" route for this destination and
1394 * check if the redirect has come from approriate router.
1395 *
1396 * RFC 2461 specifies that redirects should only be
1397 * accepted if they come from the nexthop to the target.
1398 * Due to the way the routes are chosen, this notion
1399 * is a bit fuzzy and one might need to check all possible
1400 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001401 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402
Thomas Grafc71099a2006-08-04 23:20:06 -07001403 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001404 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001405restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001406 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001407 /*
1408 * Current route is on-link; redirect is always invalid.
1409 *
1410 * Seems, previous statement is not true. It could
1411 * be node, which looks for us as on-link (f.e. proxy ndisc)
1412 * But then router serving it might decide, that we should
1413 * know truth 8)8) --ANK (980726).
1414 */
1415 if (rt6_check_expired(rt))
1416 continue;
1417 if (!(rt->rt6i_flags & RTF_GATEWAY))
1418 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001419 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001420 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001421 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001422 continue;
1423 break;
1424 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001425
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001426 if (!rt)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001427 rt = &ip6_null_entry;
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001428 BACKTRACK(&fl->fl6_src);
1429out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001430 dst_hold(&rt->u.dst);
1431
1432 read_unlock_bh(&table->tb6_lock);
1433
1434 return rt;
1435};
1436
1437static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1438 struct in6_addr *src,
1439 struct in6_addr *gateway,
1440 struct net_device *dev)
1441{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001442 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001443 struct ip6rd_flowi rdfl = {
1444 .fl = {
1445 .oif = dev->ifindex,
1446 .nl_u = {
1447 .ip6_u = {
1448 .daddr = *dest,
1449 .saddr = *src,
1450 },
1451 },
1452 },
1453 .gateway = *gateway,
1454 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001455
1456 if (rt6_need_strict(dest))
1457 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001458
1459 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1460}
1461
1462void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1463 struct in6_addr *saddr,
1464 struct neighbour *neigh, u8 *lladdr, int on_link)
1465{
1466 struct rt6_info *rt, *nrt = NULL;
1467 struct netevent_redirect netevent;
1468
1469 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1470
1471 if (rt == &ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472 if (net_ratelimit())
1473 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1474 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001475 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476 }
1477
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 /*
1479 * We have finally decided to accept it.
1480 */
1481
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001482 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1484 NEIGH_UPDATE_F_OVERRIDE|
1485 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1486 NEIGH_UPDATE_F_ISROUTER))
1487 );
1488
1489 /*
1490 * Redirect received -> path was valid.
1491 * Look, redirects are sent only in response to data packets,
1492 * so that this nexthop apparently is reachable. --ANK
1493 */
1494 dst_confirm(&rt->u.dst);
1495
1496 /* Duplicate redirect: silently ignore. */
1497 if (neigh == rt->u.dst.neighbour)
1498 goto out;
1499
1500 nrt = ip6_rt_copy(rt);
1501 if (nrt == NULL)
1502 goto out;
1503
1504 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1505 if (on_link)
1506 nrt->rt6i_flags &= ~RTF_GATEWAY;
1507
1508 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1509 nrt->rt6i_dst.plen = 128;
1510 nrt->u.dst.flags |= DST_HOST;
1511
1512 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1513 nrt->rt6i_nexthop = neigh_clone(neigh);
1514 /* Reset pmtu, it may be better */
1515 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1516 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1517
Thomas Graf40e22e82006-08-22 00:00:45 -07001518 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 goto out;
1520
Tom Tucker8d717402006-07-30 20:43:36 -07001521 netevent.old = &rt->u.dst;
1522 netevent.new = &nrt->u.dst;
1523 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1524
Linus Torvalds1da177e2005-04-16 15:20:36 -07001525 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001526 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527 return;
1528 }
1529
1530out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001531 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 return;
1533}
1534
1535/*
1536 * Handle ICMP "packet too big" messages
1537 * i.e. Path MTU discovery
1538 */
1539
1540void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1541 struct net_device *dev, u32 pmtu)
1542{
1543 struct rt6_info *rt, *nrt;
1544 int allfrag = 0;
1545
1546 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1547 if (rt == NULL)
1548 return;
1549
1550 if (pmtu >= dst_mtu(&rt->u.dst))
1551 goto out;
1552
1553 if (pmtu < IPV6_MIN_MTU) {
1554 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001555 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001556 * MTU (1280) and a fragment header should always be included
1557 * after a node receiving Too Big message reporting PMTU is
1558 * less than the IPv6 Minimum Link MTU.
1559 */
1560 pmtu = IPV6_MIN_MTU;
1561 allfrag = 1;
1562 }
1563
1564 /* New mtu received -> path was valid.
1565 They are sent only in response to data packets,
1566 so that this nexthop apparently is reachable. --ANK
1567 */
1568 dst_confirm(&rt->u.dst);
1569
1570 /* Host route. If it is static, it would be better
1571 not to override it, but add new one, so that
1572 when cache entry will expire old pmtu
1573 would return automatically.
1574 */
1575 if (rt->rt6i_flags & RTF_CACHE) {
1576 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1577 if (allfrag)
1578 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1579 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1580 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1581 goto out;
1582 }
1583
1584 /* Network route.
1585 Two cases are possible:
1586 1. It is connected route. Action: COW
1587 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1588 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001589 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001590 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001591 else
1592 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001593
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001594 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001595 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1596 if (allfrag)
1597 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1598
1599 /* According to RFC 1981, detecting PMTU increase shouldn't be
1600 * happened within 5 mins, the recommended timer is 10 mins.
1601 * Here this route expiration time is set to ip6_rt_mtu_expires
1602 * which is 10 mins. After 10 mins the decreased pmtu is expired
1603 * and detecting PMTU increase will be automatically happened.
1604 */
1605 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1606 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1607
Thomas Graf40e22e82006-08-22 00:00:45 -07001608 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610out:
1611 dst_release(&rt->u.dst);
1612}
1613
1614/*
1615 * Misc support functions
1616 */
1617
1618static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1619{
1620 struct rt6_info *rt = ip6_dst_alloc();
1621
1622 if (rt) {
1623 rt->u.dst.input = ort->u.dst.input;
1624 rt->u.dst.output = ort->u.dst.output;
1625
1626 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001627 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 rt->u.dst.dev = ort->u.dst.dev;
1629 if (rt->u.dst.dev)
1630 dev_hold(rt->u.dst.dev);
1631 rt->rt6i_idev = ort->rt6i_idev;
1632 if (rt->rt6i_idev)
1633 in6_dev_hold(rt->rt6i_idev);
1634 rt->u.dst.lastuse = jiffies;
1635 rt->rt6i_expires = 0;
1636
1637 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1638 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1639 rt->rt6i_metric = 0;
1640
1641 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1642#ifdef CONFIG_IPV6_SUBTREES
1643 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1644#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001645 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646 }
1647 return rt;
1648}
1649
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001650#ifdef CONFIG_IPV6_ROUTE_INFO
1651static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1652 struct in6_addr *gwaddr, int ifindex)
1653{
1654 struct fib6_node *fn;
1655 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001656 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001657
Thomas Grafc71099a2006-08-04 23:20:06 -07001658 table = fib6_get_table(RT6_TABLE_INFO);
1659 if (table == NULL)
1660 return NULL;
1661
1662 write_lock_bh(&table->tb6_lock);
1663 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001664 if (!fn)
1665 goto out;
1666
Eric Dumazet7cc48262007-02-09 16:22:57 -08001667 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001668 if (rt->rt6i_dev->ifindex != ifindex)
1669 continue;
1670 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1671 continue;
1672 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1673 continue;
1674 dst_hold(&rt->u.dst);
1675 break;
1676 }
1677out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001678 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001679 return rt;
1680}
1681
1682static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1683 struct in6_addr *gwaddr, int ifindex,
1684 unsigned pref)
1685{
Thomas Graf86872cb2006-08-22 00:01:08 -07001686 struct fib6_config cfg = {
1687 .fc_table = RT6_TABLE_INFO,
1688 .fc_metric = 1024,
1689 .fc_ifindex = ifindex,
1690 .fc_dst_len = prefixlen,
1691 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1692 RTF_UP | RTF_PREF(pref),
1693 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001694
Thomas Graf86872cb2006-08-22 00:01:08 -07001695 ipv6_addr_copy(&cfg.fc_dst, prefix);
1696 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1697
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001698 /* We should treat it as a default route if prefix length is 0. */
1699 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001700 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001701
Thomas Graf86872cb2006-08-22 00:01:08 -07001702 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001703
1704 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1705}
1706#endif
1707
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001709{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001711 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712
Thomas Grafc71099a2006-08-04 23:20:06 -07001713 table = fib6_get_table(RT6_TABLE_DFLT);
1714 if (table == NULL)
1715 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716
Thomas Grafc71099a2006-08-04 23:20:06 -07001717 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001718 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001720 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1722 break;
1723 }
1724 if (rt)
1725 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001726 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727 return rt;
1728}
1729
1730struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001731 struct net_device *dev,
1732 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733{
Thomas Graf86872cb2006-08-22 00:01:08 -07001734 struct fib6_config cfg = {
1735 .fc_table = RT6_TABLE_DFLT,
1736 .fc_metric = 1024,
1737 .fc_ifindex = dev->ifindex,
1738 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1739 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1740 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001741
Thomas Graf86872cb2006-08-22 00:01:08 -07001742 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743
Thomas Graf86872cb2006-08-22 00:01:08 -07001744 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746 return rt6_get_dflt_router(gwaddr, dev);
1747}
1748
1749void rt6_purge_dflt_routers(void)
1750{
1751 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001752 struct fib6_table *table;
1753
1754 /* NOTE: Keep consistent with rt6_get_dflt_router */
1755 table = fib6_get_table(RT6_TABLE_DFLT);
1756 if (table == NULL)
1757 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758
1759restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001760 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001761 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1763 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001764 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001765 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766 goto restart;
1767 }
1768 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001769 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770}
1771
Thomas Graf86872cb2006-08-22 00:01:08 -07001772static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1773 struct fib6_config *cfg)
1774{
1775 memset(cfg, 0, sizeof(*cfg));
1776
1777 cfg->fc_table = RT6_TABLE_MAIN;
1778 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1779 cfg->fc_metric = rtmsg->rtmsg_metric;
1780 cfg->fc_expires = rtmsg->rtmsg_info;
1781 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1782 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1783 cfg->fc_flags = rtmsg->rtmsg_flags;
1784
1785 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1786 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1787 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1788}
1789
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1791{
Thomas Graf86872cb2006-08-22 00:01:08 -07001792 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793 struct in6_rtmsg rtmsg;
1794 int err;
1795
1796 switch(cmd) {
1797 case SIOCADDRT: /* Add a route */
1798 case SIOCDELRT: /* Delete a route */
1799 if (!capable(CAP_NET_ADMIN))
1800 return -EPERM;
1801 err = copy_from_user(&rtmsg, arg,
1802 sizeof(struct in6_rtmsg));
1803 if (err)
1804 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001805
1806 rtmsg_to_fib6_config(&rtmsg, &cfg);
1807
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808 rtnl_lock();
1809 switch (cmd) {
1810 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001811 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001812 break;
1813 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001814 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815 break;
1816 default:
1817 err = -EINVAL;
1818 }
1819 rtnl_unlock();
1820
1821 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001822 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823
1824 return -EINVAL;
1825}
1826
1827/*
1828 * Drop the packet on the floor
1829 */
1830
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001831static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1832 int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001834 int type;
1835 switch (ipstats_mib_noroutes) {
1836 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001837 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001838 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1839 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1840 break;
1841 }
1842 /* FALLTHROUGH */
1843 case IPSTATS_MIB_OUTNOROUTES:
1844 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1845 break;
1846 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001847 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848 kfree_skb(skb);
1849 return 0;
1850}
1851
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001852static int ip6_pkt_discard(struct sk_buff *skb)
1853{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001854 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001855}
1856
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001857static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858{
1859 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001860 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861}
1862
David S. Miller6723ab52006-10-18 21:20:57 -07001863#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1864
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001865static int ip6_pkt_prohibit(struct sk_buff *skb)
1866{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001867 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001868}
1869
1870static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1871{
1872 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001873 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001874}
1875
/* Blackhole: free the packet silently (no ICMP, no counter) and return 0. */
static int ip6_pkt_blk_hole(struct sk_buff *skb)
{
	kfree_skb(skb);

	return 0;
}
1881
David S. Miller6723ab52006-10-18 21:20:57 -07001882#endif
1883
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884/*
1885 * Allocate a dst for local (unicast / anycast) address.
1886 */
1887
1888struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1889 const struct in6_addr *addr,
1890 int anycast)
1891{
1892 struct rt6_info *rt = ip6_dst_alloc();
1893
1894 if (rt == NULL)
1895 return ERR_PTR(-ENOMEM);
1896
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001897 dev_hold(init_net.loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898 in6_dev_hold(idev);
1899
1900 rt->u.dst.flags = DST_HOST;
1901 rt->u.dst.input = ip6_input;
1902 rt->u.dst.output = ip6_output;
Eric W. Biederman2774c7a2007-09-26 22:10:56 -07001903 rt->rt6i_dev = init_net.loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904 rt->rt6i_idev = idev;
1905 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1906 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1907 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1908 rt->u.dst.obsolete = -1;
1909
1910 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001911 if (anycast)
1912 rt->rt6i_flags |= RTF_ANYCAST;
1913 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 rt->rt6i_flags |= RTF_LOCAL;
1915 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1916 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001917 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918 return ERR_PTR(-ENOMEM);
1919 }
1920
1921 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1922 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001923 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924
1925 atomic_set(&rt->u.dst.__refcnt, 1);
1926
1927 return rt;
1928}
1929
1930static int fib6_ifdown(struct rt6_info *rt, void *arg)
1931{
1932 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1933 rt != &ip6_null_entry) {
1934 RT6_TRACE("deleted by ifdown %p\n", rt);
1935 return -1;
1936 }
1937 return 0;
1938}
1939
1940void rt6_ifdown(struct net_device *dev)
1941{
Thomas Grafc71099a2006-08-04 23:20:06 -07001942 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943}
1944
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1950
1951static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1952{
1953 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1954 struct inet6_dev *idev;
1955
1956 /* In IPv6 pmtu discovery is not optional,
1957 so that RTAX_MTU lock cannot disable it.
1958 We still use this lock to block changes
1959 caused by addrconf/ndisc.
1960 */
1961
1962 idev = __in6_dev_get(arg->dev);
1963 if (idev == NULL)
1964 return 0;
1965
1966 /* For administrative MTU increase, there is no way to discover
1967 IPv6 PMTU increase, so PMTU increase should be updated here.
1968 Since RFC 1981 doesn't include administrative MTU increase
1969 update PMTU increase is a MUST. (i.e. jumbo frame)
1970 */
1971 /*
1972 If new MTU is less than route PMTU, this new MTU will be the
1973 lowest MTU in the path, update the route PMTU to reflect PMTU
1974 decreases; if new MTU is greater than route PMTU, and the
1975 old MTU is the lowest MTU in the path, update the route PMTU
1976 to reflect the increase. In this case if the other nodes' MTU
1977 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1978 PMTU discouvery.
1979 */
1980 if (rt->rt6i_dev == arg->dev &&
1981 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001982 (dst_mtu(&rt->u.dst) > arg->mtu ||
1983 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001984 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Simon Arlott566cfd82007-07-26 00:09:55 -07001986 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1987 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988 return 0;
1989}
1990
1991void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1992{
Thomas Grafc71099a2006-08-04 23:20:06 -07001993 struct rt6_mtu_change_arg arg = {
1994 .dev = dev,
1995 .mtu = mtu,
1996 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997
Thomas Grafc71099a2006-08-04 23:20:06 -07001998 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999}
2000
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002001static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002002 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002003 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002004 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002005 [RTA_PRIORITY] = { .type = NLA_U32 },
2006 [RTA_METRICS] = { .type = NLA_NESTED },
2007};
2008
2009static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2010 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011{
Thomas Graf86872cb2006-08-22 00:01:08 -07002012 struct rtmsg *rtm;
2013 struct nlattr *tb[RTA_MAX+1];
2014 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015
Thomas Graf86872cb2006-08-22 00:01:08 -07002016 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2017 if (err < 0)
2018 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019
Thomas Graf86872cb2006-08-22 00:01:08 -07002020 err = -EINVAL;
2021 rtm = nlmsg_data(nlh);
2022 memset(cfg, 0, sizeof(*cfg));
2023
2024 cfg->fc_table = rtm->rtm_table;
2025 cfg->fc_dst_len = rtm->rtm_dst_len;
2026 cfg->fc_src_len = rtm->rtm_src_len;
2027 cfg->fc_flags = RTF_UP;
2028 cfg->fc_protocol = rtm->rtm_protocol;
2029
2030 if (rtm->rtm_type == RTN_UNREACHABLE)
2031 cfg->fc_flags |= RTF_REJECT;
2032
2033 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2034 cfg->fc_nlinfo.nlh = nlh;
2035
2036 if (tb[RTA_GATEWAY]) {
2037 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2038 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002040
2041 if (tb[RTA_DST]) {
2042 int plen = (rtm->rtm_dst_len + 7) >> 3;
2043
2044 if (nla_len(tb[RTA_DST]) < plen)
2045 goto errout;
2046
2047 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002049
2050 if (tb[RTA_SRC]) {
2051 int plen = (rtm->rtm_src_len + 7) >> 3;
2052
2053 if (nla_len(tb[RTA_SRC]) < plen)
2054 goto errout;
2055
2056 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002058
2059 if (tb[RTA_OIF])
2060 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2061
2062 if (tb[RTA_PRIORITY])
2063 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2064
2065 if (tb[RTA_METRICS]) {
2066 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2067 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002069
2070 if (tb[RTA_TABLE])
2071 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2072
2073 err = 0;
2074errout:
2075 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076}
2077
Thomas Grafc127ea22007-03-22 11:58:32 -07002078static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079{
Thomas Graf86872cb2006-08-22 00:01:08 -07002080 struct fib6_config cfg;
2081 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082
Thomas Graf86872cb2006-08-22 00:01:08 -07002083 err = rtm_to_fib6_config(skb, nlh, &cfg);
2084 if (err < 0)
2085 return err;
2086
2087 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088}
2089
Thomas Grafc127ea22007-03-22 11:58:32 -07002090static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091{
Thomas Graf86872cb2006-08-22 00:01:08 -07002092 struct fib6_config cfg;
2093 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094
Thomas Graf86872cb2006-08-22 00:01:08 -07002095 err = rtm_to_fib6_config(skb, nlh, &cfg);
2096 if (err < 0)
2097 return err;
2098
2099 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100}
2101
Thomas Graf339bf982006-11-10 14:10:15 -08002102static inline size_t rt6_nlmsg_size(void)
2103{
2104 return NLMSG_ALIGN(sizeof(struct rtmsg))
2105 + nla_total_size(16) /* RTA_SRC */
2106 + nla_total_size(16) /* RTA_DST */
2107 + nla_total_size(16) /* RTA_GATEWAY */
2108 + nla_total_size(16) /* RTA_PREFSRC */
2109 + nla_total_size(4) /* RTA_TABLE */
2110 + nla_total_size(4) /* RTA_IIF */
2111 + nla_total_size(4) /* RTA_OIF */
2112 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002113 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002114 + nla_total_size(sizeof(struct rta_cacheinfo));
2115}
2116
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002118 struct in6_addr *dst, struct in6_addr *src,
2119 int iif, int type, u32 pid, u32 seq,
2120 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121{
2122 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002123 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002124 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002125 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126
2127 if (prefix) { /* user wants prefix routes only */
2128 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2129 /* success since this is not a prefix route */
2130 return 1;
2131 }
2132 }
2133
Thomas Graf2d7202b2006-08-22 00:01:27 -07002134 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2135 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002136 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002137
2138 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139 rtm->rtm_family = AF_INET6;
2140 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2141 rtm->rtm_src_len = rt->rt6i_src.plen;
2142 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002143 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002144 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002145 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002146 table = RT6_TABLE_UNSPEC;
2147 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002148 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149 if (rt->rt6i_flags&RTF_REJECT)
2150 rtm->rtm_type = RTN_UNREACHABLE;
2151 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2152 rtm->rtm_type = RTN_LOCAL;
2153 else
2154 rtm->rtm_type = RTN_UNICAST;
2155 rtm->rtm_flags = 0;
2156 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2157 rtm->rtm_protocol = rt->rt6i_protocol;
2158 if (rt->rt6i_flags&RTF_DYNAMIC)
2159 rtm->rtm_protocol = RTPROT_REDIRECT;
2160 else if (rt->rt6i_flags & RTF_ADDRCONF)
2161 rtm->rtm_protocol = RTPROT_KERNEL;
2162 else if (rt->rt6i_flags&RTF_DEFAULT)
2163 rtm->rtm_protocol = RTPROT_RA;
2164
2165 if (rt->rt6i_flags&RTF_CACHE)
2166 rtm->rtm_flags |= RTM_F_CLONED;
2167
2168 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002169 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002170 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002172 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173#ifdef CONFIG_IPV6_SUBTREES
2174 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002175 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002176 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002177 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002178 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179#endif
2180 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002181 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002182 else if (dst) {
2183 struct in6_addr saddr_buf;
2184 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002185 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002187
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002189 goto nla_put_failure;
2190
Linus Torvalds1da177e2005-04-16 15:20:36 -07002191 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002192 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2193
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002195 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2196
2197 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002198
2199 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2200 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2201 expires, rt->u.dst.error) < 0)
2202 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203
Thomas Graf2d7202b2006-08-22 00:01:27 -07002204 return nlmsg_end(skb, nlh);
2205
2206nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002207 nlmsg_cancel(skb, nlh);
2208 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209}
2210
Patrick McHardy1b43af52006-08-10 23:11:17 -07002211int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212{
2213 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2214 int prefix;
2215
Thomas Graf2d7202b2006-08-22 00:01:27 -07002216 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2217 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2219 } else
2220 prefix = 0;
2221
2222 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2223 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002224 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225}
2226
Thomas Grafc127ea22007-03-22 11:58:32 -07002227static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228{
Thomas Grafab364a62006-08-22 00:01:47 -07002229 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002231 struct sk_buff *skb;
2232 struct rtmsg *rtm;
2233 struct flowi fl;
2234 int err, iif = 0;
2235
2236 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2237 if (err < 0)
2238 goto errout;
2239
2240 err = -EINVAL;
2241 memset(&fl, 0, sizeof(fl));
2242
2243 if (tb[RTA_SRC]) {
2244 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2245 goto errout;
2246
2247 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2248 }
2249
2250 if (tb[RTA_DST]) {
2251 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2252 goto errout;
2253
2254 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2255 }
2256
2257 if (tb[RTA_IIF])
2258 iif = nla_get_u32(tb[RTA_IIF]);
2259
2260 if (tb[RTA_OIF])
2261 fl.oif = nla_get_u32(tb[RTA_OIF]);
2262
2263 if (iif) {
2264 struct net_device *dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002265 dev = __dev_get_by_index(&init_net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002266 if (!dev) {
2267 err = -ENODEV;
2268 goto errout;
2269 }
2270 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271
2272 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002273 if (skb == NULL) {
2274 err = -ENOBUFS;
2275 goto errout;
2276 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277
2278 /* Reserve room for dummy headers, this skb can pass
2279 through good chunk of routing engine.
2280 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002281 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2283
Thomas Grafab364a62006-08-22 00:01:47 -07002284 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 skb->dst = &rt->u.dst;
2286
Thomas Grafab364a62006-08-22 00:01:47 -07002287 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002288 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002289 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002291 kfree_skb(skb);
2292 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293 }
2294
Thomas Graf2942e902006-08-15 00:30:25 -07002295 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002296errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002297 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002298}
2299
Thomas Graf86872cb2006-08-22 00:01:08 -07002300void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301{
2302 struct sk_buff *skb;
Thomas Graf86872cb2006-08-22 00:01:08 -07002303 u32 pid = 0, seq = 0;
2304 struct nlmsghdr *nlh = NULL;
Thomas Graf21713eb2006-08-15 00:35:24 -07002305 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306
Thomas Graf86872cb2006-08-22 00:01:08 -07002307 if (info) {
2308 pid = info->pid;
2309 nlh = info->nlh;
2310 if (nlh)
2311 seq = nlh->nlmsg_seq;
2312 }
2313
Thomas Graf339bf982006-11-10 14:10:15 -08002314 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002315 if (skb == NULL)
2316 goto errout;
2317
2318 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002319 if (err < 0) {
2320 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2321 WARN_ON(err == -EMSGSIZE);
2322 kfree_skb(skb);
2323 goto errout;
2324 }
Thomas Graf21713eb2006-08-15 00:35:24 -07002325 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2326errout:
2327 if (err < 0)
2328 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002329}
2330
2331/*
2332 * /proc
2333 */
2334
2335#ifdef CONFIG_PROC_FS
2336
/* Unit used by the /proc route dump to turn a byte offset into a record count. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* byte offset requested by the reader */
	int length;	/* output stops once len reaches this */
	int skip;	/* records skipped so far */
	int len;	/* bytes written so far */
};
2347
2348static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2349{
2350 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351
2352 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2353 arg->skip++;
2354 return 0;
2355 }
2356
2357 if (arg->len >= arg->length)
2358 return 0;
2359
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002360 arg->len += sprintf(arg->buffer + arg->len,
2361 NIP6_SEQFMT " %02x ",
2362 NIP6(rt->rt6i_dst.addr),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363 rt->rt6i_dst.plen);
2364
2365#ifdef CONFIG_IPV6_SUBTREES
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002366 arg->len += sprintf(arg->buffer + arg->len,
2367 NIP6_SEQFMT " %02x ",
2368 NIP6(rt->rt6i_src.addr),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002369 rt->rt6i_src.plen);
2370#else
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002371 arg->len += sprintf(arg->buffer + arg->len,
2372 "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002373#endif
2374
2375 if (rt->rt6i_nexthop) {
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002376 arg->len += sprintf(arg->buffer + arg->len,
2377 NIP6_SEQFMT,
2378 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379 } else {
YOSHIFUJI Hideaki33e93c92006-10-14 02:00:56 +09002380 arg->len += sprintf(arg->buffer + arg->len,
2381 "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382 }
2383 arg->len += sprintf(arg->buffer + arg->len,
2384 " %08x %08x %08x %08x %8s\n",
2385 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002386 rt->u.dst.__use, rt->rt6i_flags,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2388 return 0;
2389}
2390
2391static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2392{
Thomas Grafc71099a2006-08-04 23:20:06 -07002393 struct rt6_proc_arg arg = {
2394 .buffer = buffer,
2395 .offset = offset,
2396 .length = length,
2397 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398
Thomas Grafc71099a2006-08-04 23:20:06 -07002399 fib6_clean_all(rt6_info_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400
2401 *start = buffer;
2402 if (offset)
2403 *start += offset % RT6_INFO_LEN;
2404
2405 arg.len -= offset % RT6_INFO_LEN;
2406
2407 if (arg.len > length)
2408 arg.len = length;
2409 if (arg.len < 0)
2410 arg.len = 0;
2411
2412 return arg.len;
2413}
2414
Linus Torvalds1da177e2005-04-16 15:20:36 -07002415static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2416{
2417 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2418 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2419 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2420 rt6_stats.fib_rt_cache,
2421 atomic_read(&ip6_dst_ops.entries),
2422 rt6_stats.fib_discarded_routes);
2423
2424 return 0;
2425}
2426
2427static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2428{
2429 return single_open(file, rt6_stats_seq_show, NULL);
2430}
2431
Arjan van de Ven9a321442007-02-12 00:55:35 -08002432static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002433 .owner = THIS_MODULE,
2434 .open = rt6_stats_seq_open,
2435 .read = seq_read,
2436 .llseek = seq_lseek,
2437 .release = single_release,
2438};
2439#endif /* CONFIG_PROC_FS */
2440
2441#ifdef CONFIG_SYSCTL
2442
2443static int flush_delay;
2444
2445static
2446int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2447 void __user *buffer, size_t *lenp, loff_t *ppos)
2448{
2449 if (write) {
2450 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2451 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2452 return 0;
2453 } else
2454 return -EINVAL;
2455}
2456
2457ctl_table ipv6_route_table[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002458 {
2459 .ctl_name = NET_IPV6_ROUTE_FLUSH,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002460 .procname = "flush",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002461 .data = &flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002463 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002464 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002465 },
2466 {
2467 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2468 .procname = "gc_thresh",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002469 .data = &ip6_dst_ops.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470 .maxlen = sizeof(int),
2471 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002472 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473 },
2474 {
2475 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2476 .procname = "max_size",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002477 .data = &ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478 .maxlen = sizeof(int),
2479 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002480 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002481 },
2482 {
2483 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2484 .procname = "gc_min_interval",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002485 .data = &ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486 .maxlen = sizeof(int),
2487 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002488 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 .strategy = &sysctl_jiffies,
2490 },
2491 {
2492 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2493 .procname = "gc_timeout",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002494 .data = &ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495 .maxlen = sizeof(int),
2496 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002497 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 .strategy = &sysctl_jiffies,
2499 },
2500 {
2501 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2502 .procname = "gc_interval",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002503 .data = &ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002504 .maxlen = sizeof(int),
2505 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002506 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002507 .strategy = &sysctl_jiffies,
2508 },
2509 {
2510 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2511 .procname = "gc_elasticity",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002512 .data = &ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002513 .maxlen = sizeof(int),
2514 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002515 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002516 .strategy = &sysctl_jiffies,
2517 },
2518 {
2519 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2520 .procname = "mtu_expires",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002521 .data = &ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522 .maxlen = sizeof(int),
2523 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002524 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002525 .strategy = &sysctl_jiffies,
2526 },
2527 {
2528 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2529 .procname = "min_adv_mss",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002530 .data = &ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002531 .maxlen = sizeof(int),
2532 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002533 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002534 .strategy = &sysctl_jiffies,
2535 },
2536 {
2537 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2538 .procname = "gc_min_interval_ms",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002539 .data = &ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540 .maxlen = sizeof(int),
2541 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002542 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 .strategy = &sysctl_ms_jiffies,
2544 },
2545 { .ctl_name = 0 }
2546};
2547
2548#endif
2549
2550void __init ip6_route_init(void)
2551{
YOSHIFUJI Hideaki952a10b2007-04-21 20:13:44 +09002552#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553 struct proc_dir_entry *p;
YOSHIFUJI Hideaki952a10b2007-04-21 20:13:44 +09002554#endif
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002555 ip6_dst_ops.kmem_cachep =
2556 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
Paul Mundt20c2df82007-07-20 10:11:58 +09002557 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
David S. Miller14e50e52007-05-24 18:17:54 -07002558 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2559
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560 fib6_init();
2561#ifdef CONFIG_PROC_FS
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002562 p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 if (p)
2564 p->owner = THIS_MODULE;
2565
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002566 proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567#endif
2568#ifdef CONFIG_XFRM
2569 xfrm6_init();
2570#endif
Thomas Graf101367c2006-08-04 03:39:02 -07002571#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2572 fib6_rules_init();
2573#endif
Thomas Grafc127ea22007-03-22 11:58:32 -07002574
2575 __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);
2576 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);
2577 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578}
2579
2580void ip6_route_cleanup(void)
2581{
Thomas Graf101367c2006-08-04 03:39:02 -07002582#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2583 fib6_rules_cleanup();
2584#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585#ifdef CONFIG_PROC_FS
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002586 proc_net_remove(&init_net, "ipv6_route");
2587 proc_net_remove(&init_net, "rt6_stats");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588#endif
2589#ifdef CONFIG_XFRM
2590 xfrm6_fini();
2591#endif
2592 rt6_ifdown(NULL);
2593 fib6_gc_cleanup();
2594 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2595}