blob: 8f954c1e961faab83a78e9abd96cf48d8ff4e1da [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
15
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080043#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug verbosity for this file.  Set RT6_DEBUG to 3 to get tracing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* Tracing enabled: both helpers forward their arguments to printk(). */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
/* Tracing disabled: RDBG() vanishes, RT6_TRACE() is an empty statement. */
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#endif

/*
 * Tested with #if in ip6_pol_route(): when non-zero, routes that already
 * have a next hop are cloned through rt6_alloc_clone(); when zero they are
 * returned as-is.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080090#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080091static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080093 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080095static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080097 struct in6_addr *gwaddr, int ifindex);
98#endif
99
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100static struct dst_ops ip6_dst_ops = {
101 .family = AF_INET6,
102 .protocol = __constant_htons(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu862b82c2007-11-13 21:43:11 -0800111 .local_out = ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800113 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114};
115
David S. Miller14e50e52007-05-24 18:17:54 -0700116static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117{
118}
119
120static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
122 .protocol = __constant_htons(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800127 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700128};
129
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800130static struct rt6_info ip6_null_entry_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131 .u = {
132 .dst = {
133 .__refcnt = ATOMIC_INIT(1),
134 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 .obsolete = -1,
136 .error = -ENETUNREACH,
137 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
138 .input = ip6_pkt_discard,
139 .output = ip6_pkt_discard_out,
140 .ops = &ip6_dst_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 }
142 },
143 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
144 .rt6i_metric = ~(u32) 0,
145 .rt6i_ref = ATOMIC_INIT(1),
146};
147
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800148struct rt6_info *ip6_null_entry;
149
Thomas Graf101367c2006-08-04 03:39:02 -0700150#ifdef CONFIG_IPV6_MULTIPLE_TABLES
151
David S. Miller6723ab52006-10-18 21:20:57 -0700152static int ip6_pkt_prohibit(struct sk_buff *skb);
153static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700154
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800155struct rt6_info ip6_prohibit_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700156 .u = {
157 .dst = {
158 .__refcnt = ATOMIC_INIT(1),
159 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700160 .obsolete = -1,
161 .error = -EACCES,
162 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Thomas Graf9ce8ade2006-10-18 20:46:54 -0700163 .input = ip6_pkt_prohibit,
164 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700165 .ops = &ip6_dst_ops,
Thomas Graf101367c2006-08-04 03:39:02 -0700166 }
167 },
168 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
169 .rt6i_metric = ~(u32) 0,
170 .rt6i_ref = ATOMIC_INIT(1),
171};
172
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800173struct rt6_info *ip6_prohibit_entry;
174
175static struct rt6_info ip6_blk_hole_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700176 .u = {
177 .dst = {
178 .__refcnt = ATOMIC_INIT(1),
179 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700180 .obsolete = -1,
181 .error = -EINVAL,
182 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Herbert Xu352e5122007-11-13 21:34:06 -0800183 .input = dst_discard,
184 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700185 .ops = &ip6_dst_ops,
Thomas Graf101367c2006-08-04 03:39:02 -0700186 }
187 },
188 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
189 .rt6i_metric = ~(u32) 0,
190 .rt6i_ref = ATOMIC_INIT(1),
191};
192
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800193struct rt6_info *ip6_blk_hole_entry;
194
Thomas Graf101367c2006-08-04 03:39:02 -0700195#endif
196
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197/* allocate dst with ip6_dst_ops */
198static __inline__ struct rt6_info *ip6_dst_alloc(void)
199{
200 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
201}
202
203static void ip6_dst_destroy(struct dst_entry *dst)
204{
205 struct rt6_info *rt = (struct rt6_info *)dst;
206 struct inet6_dev *idev = rt->rt6i_idev;
207
208 if (idev != NULL) {
209 rt->rt6i_idev = NULL;
210 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900211 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212}
213
214static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
215 int how)
216{
217 struct rt6_info *rt = (struct rt6_info *)dst;
218 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800219 struct net_device *loopback_dev =
220 dev->nd_net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800222 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
223 struct inet6_dev *loopback_idev =
224 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 if (loopback_idev != NULL) {
226 rt->rt6i_idev = loopback_idev;
227 in6_dev_put(idev);
228 }
229 }
230}
231
232static __inline__ int rt6_check_expired(const struct rt6_info *rt)
233{
234 return (rt->rt6i_flags & RTF_EXPIRES &&
235 time_after(jiffies, rt->rt6i_expires));
236}
237
Thomas Grafc71099a2006-08-04 23:20:06 -0700238static inline int rt6_need_strict(struct in6_addr *daddr)
239{
240 return (ipv6_addr_type(daddr) &
241 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
242}
243
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700245 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 */
247
248static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
249 int oif,
250 int strict)
251{
252 struct rt6_info *local = NULL;
253 struct rt6_info *sprt;
254
255 if (oif) {
Eric Dumazet7cc48262007-02-09 16:22:57 -0800256 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 struct net_device *dev = sprt->rt6i_dev;
258 if (dev->ifindex == oif)
259 return sprt;
260 if (dev->flags & IFF_LOOPBACK) {
261 if (sprt->rt6i_idev == NULL ||
262 sprt->rt6i_idev->dev->ifindex != oif) {
263 if (strict && oif)
264 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900265 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 local->rt6i_idev->dev->ifindex == oif))
267 continue;
268 }
269 local = sprt;
270 }
271 }
272
273 if (local)
274 return local;
275
276 if (strict)
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800277 return ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 }
279 return rt;
280}
281
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next-hop neighbour of @rt is not
 * in a NUD_VALID state and the last update is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to the neighbour's
 * solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; refreshing neigh->updated below is what spaces the probes.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked test first. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		/* Became valid meanwhile, or probed too recently. */
		read_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
317
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800319 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700321static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800323 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700324 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800325 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700326 if ((dev->flags & IFF_LOOPBACK) &&
327 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
328 return 1;
329 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330}
331
Dave Jonesb6f99a22007-03-22 12:27:49 -0700332static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800334 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800335 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700336 if (rt->rt6i_flags & RTF_NONEXTHOP ||
337 !(rt->rt6i_flags & RTF_GATEWAY))
338 m = 1;
339 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800340 read_lock_bh(&neigh->lock);
341 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700342 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800343#ifdef CONFIG_IPV6_ROUTER_PREF
344 else if (neigh->nud_state & NUD_FAILED)
345 m = 0;
346#endif
347 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800348 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800349 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800350 } else
351 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800352 return m;
353}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800355static int rt6_score_route(struct rt6_info *rt, int oif,
356 int strict)
357{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700358 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900359
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700360 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700361 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800362 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800363#ifdef CONFIG_IPV6_ROUTER_PREF
364 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
365#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700366 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800367 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800368 return -1;
369 return m;
370}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371
David S. Millerf11e6652007-03-24 20:36:25 -0700372static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
373 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800374{
David S. Millerf11e6652007-03-24 20:36:25 -0700375 int m;
376
377 if (rt6_check_expired(rt))
378 goto out;
379
380 m = rt6_score_route(rt, oif, strict);
381 if (m < 0)
382 goto out;
383
384 if (m > *mpri) {
385 if (strict & RT6_LOOKUP_F_REACHABLE)
386 rt6_probe(match);
387 *mpri = m;
388 match = rt;
389 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
390 rt6_probe(rt);
391 }
392
393out:
394 return match;
395}
396
397static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
398 struct rt6_info *rr_head,
399 u32 metric, int oif, int strict)
400{
401 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800402 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403
David S. Millerf11e6652007-03-24 20:36:25 -0700404 match = NULL;
405 for (rt = rr_head; rt && rt->rt6i_metric == metric;
406 rt = rt->u.dst.rt6_next)
407 match = find_match(rt, oif, strict, &mpri, match);
408 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
409 rt = rt->u.dst.rt6_next)
410 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800411
David S. Millerf11e6652007-03-24 20:36:25 -0700412 return match;
413}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800414
David S. Millerf11e6652007-03-24 20:36:25 -0700415static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
416{
417 struct rt6_info *match, *rt0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418
David S. Millerf11e6652007-03-24 20:36:25 -0700419 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
420 __FUNCTION__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
David S. Millerf11e6652007-03-24 20:36:25 -0700422 rt0 = fn->rr_ptr;
423 if (!rt0)
424 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425
David S. Millerf11e6652007-03-24 20:36:25 -0700426 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800428 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700429 (strict & RT6_LOOKUP_F_REACHABLE)) {
430 struct rt6_info *next = rt0->u.dst.rt6_next;
431
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800432 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700433 if (!next || next->rt6i_metric != rt0->rt6i_metric)
434 next = fn->leaf;
435
436 if (next != rt0)
437 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 }
439
David S. Millerf11e6652007-03-24 20:36:25 -0700440 RT6_TRACE("%s() => %p\n",
441 __FUNCTION__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800443 return (match ? match : ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444}
445
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800446#ifdef CONFIG_IPV6_ROUTE_INFO
447int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
448 struct in6_addr *gwaddr)
449{
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800450 struct net *net = dev->nd_net;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800451 struct route_info *rinfo = (struct route_info *) opt;
452 struct in6_addr prefix_buf, *prefix;
453 unsigned int pref;
454 u32 lifetime;
455 struct rt6_info *rt;
456
457 if (len < sizeof(struct route_info)) {
458 return -EINVAL;
459 }
460
461 /* Sanity check for prefix_len and length */
462 if (rinfo->length > 3) {
463 return -EINVAL;
464 } else if (rinfo->prefix_len > 128) {
465 return -EINVAL;
466 } else if (rinfo->prefix_len > 64) {
467 if (rinfo->length < 2) {
468 return -EINVAL;
469 }
470 } else if (rinfo->prefix_len > 0) {
471 if (rinfo->length < 1) {
472 return -EINVAL;
473 }
474 }
475
476 pref = rinfo->route_pref;
477 if (pref == ICMPV6_ROUTER_PREF_INVALID)
478 pref = ICMPV6_ROUTER_PREF_MEDIUM;
479
Al Viroe69a4ad2006-11-14 20:56:00 -0800480 lifetime = ntohl(rinfo->lifetime);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800481 if (lifetime == 0xffffffff) {
482 /* infinity */
483 } else if (lifetime > 0x7fffffff/HZ) {
484 /* Avoid arithmetic overflow */
485 lifetime = 0x7fffffff/HZ - 1;
486 }
487
488 if (rinfo->length == 3)
489 prefix = (struct in6_addr *)rinfo->prefix;
490 else {
491 /* this function is safe */
492 ipv6_addr_prefix(&prefix_buf,
493 (struct in6_addr *)rinfo->prefix,
494 rinfo->prefix_len);
495 prefix = &prefix_buf;
496 }
497
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800498 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
499 dev->ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800500
501 if (rt && !lifetime) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700502 ip6_del_rt(rt);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800503 rt = NULL;
504 }
505
506 if (!rt && lifetime)
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800507 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800508 pref);
509 else if (rt)
510 rt->rt6i_flags = RTF_ROUTEINFO |
511 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
512
513 if (rt) {
514 if (lifetime == 0xffffffff) {
515 rt->rt6i_flags &= ~RTF_EXPIRES;
516 } else {
517 rt->rt6i_expires = jiffies + HZ * lifetime;
518 rt->rt6i_flags |= RTF_EXPIRES;
519 }
520 dst_release(&rt->u.dst);
521 }
522 return 0;
523}
524#endif
525
/*
 * BACKTRACK(saddr) - climb the FIB tree after a failed match.
 *
 * Must be expanded inside a function that has local variables `rt` and
 * `fn` and labels `out` and `restart`.  If `rt` is ip6_null_entry, the
 * macro walks from `fn` towards the root: at each step it moves to the
 * parent, or into the parent's subtree (looked up by @saddr) when that
 * subtree exists and is not where we came from.  It jumps to `restart`
 * at the first node carrying RTN_RTINFO and to `out` once a node flagged
 * RTN_TL_ROOT is reached.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700543
544static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
545 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546{
547 struct fib6_node *fn;
548 struct rt6_info *rt;
549
Thomas Grafc71099a2006-08-04 23:20:06 -0700550 read_lock_bh(&table->tb6_lock);
551 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
552restart:
553 rt = fn->leaf;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700554 rt = rt6_device_match(rt, fl->oif, flags);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700555 BACKTRACK(&fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700556out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800557 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700558 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700559 return rt;
560
561}
562
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800563struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
564 struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700565{
566 struct flowi fl = {
567 .oif = oif,
568 .nl_u = {
569 .ip6_u = {
570 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700571 },
572 },
573 };
574 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700575 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700576
Thomas Grafadaa70b2006-10-13 15:01:03 -0700577 if (saddr) {
578 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579 flags |= RT6_LOOKUP_F_HAS_SADDR;
580 }
581
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800582 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700583 if (dst->error == 0)
584 return (struct rt6_info *) dst;
585
586 dst_release(dst);
587
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 return NULL;
589}
590
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900591EXPORT_SYMBOL(rt6_lookup);
592
Thomas Grafc71099a2006-08-04 23:20:06 -0700593/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 It takes new route entry, the addition fails by any reason the
595 route is freed. In any case, if caller does not hold it, it may
596 be destroyed.
597 */
598
Thomas Graf86872cb2006-08-22 00:01:08 -0700599static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600{
601 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700602 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
Thomas Grafc71099a2006-08-04 23:20:06 -0700604 table = rt->rt6i_table;
605 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700606 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700607 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608
609 return err;
610}
611
Thomas Graf40e22e82006-08-22 00:00:45 -0700612int ip6_ins_rt(struct rt6_info *rt)
613{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800614 struct nl_info info = {
Daniel Lezcano55786892008-03-04 13:47:47 -0800615 .nl_net = rt->rt6i_dev->nd_net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800616 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800617 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700618}
619
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800620static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
621 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 struct rt6_info *rt;
624
625 /*
626 * Clone the route.
627 */
628
629 rt = ip6_rt_copy(ort);
630
631 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900632 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
633 if (rt->rt6i_dst.plen != 128 &&
634 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
635 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900637 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900639 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 rt->rt6i_dst.plen = 128;
641 rt->rt6i_flags |= RTF_CACHE;
642 rt->u.dst.flags |= DST_HOST;
643
644#ifdef CONFIG_IPV6_SUBTREES
645 if (rt->rt6i_src.plen && saddr) {
646 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
647 rt->rt6i_src.plen = 128;
648 }
649#endif
650
651 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
652
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800653 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800655 return rt;
656}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800658static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
659{
660 struct rt6_info *rt = ip6_rt_copy(ort);
661 if (rt) {
662 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
663 rt->rt6i_dst.plen = 128;
664 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800665 rt->u.dst.flags |= DST_HOST;
666 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
667 }
668 return rt;
669}
670
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700671static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700672 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673{
674 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800675 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700676 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800678 int err;
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800679 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700681 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
683relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700684 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800686restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700687 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688
689restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700690 rt = rt6_select(fn, oif, strict | reachable);
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700691 BACKTRACK(&fl->fl6_src);
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800692 if (rt == ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800693 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800694 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800696 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700697 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800698
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800699 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800700 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800701 else {
702#if CLONE_OFFLINK_ROUTE
703 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
704#else
705 goto out2;
706#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700707 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800708
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800709 dst_release(&rt->u.dst);
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800710 rt = nrt ? : ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800711
712 dst_hold(&rt->u.dst);
713 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700714 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800715 if (!err)
716 goto out2;
717 }
718
719 if (--attempts <= 0)
720 goto out2;
721
722 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700723 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800724 * released someone could insert this route. Relookup.
725 */
726 dst_release(&rt->u.dst);
727 goto relookup;
728
729out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800730 if (reachable) {
731 reachable = 0;
732 goto restart_2;
733 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800734 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700735 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736out2:
737 rt->u.dst.lastuse = jiffies;
738 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700739
740 return rt;
741}
742
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700743static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
744 struct flowi *fl, int flags)
745{
746 return ip6_pol_route(table, fl->iif, fl, flags);
747}
748
Thomas Grafc71099a2006-08-04 23:20:06 -0700749void ip6_route_input(struct sk_buff *skb)
750{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700751 struct ipv6hdr *iph = ipv6_hdr(skb);
Daniel Lezcano55786892008-03-04 13:47:47 -0800752 struct net *net = skb->dev->nd_net;
Thomas Grafadaa70b2006-10-13 15:01:03 -0700753 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700754 struct flowi fl = {
755 .iif = skb->dev->ifindex,
756 .nl_u = {
757 .ip6_u = {
758 .daddr = iph->daddr,
759 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800760 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700761 },
762 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900763 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700764 .proto = iph->nexthdr,
765 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700766
767 if (rt6_need_strict(&iph->daddr))
768 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700769
Daniel Lezcano55786892008-03-04 13:47:47 -0800770 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
Thomas Grafc71099a2006-08-04 23:20:06 -0700771}
772
773static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
774 struct flowi *fl, int flags)
775{
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700776 return ip6_pol_route(table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700777}
778
779struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
780{
781 int flags = 0;
782
783 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700784 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700785
Thomas Grafadaa70b2006-10-13 15:01:03 -0700786 if (!ipv6_addr_any(&fl->fl6_src))
787 flags |= RT6_LOOKUP_F_HAS_SADDR;
788
Daniel Lezcano58f09b72008-03-03 23:25:27 -0800789 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790}
791
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900792EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793
David S. Miller14e50e52007-05-24 18:17:54 -0700794int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
795{
796 struct rt6_info *ort = (struct rt6_info *) *dstp;
797 struct rt6_info *rt = (struct rt6_info *)
798 dst_alloc(&ip6_dst_blackhole_ops);
799 struct dst_entry *new = NULL;
800
801 if (rt) {
802 new = &rt->u.dst;
803
804 atomic_set(&new->__refcnt, 1);
805 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800806 new->input = dst_discard;
807 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700808
809 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
810 new->dev = ort->u.dst.dev;
811 if (new->dev)
812 dev_hold(new->dev);
813 rt->rt6i_idev = ort->rt6i_idev;
814 if (rt->rt6i_idev)
815 in6_dev_hold(rt->rt6i_idev);
816 rt->rt6i_expires = 0;
817
818 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
819 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
820 rt->rt6i_metric = 0;
821
822 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
823#ifdef CONFIG_IPV6_SUBTREES
824 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
825#endif
826
827 dst_free(new);
828 }
829
830 dst_release(*dstp);
831 *dstp = new;
832 return (new ? 0 : -ENOMEM);
833}
834EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
835
Linus Torvalds1da177e2005-04-16 15:20:36 -0700836/*
837 * Destination cache support functions
838 */
839
840static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
841{
842 struct rt6_info *rt;
843
844 rt = (struct rt6_info *) dst;
845
846 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
847 return dst;
848
849 return NULL;
850}
851
852static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
853{
854 struct rt6_info *rt = (struct rt6_info *) dst;
855
856 if (rt) {
857 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700858 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859 else
860 dst_release(dst);
861 }
862 return NULL;
863}
864
865static void ip6_link_failure(struct sk_buff *skb)
866{
867 struct rt6_info *rt;
868
869 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
870
871 rt = (struct rt6_info *) skb->dst;
872 if (rt) {
873 if (rt->rt6i_flags&RTF_CACHE) {
874 dst_set_expires(&rt->u.dst, 0);
875 rt->rt6i_flags |= RTF_EXPIRES;
876 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
877 rt->rt6i_node->fn_sernum = -1;
878 }
879}
880
881static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
882{
883 struct rt6_info *rt6 = (struct rt6_info*)dst;
884
885 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
886 rt6->rt6i_flags |= RTF_MODIFIED;
887 if (mtu < IPV6_MIN_MTU) {
888 mtu = IPV6_MIN_MTU;
889 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
890 }
891 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700892 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700893 }
894}
895
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896static int ipv6_get_mtu(struct net_device *dev);
897
Daniel Lezcano55786892008-03-04 13:47:47 -0800898static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700899{
900 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
901
Daniel Lezcano55786892008-03-04 13:47:47 -0800902 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
903 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904
905 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900906 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
907 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
908 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 * rely only on pmtu discovery"
910 */
911 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
912 mtu = IPV6_MAXPLEN;
913 return mtu;
914}
915
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800916static struct dst_entry *icmp6_dst_gc_list;
917static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700918
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800919struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920 struct neighbour *neigh,
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800921 struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922{
923 struct rt6_info *rt;
924 struct inet6_dev *idev = in6_dev_get(dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800925 struct net *net = dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926
927 if (unlikely(idev == NULL))
928 return NULL;
929
930 rt = ip6_dst_alloc();
931 if (unlikely(rt == NULL)) {
932 in6_dev_put(idev);
933 goto out;
934 }
935
936 dev_hold(dev);
937 if (neigh)
938 neigh_hold(neigh);
939 else
940 neigh = ndisc_get_neigh(dev, addr);
941
942 rt->rt6i_dev = dev;
943 rt->rt6i_idev = idev;
944 rt->rt6i_nexthop = neigh;
945 atomic_set(&rt->u.dst.__refcnt, 1);
946 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
947 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800948 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800949 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950
951#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900952 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
953 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 : 0;
955 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
956 rt->rt6i_dst.plen = 128;
957#endif
958
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800959 spin_lock_bh(&icmp6_dst_lock);
960 rt->u.dst.next = icmp6_dst_gc_list;
961 icmp6_dst_gc_list = &rt->u.dst;
962 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700963
Daniel Lezcano55786892008-03-04 13:47:47 -0800964 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965
966out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +0900967 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968}
969
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800970int icmp6_dst_gc(int *more)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971{
972 struct dst_entry *dst, *next, **pprev;
973 int freed;
974
975 next = NULL;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900976 freed = 0;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700977
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800978 spin_lock_bh(&icmp6_dst_lock);
979 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700980
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 while ((dst = *pprev) != NULL) {
982 if (!atomic_read(&dst->__refcnt)) {
983 *pprev = dst->next;
984 dst_free(dst);
985 freed++;
986 } else {
987 pprev = &dst->next;
988 (*more)++;
989 }
990 }
991
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800992 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700993
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 return freed;
995}
996
Daniel Lezcano569d3642008-01-18 03:56:57 -0800997static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998{
999 static unsigned expire = 30*HZ;
1000 static unsigned long last_gc;
1001 unsigned long now = jiffies;
1002
Daniel Lezcano49905092008-01-10 03:01:01 -08001003 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1004 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 goto out;
1006
1007 expire++;
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08001008 fib6_run_gc(expire, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009 last_gc = now;
1010 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
Daniel Lezcano49905092008-01-10 03:01:01 -08001011 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012
1013out:
Daniel Lezcano49905092008-01-10 03:01:01 -08001014 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1015 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016}
1017
1018/* Clean host part of a prefix. Not necessary in radix tree,
1019 but results in cleaner routing tables.
1020
1021 Remove it only when all the things will work!
1022 */
1023
1024static int ipv6_get_mtu(struct net_device *dev)
1025{
1026 int mtu = IPV6_MIN_MTU;
1027 struct inet6_dev *idev;
1028
1029 idev = in6_dev_get(dev);
1030 if (idev) {
1031 mtu = idev->cnf.mtu6;
1032 in6_dev_put(idev);
1033 }
1034 return mtu;
1035}
1036
1037int ipv6_get_hoplimit(struct net_device *dev)
1038{
1039 int hoplimit = ipv6_devconf.hop_limit;
1040 struct inet6_dev *idev;
1041
1042 idev = in6_dev_get(dev);
1043 if (idev) {
1044 hoplimit = idev->cnf.hop_limit;
1045 in6_dev_put(idev);
1046 }
1047 return hoplimit;
1048}
1049
1050/*
1051 *
1052 */
1053
Thomas Graf86872cb2006-08-22 00:01:08 -07001054int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055{
1056 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001057 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 struct rt6_info *rt = NULL;
1059 struct net_device *dev = NULL;
1060 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001061 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 int addr_type;
1063
Thomas Graf86872cb2006-08-22 00:01:08 -07001064 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065 return -EINVAL;
1066#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001067 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 return -EINVAL;
1069#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001070 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001072 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073 if (!dev)
1074 goto out;
1075 idev = in6_dev_get(dev);
1076 if (!idev)
1077 goto out;
1078 }
1079
Thomas Graf86872cb2006-08-22 00:01:08 -07001080 if (cfg->fc_metric == 0)
1081 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001082
Daniel Lezcano55786892008-03-04 13:47:47 -08001083 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001084 if (table == NULL) {
1085 err = -ENOBUFS;
1086 goto out;
1087 }
1088
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 rt = ip6_dst_alloc();
1090
1091 if (rt == NULL) {
1092 err = -ENOMEM;
1093 goto out;
1094 }
1095
1096 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001097 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098
Thomas Graf86872cb2006-08-22 00:01:08 -07001099 if (cfg->fc_protocol == RTPROT_UNSPEC)
1100 cfg->fc_protocol = RTPROT_BOOT;
1101 rt->rt6i_protocol = cfg->fc_protocol;
1102
1103 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104
1105 if (addr_type & IPV6_ADDR_MULTICAST)
1106 rt->u.dst.input = ip6_mc_input;
1107 else
1108 rt->u.dst.input = ip6_forward;
1109
1110 rt->u.dst.output = ip6_output;
1111
Thomas Graf86872cb2006-08-22 00:01:08 -07001112 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1113 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 if (rt->rt6i_dst.plen == 128)
1115 rt->u.dst.flags = DST_HOST;
1116
1117#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001118 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1119 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120#endif
1121
Thomas Graf86872cb2006-08-22 00:01:08 -07001122 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123
1124 /* We cannot add true routes via loopback here,
1125 they would result in kernel looping; promote them to reject routes
1126 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001127 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1129 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001130 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 if (dev) {
1132 dev_put(dev);
1133 in6_dev_put(idev);
1134 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001135 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001136 dev_hold(dev);
1137 idev = in6_dev_get(dev);
1138 if (!idev) {
1139 err = -ENODEV;
1140 goto out;
1141 }
1142 }
1143 rt->u.dst.output = ip6_pkt_discard_out;
1144 rt->u.dst.input = ip6_pkt_discard;
1145 rt->u.dst.error = -ENETUNREACH;
1146 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1147 goto install_route;
1148 }
1149
Thomas Graf86872cb2006-08-22 00:01:08 -07001150 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 struct in6_addr *gw_addr;
1152 int gwa_type;
1153
Thomas Graf86872cb2006-08-22 00:01:08 -07001154 gw_addr = &cfg->fc_gateway;
1155 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156 gwa_type = ipv6_addr_type(gw_addr);
1157
1158 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1159 struct rt6_info *grt;
1160
1161 /* IPv6 strictly inhibits using not link-local
1162 addresses as nexthop address.
1163 Otherwise, router will not able to send redirects.
1164 It is very good, but in some (rare!) circumstances
1165 (SIT, PtP, NBMA NOARP links) it is handy to allow
1166 some exceptions. --ANK
1167 */
1168 err = -EINVAL;
1169 if (!(gwa_type&IPV6_ADDR_UNICAST))
1170 goto out;
1171
Daniel Lezcano55786892008-03-04 13:47:47 -08001172 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173
1174 err = -EHOSTUNREACH;
1175 if (grt == NULL)
1176 goto out;
1177 if (dev) {
1178 if (dev != grt->rt6i_dev) {
1179 dst_release(&grt->u.dst);
1180 goto out;
1181 }
1182 } else {
1183 dev = grt->rt6i_dev;
1184 idev = grt->rt6i_idev;
1185 dev_hold(dev);
1186 in6_dev_hold(grt->rt6i_idev);
1187 }
1188 if (!(grt->rt6i_flags&RTF_GATEWAY))
1189 err = 0;
1190 dst_release(&grt->u.dst);
1191
1192 if (err)
1193 goto out;
1194 }
1195 err = -EINVAL;
1196 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1197 goto out;
1198 }
1199
1200 err = -ENODEV;
1201 if (dev == NULL)
1202 goto out;
1203
Thomas Graf86872cb2006-08-22 00:01:08 -07001204 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1206 if (IS_ERR(rt->rt6i_nexthop)) {
1207 err = PTR_ERR(rt->rt6i_nexthop);
1208 rt->rt6i_nexthop = NULL;
1209 goto out;
1210 }
1211 }
1212
Thomas Graf86872cb2006-08-22 00:01:08 -07001213 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214
1215install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001216 if (cfg->fc_mx) {
1217 struct nlattr *nla;
1218 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219
Thomas Graf86872cb2006-08-22 00:01:08 -07001220 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001221 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001222
1223 if (type) {
1224 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001225 err = -EINVAL;
1226 goto out;
1227 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001228
1229 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231 }
1232 }
1233
1234 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1235 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1236 if (!rt->u.dst.metrics[RTAX_MTU-1])
1237 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1238 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
Daniel Lezcano55786892008-03-04 13:47:47 -08001239 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 rt->u.dst.dev = dev;
1241 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001242 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001243
1244 cfg->fc_nlinfo.nl_net = dev->nd_net;
1245
Thomas Graf86872cb2006-08-22 00:01:08 -07001246 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247
1248out:
1249 if (dev)
1250 dev_put(dev);
1251 if (idev)
1252 in6_dev_put(idev);
1253 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001254 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255 return err;
1256}
1257
Thomas Graf86872cb2006-08-22 00:01:08 -07001258static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259{
1260 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001261 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262
Daniel Lezcanobdb32892008-03-04 13:48:10 -08001263 if (rt == ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001264 return -ENOENT;
1265
Thomas Grafc71099a2006-08-04 23:20:06 -07001266 table = rt->rt6i_table;
1267 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268
Thomas Graf86872cb2006-08-22 00:01:08 -07001269 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 dst_release(&rt->u.dst);
1271
Thomas Grafc71099a2006-08-04 23:20:06 -07001272 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273
1274 return err;
1275}
1276
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001277int ip6_del_rt(struct rt6_info *rt)
1278{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001279 struct nl_info info = {
Daniel Lezcano55786892008-03-04 13:47:47 -08001280 .nl_net = rt->rt6i_dev->nd_net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001281 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001282 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001283}
1284
Thomas Graf86872cb2006-08-22 00:01:08 -07001285static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286{
Thomas Grafc71099a2006-08-04 23:20:06 -07001287 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288 struct fib6_node *fn;
1289 struct rt6_info *rt;
1290 int err = -ESRCH;
1291
Daniel Lezcano55786892008-03-04 13:47:47 -08001292 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001293 if (table == NULL)
1294 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295
Thomas Grafc71099a2006-08-04 23:20:06 -07001296 read_lock_bh(&table->tb6_lock);
1297
1298 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001299 &cfg->fc_dst, cfg->fc_dst_len,
1300 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001301
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001303 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001304 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001306 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001308 if (cfg->fc_flags & RTF_GATEWAY &&
1309 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001311 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312 continue;
1313 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001314 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315
Thomas Graf86872cb2006-08-22 00:01:08 -07001316 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317 }
1318 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001319 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320
1321 return err;
1322}
1323
1324/*
1325 * Handle redirects
1326 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001327struct ip6rd_flowi {
1328 struct flowi fl;
1329 struct in6_addr gateway;
1330};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001332static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1333 struct flowi *fl,
1334 int flags)
1335{
1336 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1337 struct rt6_info *rt;
1338 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001339
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001341 * Get the "current" route for this destination and
1342 * check if the redirect has come from approriate router.
1343 *
1344 * RFC 2461 specifies that redirects should only be
1345 * accepted if they come from the nexthop to the target.
1346 * Due to the way the routes are chosen, this notion
1347 * is a bit fuzzy and one might need to check all possible
1348 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350
Thomas Grafc71099a2006-08-04 23:20:06 -07001351 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001352 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001353restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001354 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001355 /*
1356 * Current route is on-link; redirect is always invalid.
1357 *
1358 * Seems, previous statement is not true. It could
1359 * be node, which looks for us as on-link (f.e. proxy ndisc)
1360 * But then router serving it might decide, that we should
1361 * know truth 8)8) --ANK (980726).
1362 */
1363 if (rt6_check_expired(rt))
1364 continue;
1365 if (!(rt->rt6i_flags & RTF_GATEWAY))
1366 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001367 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001368 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001369 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001370 continue;
1371 break;
1372 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001373
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001374 if (!rt)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08001375 rt = ip6_null_entry;
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001376 BACKTRACK(&fl->fl6_src);
1377out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001378 dst_hold(&rt->u.dst);
1379
1380 read_unlock_bh(&table->tb6_lock);
1381
1382 return rt;
1383};
1384
1385static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1386 struct in6_addr *src,
1387 struct in6_addr *gateway,
1388 struct net_device *dev)
1389{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001390 int flags = RT6_LOOKUP_F_HAS_SADDR;
Daniel Lezcano55786892008-03-04 13:47:47 -08001391 struct net *net = dev->nd_net;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001392 struct ip6rd_flowi rdfl = {
1393 .fl = {
1394 .oif = dev->ifindex,
1395 .nl_u = {
1396 .ip6_u = {
1397 .daddr = *dest,
1398 .saddr = *src,
1399 },
1400 },
1401 },
1402 .gateway = *gateway,
1403 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001404
1405 if (rt6_need_strict(dest))
1406 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001407
Daniel Lezcano55786892008-03-04 13:47:47 -08001408 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001409 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001410}
1411
1412void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1413 struct in6_addr *saddr,
1414 struct neighbour *neigh, u8 *lladdr, int on_link)
1415{
1416 struct rt6_info *rt, *nrt = NULL;
1417 struct netevent_redirect netevent;
1418
1419 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1420
Daniel Lezcanobdb32892008-03-04 13:48:10 -08001421 if (rt == ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 if (net_ratelimit())
1423 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1424 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001425 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426 }
1427
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428 /*
1429 * We have finally decided to accept it.
1430 */
1431
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001432 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1434 NEIGH_UPDATE_F_OVERRIDE|
1435 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1436 NEIGH_UPDATE_F_ISROUTER))
1437 );
1438
1439 /*
1440 * Redirect received -> path was valid.
1441 * Look, redirects are sent only in response to data packets,
1442 * so that this nexthop apparently is reachable. --ANK
1443 */
1444 dst_confirm(&rt->u.dst);
1445
1446 /* Duplicate redirect: silently ignore. */
1447 if (neigh == rt->u.dst.neighbour)
1448 goto out;
1449
1450 nrt = ip6_rt_copy(rt);
1451 if (nrt == NULL)
1452 goto out;
1453
1454 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1455 if (on_link)
1456 nrt->rt6i_flags &= ~RTF_GATEWAY;
1457
1458 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1459 nrt->rt6i_dst.plen = 128;
1460 nrt->u.dst.flags |= DST_HOST;
1461
1462 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1463 nrt->rt6i_nexthop = neigh_clone(neigh);
1464 /* Reset pmtu, it may be better */
1465 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001466 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1467 dst_mtu(&nrt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001468
Thomas Graf40e22e82006-08-22 00:00:45 -07001469 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 goto out;
1471
Tom Tucker8d717402006-07-30 20:43:36 -07001472 netevent.old = &rt->u.dst;
1473 netevent.new = &nrt->u.dst;
1474 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1475
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001477 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 return;
1479 }
1480
1481out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001482 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483 return;
1484}
1485
1486/*
1487 * Handle ICMP "packet too big" messages
1488 * i.e. Path MTU discovery
1489 */
1490
1491void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1492 struct net_device *dev, u32 pmtu)
1493{
1494 struct rt6_info *rt, *nrt;
Daniel Lezcano55786892008-03-04 13:47:47 -08001495 struct net *net = dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496 int allfrag = 0;
1497
Daniel Lezcano55786892008-03-04 13:47:47 -08001498 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 if (rt == NULL)
1500 return;
1501
1502 if (pmtu >= dst_mtu(&rt->u.dst))
1503 goto out;
1504
1505 if (pmtu < IPV6_MIN_MTU) {
1506 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001507 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 * MTU (1280) and a fragment header should always be included
1509 * after a node receiving Too Big message reporting PMTU is
1510 * less than the IPv6 Minimum Link MTU.
1511 */
1512 pmtu = IPV6_MIN_MTU;
1513 allfrag = 1;
1514 }
1515
1516 /* New mtu received -> path was valid.
1517 They are sent only in response to data packets,
1518 so that this nexthop apparently is reachable. --ANK
1519 */
1520 dst_confirm(&rt->u.dst);
1521
1522 /* Host route. If it is static, it would be better
1523 not to override it, but add new one, so that
1524 when cache entry will expire old pmtu
1525 would return automatically.
1526 */
1527 if (rt->rt6i_flags & RTF_CACHE) {
1528 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1529 if (allfrag)
1530 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano55786892008-03-04 13:47:47 -08001531 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1533 goto out;
1534 }
1535
1536 /* Network route.
1537 Two cases are possible:
1538 1. It is connected route. Action: COW
1539 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1540 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001541 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001542 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001543 else
1544 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001545
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001546 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001547 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1548 if (allfrag)
1549 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1550
1551 /* According to RFC 1981, detecting PMTU increase shouldn't be
1552 * happened within 5 mins, the recommended timer is 10 mins.
1553 * Here this route expiration time is set to ip6_rt_mtu_expires
1554 * which is 10 mins. After 10 mins the decreased pmtu is expired
1555 * and detecting PMTU increase will be automatically happened.
1556 */
Daniel Lezcano55786892008-03-04 13:47:47 -08001557 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001558 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1559
Thomas Graf40e22e82006-08-22 00:00:45 -07001560 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001561 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562out:
1563 dst_release(&rt->u.dst);
1564}
1565
1566/*
1567 * Misc support functions
1568 */
1569
1570static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1571{
1572 struct rt6_info *rt = ip6_dst_alloc();
1573
1574 if (rt) {
1575 rt->u.dst.input = ort->u.dst.input;
1576 rt->u.dst.output = ort->u.dst.output;
1577
1578 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001579 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 rt->u.dst.dev = ort->u.dst.dev;
1581 if (rt->u.dst.dev)
1582 dev_hold(rt->u.dst.dev);
1583 rt->rt6i_idev = ort->rt6i_idev;
1584 if (rt->rt6i_idev)
1585 in6_dev_hold(rt->rt6i_idev);
1586 rt->u.dst.lastuse = jiffies;
1587 rt->rt6i_expires = 0;
1588
1589 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1590 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1591 rt->rt6i_metric = 0;
1592
1593 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1594#ifdef CONFIG_IPV6_SUBTREES
1595 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1596#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001597 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598 }
1599 return rt;
1600}
1601
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001602#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001603static struct rt6_info *rt6_get_route_info(struct net *net,
1604 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001605 struct in6_addr *gwaddr, int ifindex)
1606{
1607 struct fib6_node *fn;
1608 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001609 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001610
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001611 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001612 if (table == NULL)
1613 return NULL;
1614
1615 write_lock_bh(&table->tb6_lock);
1616 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001617 if (!fn)
1618 goto out;
1619
Eric Dumazet7cc48262007-02-09 16:22:57 -08001620 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001621 if (rt->rt6i_dev->ifindex != ifindex)
1622 continue;
1623 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1624 continue;
1625 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1626 continue;
1627 dst_hold(&rt->u.dst);
1628 break;
1629 }
1630out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001631 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001632 return rt;
1633}
1634
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001635static struct rt6_info *rt6_add_route_info(struct net *net,
1636 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001637 struct in6_addr *gwaddr, int ifindex,
1638 unsigned pref)
1639{
Thomas Graf86872cb2006-08-22 00:01:08 -07001640 struct fib6_config cfg = {
1641 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001642 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001643 .fc_ifindex = ifindex,
1644 .fc_dst_len = prefixlen,
1645 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1646 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001647 .fc_nlinfo.pid = 0,
1648 .fc_nlinfo.nlh = NULL,
1649 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001650 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001651
Thomas Graf86872cb2006-08-22 00:01:08 -07001652 ipv6_addr_copy(&cfg.fc_dst, prefix);
1653 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1654
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001655 /* We should treat it as a default route if prefix length is 0. */
1656 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001657 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001658
Thomas Graf86872cb2006-08-22 00:01:08 -07001659 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001660
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001661 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001662}
1663#endif
1664
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001666{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001668 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669
Daniel Lezcano55786892008-03-04 13:47:47 -08001670 table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001671 if (table == NULL)
1672 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673
Thomas Grafc71099a2006-08-04 23:20:06 -07001674 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001675 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001677 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1679 break;
1680 }
1681 if (rt)
1682 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001683 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 return rt;
1685}
1686
Fred L. Templinc7dc89c2007-11-29 22:11:40 +11001687EXPORT_SYMBOL(rt6_get_dflt_router);
1688
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001690 struct net_device *dev,
1691 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692{
Thomas Graf86872cb2006-08-22 00:01:08 -07001693 struct fib6_config cfg = {
1694 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001695 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001696 .fc_ifindex = dev->ifindex,
1697 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1698 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001699 .fc_nlinfo.pid = 0,
1700 .fc_nlinfo.nlh = NULL,
1701 .fc_nlinfo.nl_net = dev->nd_net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001702 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703
Thomas Graf86872cb2006-08-22 00:01:08 -07001704 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705
Thomas Graf86872cb2006-08-22 00:01:08 -07001706 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 return rt6_get_dflt_router(gwaddr, dev);
1709}
1710
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001711void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712{
1713 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001714 struct fib6_table *table;
1715
1716 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001717 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001718 if (table == NULL)
1719 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720
1721restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001722 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001723 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1725 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001726 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001727 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 goto restart;
1729 }
1730 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001731 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732}
1733
Daniel Lezcano55786892008-03-04 13:47:47 -08001734static void rtmsg_to_fib6_config(struct net *net,
1735 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001736 struct fib6_config *cfg)
1737{
1738 memset(cfg, 0, sizeof(*cfg));
1739
1740 cfg->fc_table = RT6_TABLE_MAIN;
1741 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1742 cfg->fc_metric = rtmsg->rtmsg_metric;
1743 cfg->fc_expires = rtmsg->rtmsg_info;
1744 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1745 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1746 cfg->fc_flags = rtmsg->rtmsg_flags;
1747
Daniel Lezcano55786892008-03-04 13:47:47 -08001748 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001749
Thomas Graf86872cb2006-08-22 00:01:08 -07001750 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1751 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1752 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1753}
1754
Daniel Lezcano55786892008-03-04 13:47:47 -08001755int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756{
Thomas Graf86872cb2006-08-22 00:01:08 -07001757 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758 struct in6_rtmsg rtmsg;
1759 int err;
1760
1761 switch(cmd) {
1762 case SIOCADDRT: /* Add a route */
1763 case SIOCDELRT: /* Delete a route */
1764 if (!capable(CAP_NET_ADMIN))
1765 return -EPERM;
1766 err = copy_from_user(&rtmsg, arg,
1767 sizeof(struct in6_rtmsg));
1768 if (err)
1769 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001770
Daniel Lezcano55786892008-03-04 13:47:47 -08001771 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001772
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773 rtnl_lock();
1774 switch (cmd) {
1775 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001776 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 break;
1778 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001779 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780 break;
1781 default:
1782 err = -EINVAL;
1783 }
1784 rtnl_unlock();
1785
1786 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001787 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788
1789 return -EINVAL;
1790}
1791
1792/*
1793 * Drop the packet on the floor
1794 */
1795
Ilpo Järvinen50eb4312008-01-12 03:21:00 -08001796static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001798 int type;
1799 switch (ipstats_mib_noroutes) {
1800 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001801 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001802 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1803 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1804 break;
1805 }
1806 /* FALLTHROUGH */
1807 case IPSTATS_MIB_OUTNOROUTES:
1808 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1809 break;
1810 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001811 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001812 kfree_skb(skb);
1813 return 0;
1814}
1815
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001816static int ip6_pkt_discard(struct sk_buff *skb)
1817{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001818 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001819}
1820
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001821static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822{
1823 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001824 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825}
1826
David S. Miller6723ab52006-10-18 21:20:57 -07001827#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1828
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001829static int ip6_pkt_prohibit(struct sk_buff *skb)
1830{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001831 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001832}
1833
1834static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1835{
1836 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001837 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001838}
1839
David S. Miller6723ab52006-10-18 21:20:57 -07001840#endif
1841
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842/*
1843 * Allocate a dst for local (unicast / anycast) address.
1844 */
1845
1846struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1847 const struct in6_addr *addr,
1848 int anycast)
1849{
Daniel Lezcano55786892008-03-04 13:47:47 -08001850 struct net *net = idev->dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851 struct rt6_info *rt = ip6_dst_alloc();
1852
1853 if (rt == NULL)
1854 return ERR_PTR(-ENOMEM);
1855
Daniel Lezcano55786892008-03-04 13:47:47 -08001856 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857 in6_dev_hold(idev);
1858
1859 rt->u.dst.flags = DST_HOST;
1860 rt->u.dst.input = ip6_input;
1861 rt->u.dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001862 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001863 rt->rt6i_idev = idev;
1864 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001865 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1867 rt->u.dst.obsolete = -1;
1868
1869 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001870 if (anycast)
1871 rt->rt6i_flags |= RTF_ANYCAST;
1872 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001873 rt->rt6i_flags |= RTF_LOCAL;
1874 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1875 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001876 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877 return ERR_PTR(-ENOMEM);
1878 }
1879
1880 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1881 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001882 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883
1884 atomic_set(&rt->u.dst.__refcnt, 1);
1885
1886 return rt;
1887}
1888
1889static int fib6_ifdown(struct rt6_info *rt, void *arg)
1890{
1891 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
Daniel Lezcanobdb32892008-03-04 13:48:10 -08001892 rt != ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893 RT6_TRACE("deleted by ifdown %p\n", rt);
1894 return -1;
1895 }
1896 return 0;
1897}
1898
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001899void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001901 fib6_clean_all(net, fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902}
1903
1904struct rt6_mtu_change_arg
1905{
1906 struct net_device *dev;
1907 unsigned mtu;
1908};
1909
1910static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1911{
1912 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1913 struct inet6_dev *idev;
Daniel Lezcano55786892008-03-04 13:47:47 -08001914 struct net *net = arg->dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915
1916 /* In IPv6 pmtu discovery is not optional,
1917 so that RTAX_MTU lock cannot disable it.
1918 We still use this lock to block changes
1919 caused by addrconf/ndisc.
1920 */
1921
1922 idev = __in6_dev_get(arg->dev);
1923 if (idev == NULL)
1924 return 0;
1925
1926 /* For administrative MTU increase, there is no way to discover
1927 IPv6 PMTU increase, so PMTU increase should be updated here.
1928 Since RFC 1981 doesn't include administrative MTU increase
1929 update PMTU increase is a MUST. (i.e. jumbo frame)
1930 */
1931 /*
1932 If new MTU is less than route PMTU, this new MTU will be the
1933 lowest MTU in the path, update the route PMTU to reflect PMTU
1934 decreases; if new MTU is greater than route PMTU, and the
1935 old MTU is the lowest MTU in the path, update the route PMTU
1936 to reflect the increase. In this case if the other nodes' MTU
1937 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1938 PMTU discouvery.
1939 */
1940 if (rt->rt6i_dev == arg->dev &&
1941 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08001942 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001943 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001944 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Daniel Lezcano55786892008-03-04 13:47:47 -08001946 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07001947 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948 return 0;
1949}
1950
1951void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1952{
Thomas Grafc71099a2006-08-04 23:20:06 -07001953 struct rt6_mtu_change_arg arg = {
1954 .dev = dev,
1955 .mtu = mtu,
1956 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001958 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959}
1960
Patrick McHardyef7c79e2007-06-05 12:38:30 -07001961static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07001962 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07001963 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07001964 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07001965 [RTA_PRIORITY] = { .type = NLA_U32 },
1966 [RTA_METRICS] = { .type = NLA_NESTED },
1967};
1968
1969static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1970 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971{
Thomas Graf86872cb2006-08-22 00:01:08 -07001972 struct rtmsg *rtm;
1973 struct nlattr *tb[RTA_MAX+1];
1974 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975
Thomas Graf86872cb2006-08-22 00:01:08 -07001976 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1977 if (err < 0)
1978 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979
Thomas Graf86872cb2006-08-22 00:01:08 -07001980 err = -EINVAL;
1981 rtm = nlmsg_data(nlh);
1982 memset(cfg, 0, sizeof(*cfg));
1983
1984 cfg->fc_table = rtm->rtm_table;
1985 cfg->fc_dst_len = rtm->rtm_dst_len;
1986 cfg->fc_src_len = rtm->rtm_src_len;
1987 cfg->fc_flags = RTF_UP;
1988 cfg->fc_protocol = rtm->rtm_protocol;
1989
1990 if (rtm->rtm_type == RTN_UNREACHABLE)
1991 cfg->fc_flags |= RTF_REJECT;
1992
1993 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1994 cfg->fc_nlinfo.nlh = nlh;
Benjamin Thery2216b482008-01-30 19:09:35 -08001995 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
Thomas Graf86872cb2006-08-22 00:01:08 -07001996
1997 if (tb[RTA_GATEWAY]) {
1998 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1999 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002001
2002 if (tb[RTA_DST]) {
2003 int plen = (rtm->rtm_dst_len + 7) >> 3;
2004
2005 if (nla_len(tb[RTA_DST]) < plen)
2006 goto errout;
2007
2008 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002009 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002010
2011 if (tb[RTA_SRC]) {
2012 int plen = (rtm->rtm_src_len + 7) >> 3;
2013
2014 if (nla_len(tb[RTA_SRC]) < plen)
2015 goto errout;
2016
2017 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002019
2020 if (tb[RTA_OIF])
2021 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2022
2023 if (tb[RTA_PRIORITY])
2024 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2025
2026 if (tb[RTA_METRICS]) {
2027 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2028 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002029 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002030
2031 if (tb[RTA_TABLE])
2032 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2033
2034 err = 0;
2035errout:
2036 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037}
2038
Thomas Grafc127ea22007-03-22 11:58:32 -07002039static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040{
Thomas Graf86872cb2006-08-22 00:01:08 -07002041 struct fib6_config cfg;
2042 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043
Thomas Graf86872cb2006-08-22 00:01:08 -07002044 err = rtm_to_fib6_config(skb, nlh, &cfg);
2045 if (err < 0)
2046 return err;
2047
2048 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049}
2050
Thomas Grafc127ea22007-03-22 11:58:32 -07002051static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052{
Thomas Graf86872cb2006-08-22 00:01:08 -07002053 struct fib6_config cfg;
2054 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055
Thomas Graf86872cb2006-08-22 00:01:08 -07002056 err = rtm_to_fib6_config(skb, nlh, &cfg);
2057 if (err < 0)
2058 return err;
2059
2060 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061}
2062
Thomas Graf339bf982006-11-10 14:10:15 -08002063static inline size_t rt6_nlmsg_size(void)
2064{
2065 return NLMSG_ALIGN(sizeof(struct rtmsg))
2066 + nla_total_size(16) /* RTA_SRC */
2067 + nla_total_size(16) /* RTA_DST */
2068 + nla_total_size(16) /* RTA_GATEWAY */
2069 + nla_total_size(16) /* RTA_PREFSRC */
2070 + nla_total_size(4) /* RTA_TABLE */
2071 + nla_total_size(4) /* RTA_IIF */
2072 + nla_total_size(4) /* RTA_OIF */
2073 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002074 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002075 + nla_total_size(sizeof(struct rta_cacheinfo));
2076}
2077
Linus Torvalds1da177e2005-04-16 15:20:36 -07002078static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002079 struct in6_addr *dst, struct in6_addr *src,
2080 int iif, int type, u32 pid, u32 seq,
2081 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082{
2083 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002084 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002085 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002086 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087
2088 if (prefix) { /* user wants prefix routes only */
2089 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2090 /* success since this is not a prefix route */
2091 return 1;
2092 }
2093 }
2094
Thomas Graf2d7202b2006-08-22 00:01:27 -07002095 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2096 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002097 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002098
2099 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100 rtm->rtm_family = AF_INET6;
2101 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2102 rtm->rtm_src_len = rt->rt6i_src.plen;
2103 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002104 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002105 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002106 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002107 table = RT6_TABLE_UNSPEC;
2108 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002109 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110 if (rt->rt6i_flags&RTF_REJECT)
2111 rtm->rtm_type = RTN_UNREACHABLE;
2112 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2113 rtm->rtm_type = RTN_LOCAL;
2114 else
2115 rtm->rtm_type = RTN_UNICAST;
2116 rtm->rtm_flags = 0;
2117 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2118 rtm->rtm_protocol = rt->rt6i_protocol;
2119 if (rt->rt6i_flags&RTF_DYNAMIC)
2120 rtm->rtm_protocol = RTPROT_REDIRECT;
2121 else if (rt->rt6i_flags & RTF_ADDRCONF)
2122 rtm->rtm_protocol = RTPROT_KERNEL;
2123 else if (rt->rt6i_flags&RTF_DEFAULT)
2124 rtm->rtm_protocol = RTPROT_RA;
2125
2126 if (rt->rt6i_flags&RTF_CACHE)
2127 rtm->rtm_flags |= RTM_F_CLONED;
2128
2129 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002130 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002131 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002133 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134#ifdef CONFIG_IPV6_SUBTREES
2135 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002136 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002137 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002138 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002139 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002140#endif
2141 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002142 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143 else if (dst) {
2144 struct in6_addr saddr_buf;
YOSHIFUJI Hideaki5e5f3f02008-03-03 21:44:34 +09002145 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2146 dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002147 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002149
Linus Torvalds1da177e2005-04-16 15:20:36 -07002150 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002151 goto nla_put_failure;
2152
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002154 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2155
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002157 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2158
2159 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002160
2161 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2162 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2163 expires, rt->u.dst.error) < 0)
2164 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165
Thomas Graf2d7202b2006-08-22 00:01:27 -07002166 return nlmsg_end(skb, nlh);
2167
2168nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002169 nlmsg_cancel(skb, nlh);
2170 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171}
2172
Patrick McHardy1b43af52006-08-10 23:11:17 -07002173int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002174{
2175 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2176 int prefix;
2177
Thomas Graf2d7202b2006-08-22 00:01:27 -07002178 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2179 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2181 } else
2182 prefix = 0;
2183
2184 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2185 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002186 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187}
2188
Thomas Grafc127ea22007-03-22 11:58:32 -07002189static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190{
Denis V. Lunevb8542722007-12-01 00:21:31 +11002191 struct net *net = in_skb->sk->sk_net;
Thomas Grafab364a62006-08-22 00:01:47 -07002192 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002194 struct sk_buff *skb;
2195 struct rtmsg *rtm;
2196 struct flowi fl;
2197 int err, iif = 0;
2198
2199 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2200 if (err < 0)
2201 goto errout;
2202
2203 err = -EINVAL;
2204 memset(&fl, 0, sizeof(fl));
2205
2206 if (tb[RTA_SRC]) {
2207 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2208 goto errout;
2209
2210 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2211 }
2212
2213 if (tb[RTA_DST]) {
2214 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2215 goto errout;
2216
2217 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2218 }
2219
2220 if (tb[RTA_IIF])
2221 iif = nla_get_u32(tb[RTA_IIF]);
2222
2223 if (tb[RTA_OIF])
2224 fl.oif = nla_get_u32(tb[RTA_OIF]);
2225
2226 if (iif) {
2227 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002228 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002229 if (!dev) {
2230 err = -ENODEV;
2231 goto errout;
2232 }
2233 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234
2235 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002236 if (skb == NULL) {
2237 err = -ENOBUFS;
2238 goto errout;
2239 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002240
2241 /* Reserve room for dummy headers, this skb can pass
2242 through good chunk of routing engine.
2243 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002244 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2246
Thomas Grafab364a62006-08-22 00:01:47 -07002247 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248 skb->dst = &rt->u.dst;
2249
Thomas Grafab364a62006-08-22 00:01:47 -07002250 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002251 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002252 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002253 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002254 kfree_skb(skb);
2255 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256 }
2257
Daniel Lezcano55786892008-03-04 13:47:47 -08002258 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002259errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261}
2262
Thomas Graf86872cb2006-08-22 00:01:08 -07002263void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264{
2265 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002266 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002267 u32 seq;
2268 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002270 err = -ENOBUFS;
2271 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002272
Thomas Graf339bf982006-11-10 14:10:15 -08002273 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002274 if (skb == NULL)
2275 goto errout;
2276
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002277 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2278 event, info->pid, seq, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002279 if (err < 0) {
2280 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2281 WARN_ON(err == -EMSGSIZE);
2282 kfree_skb(skb);
2283 goto errout;
2284 }
Daniel Lezcano55786892008-03-04 13:47:47 -08002285 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2286 info->nlh, gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002287errout:
2288 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002289 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290}
2291
2292/*
2293 * /proc
2294 */
2295
2296#ifdef CONFIG_PROC_FS
2297
/*
 * NOTE(review): RT6_INFO_LEN and struct rt6_proc_arg are not referenced by
 * the seq_file based /proc code that follows; presumably they are left over
 * from an older buffer-based reader -- confirm nothing else uses them
 * before removing.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2308
2309static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2310{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002311 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002313 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2314 rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315
2316#ifdef CONFIG_IPV6_SUBTREES
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002317 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2318 rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002320 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002321#endif
2322
2323 if (rt->rt6i_nexthop) {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002324 seq_printf(m, NIP6_SEQFMT,
2325 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002327 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002329 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2330 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2331 rt->u.dst.__use, rt->rt6i_flags,
2332 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002333 return 0;
2334}
2335
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002336static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002337{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002338 struct net *net = (struct net *)m->private;
2339 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002340 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341}
2342
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002343static int ipv6_route_open(struct inode *inode, struct file *file)
2344{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002345 struct net *net = get_proc_net(inode);
2346 if (!net)
2347 return -ENXIO;
2348 return single_open(file, ipv6_route_show, net);
2349}
2350
2351static int ipv6_route_release(struct inode *inode, struct file *file)
2352{
2353 struct seq_file *seq = file->private_data;
2354 struct net *net = seq->private;
2355 put_net(net);
2356 return single_release(inode, file);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002357}
2358
2359static const struct file_operations ipv6_route_proc_fops = {
2360 .owner = THIS_MODULE,
2361 .open = ipv6_route_open,
2362 .read = seq_read,
2363 .llseek = seq_lseek,
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002364 .release = ipv6_route_release,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002365};
2366
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2368{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002369 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002371 net->ipv6.rt6_stats->fib_nodes,
2372 net->ipv6.rt6_stats->fib_route_nodes,
2373 net->ipv6.rt6_stats->fib_rt_alloc,
2374 net->ipv6.rt6_stats->fib_rt_entries,
2375 net->ipv6.rt6_stats->fib_rt_cache,
Benjamin Theryc5728722008-03-03 23:34:17 -08002376 atomic_read(&ip6_dst_ops.entries),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002377 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002378
2379 return 0;
2380}
2381
2382static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2383{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002384 struct net *net = get_proc_net(inode);
2385 return single_open(file, rt6_stats_seq_show, net);
2386}
2387
2388static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2389{
2390 struct seq_file *seq = file->private_data;
2391 struct net *net = (struct net *)seq->private;
2392 put_net(net);
2393 return single_release(inode, file);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002394}
2395
Arjan van de Ven9a321442007-02-12 00:55:35 -08002396static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002397 .owner = THIS_MODULE,
2398 .open = rt6_stats_seq_open,
2399 .read = seq_read,
2400 .llseek = seq_lseek,
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002401 .release = rt6_stats_seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402};
2403#endif /* CONFIG_PROC_FS */
2404
2405#ifdef CONFIG_SYSCTL
2406
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407static
2408int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2409 void __user *buffer, size_t *lenp, loff_t *ppos)
2410{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002411 struct net *net = current->nsproxy->net_ns;
2412 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413 if (write) {
2414 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002415 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416 return 0;
2417 } else
2418 return -EINVAL;
2419}
2420
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002421ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002422 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002424 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002425 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002426 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002427 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428 },
2429 {
2430 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2431 .procname = "gc_thresh",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002432 .data = &ip6_dst_ops.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002433 .maxlen = sizeof(int),
2434 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002435 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436 },
2437 {
2438 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2439 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002440 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441 .maxlen = sizeof(int),
2442 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002443 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444 },
2445 {
2446 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2447 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002448 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449 .maxlen = sizeof(int),
2450 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002451 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452 .strategy = &sysctl_jiffies,
2453 },
2454 {
2455 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2456 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002457 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002458 .maxlen = sizeof(int),
2459 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002460 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002461 .strategy = &sysctl_jiffies,
2462 },
2463 {
2464 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2465 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002466 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002467 .maxlen = sizeof(int),
2468 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002469 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470 .strategy = &sysctl_jiffies,
2471 },
2472 {
2473 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2474 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002475 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476 .maxlen = sizeof(int),
2477 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002478 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002479 .strategy = &sysctl_jiffies,
2480 },
2481 {
2482 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2483 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002484 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485 .maxlen = sizeof(int),
2486 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002487 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488 .strategy = &sysctl_jiffies,
2489 },
2490 {
2491 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2492 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002493 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002494 .maxlen = sizeof(int),
2495 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002496 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002497 .strategy = &sysctl_jiffies,
2498 },
2499 {
2500 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2501 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002502 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002503 .maxlen = sizeof(int),
2504 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002505 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002506 .strategy = &sysctl_ms_jiffies,
2507 },
2508 { .ctl_name = 0 }
2509};
2510
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002511struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2512{
2513 struct ctl_table *table;
2514
2515 table = kmemdup(ipv6_route_table_template,
2516 sizeof(ipv6_route_table_template),
2517 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002518
2519 if (table) {
2520 table[0].data = &net->ipv6.sysctl.flush_delay;
2521 /* table[1].data will be handled when we have
2522 routes per namespace */
2523 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2524 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2525 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2526 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2527 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2528 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2529 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2530 }
2531
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002532 return table;
2533}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002534#endif
2535
/*
 * Per-namespace setup: with CONFIG_PROC_FS, create the "ipv6_route" and
 * "rt6_stats" proc entries for @net.  The results of the creation calls
 * are not checked; the function always returns 0.
 */
static int ip6_route_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	/* NOTE(review): mode 0 presumably selects the default file mode -- confirm. */
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif

	return 0;
}
2544
2545static void ip6_route_net_exit(struct net *net)
2546{
2547#ifdef CONFIG_PROC_FS
2548 proc_net_remove(net, "ipv6_route");
2549 proc_net_remove(net, "rt6_stats");
2550#endif
Daniel Lezcano55786892008-03-04 13:47:47 -08002551 rt6_ifdown(net, NULL);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002552}
2553
2554static struct pernet_operations ip6_route_net_ops = {
2555 .init = ip6_route_net_init,
2556 .exit = ip6_route_net_exit,
2557};
2558
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002559int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002561 int ret;
2562
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002563 ip6_dst_ops.kmem_cachep =
2564 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
Daniel Lezcanof845ab62007-12-07 00:45:16 -08002565 SLAB_HWCACHE_ALIGN, NULL);
2566 if (!ip6_dst_ops.kmem_cachep)
2567 return -ENOMEM;
2568
David S. Miller14e50e52007-05-24 18:17:54 -07002569 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2570
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002571 ret = -ENOMEM;
2572 ip6_null_entry = kmemdup(&ip6_null_entry_template,
2573 sizeof(*ip6_null_entry), GFP_KERNEL);
2574 if (!ip6_null_entry)
2575 goto out_kmem_cache;
2576 ip6_null_entry->u.dst.path = (struct dst_entry *)ip6_null_entry;
2577
2578#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2579 ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2580 sizeof(*ip6_prohibit_entry), GFP_KERNEL);
2581 if (!ip6_prohibit_entry)
2582 goto out_ip6_null_entry;
2583 ip6_prohibit_entry->u.dst.path = (struct dst_entry *)ip6_prohibit_entry;
2584
2585 ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2586 sizeof(*ip6_blk_hole_entry), GFP_KERNEL);
2587 if (!ip6_blk_hole_entry)
2588 goto out_ip6_prohibit_entry;
2589 ip6_blk_hole_entry->u.dst.path = (struct dst_entry *)ip6_blk_hole_entry;
2590#endif
2591
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002592 ret = fib6_init();
2593 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002594 goto out_ip6_blk_hole_entry;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002595
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002596 ret = xfrm6_init();
2597 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002598 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002599
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002600 ret = fib6_rules_init();
2601 if (ret)
2602 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002603
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002604 ret = -ENOBUFS;
2605 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2606 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2607 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2608 goto fib6_rules_init;
2609
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002610 ret = register_pernet_subsys(&ip6_route_net_ops);
2611 if (ret)
2612 goto fib6_rules_init;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002613out:
2614 return ret;
2615
2616fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002617 fib6_rules_cleanup();
2618xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002619 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002620out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002621 fib6_gc_cleanup();
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002622out_ip6_blk_hole_entry:
2623#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2624 kfree(ip6_blk_hole_entry);
2625out_ip6_prohibit_entry:
2626 kfree(ip6_prohibit_entry);
2627out_ip6_null_entry:
2628#endif
2629 kfree(ip6_null_entry);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002630out_kmem_cache:
2631 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2632 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002633}
2634
2635void ip6_route_cleanup(void)
2636{
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002637 unregister_pernet_subsys(&ip6_route_net_ops);
Thomas Graf101367c2006-08-04 03:39:02 -07002638 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002639 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002640 fib6_gc_cleanup();
2641 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002642
2643 kfree(ip6_null_entry);
2644#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2645 kfree(ip6_prohibit_entry);
2646 kfree(ip6_blk_hole_entry);
2647#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002648}