blob: 7ff66cebe77cff96777775a6e0bed97cd0be3f63 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070025 * Ville Nuorvala
26 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070027 */
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080043#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug level for this file.  RDBG() and RT6_TRACE() only produce output
 * when RT6_DEBUG is 3 or higher; below that they compile away.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * Compile-time switch tested by ip6_pol_route(): when non-zero, routes that
 * do not need rt6_alloc_cow() are duplicated with rt6_alloc_clone(); when
 * zero they are returned without being copied.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080090#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080091static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080093 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080095static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080097 struct in6_addr *gwaddr, int ifindex);
98#endif
99
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100static struct dst_ops ip6_dst_ops = {
101 .family = AF_INET6,
102 .protocol = __constant_htons(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu862b82c2007-11-13 21:43:11 -0800111 .local_out = ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800113 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114};
115
David S. Miller14e50e52007-05-24 18:17:54 -0700116static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117{
118}
119
120static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
122 .protocol = __constant_htons(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800127 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700128};
129
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800130static struct rt6_info ip6_null_entry_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131 .u = {
132 .dst = {
133 .__refcnt = ATOMIC_INIT(1),
134 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 .obsolete = -1,
136 .error = -ENETUNREACH,
137 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
138 .input = ip6_pkt_discard,
139 .output = ip6_pkt_discard_out,
140 .ops = &ip6_dst_ops,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 }
142 },
143 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
144 .rt6i_metric = ~(u32) 0,
145 .rt6i_ref = ATOMIC_INIT(1),
146};
147
Thomas Graf101367c2006-08-04 03:39:02 -0700148#ifdef CONFIG_IPV6_MULTIPLE_TABLES
149
David S. Miller6723ab52006-10-18 21:20:57 -0700150static int ip6_pkt_prohibit(struct sk_buff *skb);
151static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700152
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800153struct rt6_info ip6_prohibit_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700154 .u = {
155 .dst = {
156 .__refcnt = ATOMIC_INIT(1),
157 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700158 .obsolete = -1,
159 .error = -EACCES,
160 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Thomas Graf9ce8ade2006-10-18 20:46:54 -0700161 .input = ip6_pkt_prohibit,
162 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700163 .ops = &ip6_dst_ops,
Thomas Graf101367c2006-08-04 03:39:02 -0700164 }
165 },
166 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
167 .rt6i_metric = ~(u32) 0,
168 .rt6i_ref = ATOMIC_INIT(1),
169};
170
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800171static struct rt6_info ip6_blk_hole_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700172 .u = {
173 .dst = {
174 .__refcnt = ATOMIC_INIT(1),
175 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700176 .obsolete = -1,
177 .error = -EINVAL,
178 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Herbert Xu352e5122007-11-13 21:34:06 -0800179 .input = dst_discard,
180 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700181 .ops = &ip6_dst_ops,
Thomas Graf101367c2006-08-04 03:39:02 -0700182 }
183 },
184 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
185 .rt6i_metric = ~(u32) 0,
186 .rt6i_ref = ATOMIC_INIT(1),
187};
188
189#endif
190
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191/* allocate dst with ip6_dst_ops */
192static __inline__ struct rt6_info *ip6_dst_alloc(void)
193{
194 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
195}
196
197static void ip6_dst_destroy(struct dst_entry *dst)
198{
199 struct rt6_info *rt = (struct rt6_info *)dst;
200 struct inet6_dev *idev = rt->rt6i_idev;
201
202 if (idev != NULL) {
203 rt->rt6i_idev = NULL;
204 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900205 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206}
207
208static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
209 int how)
210{
211 struct rt6_info *rt = (struct rt6_info *)dst;
212 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800213 struct net_device *loopback_dev =
214 dev->nd_net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800216 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
217 struct inet6_dev *loopback_idev =
218 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219 if (loopback_idev != NULL) {
220 rt->rt6i_idev = loopback_idev;
221 in6_dev_put(idev);
222 }
223 }
224}
225
226static __inline__ int rt6_check_expired(const struct rt6_info *rt)
227{
228 return (rt->rt6i_flags & RTF_EXPIRES &&
229 time_after(jiffies, rt->rt6i_expires));
230}
231
Thomas Grafc71099a2006-08-04 23:20:06 -0700232static inline int rt6_need_strict(struct in6_addr *daddr)
233{
234 return (ipv6_addr_type(daddr) &
235 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
236}
237
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700239 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240 */
241
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800242static inline struct rt6_info *rt6_device_match(struct net *net,
243 struct rt6_info *rt,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 int oif,
245 int strict)
246{
247 struct rt6_info *local = NULL;
248 struct rt6_info *sprt;
249
250 if (oif) {
Eric Dumazet7cc48262007-02-09 16:22:57 -0800251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 struct net_device *dev = sprt->rt6i_dev;
253 if (dev->ifindex == oif)
254 return sprt;
255 if (dev->flags & IFF_LOOPBACK) {
256 if (sprt->rt6i_idev == NULL ||
257 sprt->rt6i_idev->dev->ifindex != oif) {
258 if (strict && oif)
259 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900260 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 local->rt6i_idev->dev->ifindex == oif))
262 continue;
263 }
264 local = sprt;
265 }
266 }
267
268 if (local)
269 return local;
270
271 if (strict)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800272 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 }
274 return rt;
275}
276
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800277#ifdef CONFIG_IPV6_ROUTER_PREF
278static void rt6_probe(struct rt6_info *rt)
279{
280 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
281 /*
282 * Okay, this does not seem to be appropriate
283 * for now, however, we need to check if it
284 * is really so; aka Router Reachability Probing.
285 *
286 * Router Reachability Probe MUST be rate-limited
287 * to no more than one per minute.
288 */
289 if (!neigh || (neigh->nud_state & NUD_VALID))
290 return;
291 read_lock_bh(&neigh->lock);
292 if (!(neigh->nud_state & NUD_VALID) &&
YOSHIFUJI Hideaki52e163562006-03-20 17:05:47 -0800293 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800294 struct in6_addr mcaddr;
295 struct in6_addr *target;
296
297 neigh->updated = jiffies;
298 read_unlock_bh(&neigh->lock);
299
300 target = (struct in6_addr *)&neigh->primary_key;
301 addrconf_addr_solict_mult(target, &mcaddr);
302 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
303 } else
304 read_unlock_bh(&neigh->lock);
305}
306#else
/* Stub used when CONFIG_IPV6_ROUTER_PREF is not set: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
311#endif
312
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800314 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700316static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800318 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700319 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800320 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700321 if ((dev->flags & IFF_LOOPBACK) &&
322 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
323 return 1;
324 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325}
326
Dave Jonesb6f99a22007-03-22 12:27:49 -0700327static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800329 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800330 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700331 if (rt->rt6i_flags & RTF_NONEXTHOP ||
332 !(rt->rt6i_flags & RTF_GATEWAY))
333 m = 1;
334 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335 read_lock_bh(&neigh->lock);
336 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700337 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800338#ifdef CONFIG_IPV6_ROUTER_PREF
339 else if (neigh->nud_state & NUD_FAILED)
340 m = 0;
341#endif
342 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800343 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800344 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800345 } else
346 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800347 return m;
348}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800350static int rt6_score_route(struct rt6_info *rt, int oif,
351 int strict)
352{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700353 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900354
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700355 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700356 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800357 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800358#ifdef CONFIG_IPV6_ROUTER_PREF
359 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
360#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700361 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800362 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800363 return -1;
364 return m;
365}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366
David S. Millerf11e6652007-03-24 20:36:25 -0700367static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
368 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800369{
David S. Millerf11e6652007-03-24 20:36:25 -0700370 int m;
371
372 if (rt6_check_expired(rt))
373 goto out;
374
375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
377 goto out;
378
379 if (m > *mpri) {
380 if (strict & RT6_LOOKUP_F_REACHABLE)
381 rt6_probe(match);
382 *mpri = m;
383 match = rt;
384 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
385 rt6_probe(rt);
386 }
387
388out:
389 return match;
390}
391
392static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
393 struct rt6_info *rr_head,
394 u32 metric, int oif, int strict)
395{
396 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800397 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398
David S. Millerf11e6652007-03-24 20:36:25 -0700399 match = NULL;
400 for (rt = rr_head; rt && rt->rt6i_metric == metric;
401 rt = rt->u.dst.rt6_next)
402 match = find_match(rt, oif, strict, &mpri, match);
403 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
404 rt = rt->u.dst.rt6_next)
405 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800406
David S. Millerf11e6652007-03-24 20:36:25 -0700407 return match;
408}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800409
David S. Millerf11e6652007-03-24 20:36:25 -0700410static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
411{
412 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800413 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414
David S. Millerf11e6652007-03-24 20:36:25 -0700415 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
416 __FUNCTION__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417
David S. Millerf11e6652007-03-24 20:36:25 -0700418 rt0 = fn->rr_ptr;
419 if (!rt0)
420 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421
David S. Millerf11e6652007-03-24 20:36:25 -0700422 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800424 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700425 (strict & RT6_LOOKUP_F_REACHABLE)) {
426 struct rt6_info *next = rt0->u.dst.rt6_next;
427
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800428 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700429 if (!next || next->rt6i_metric != rt0->rt6i_metric)
430 next = fn->leaf;
431
432 if (next != rt0)
433 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434 }
435
David S. Millerf11e6652007-03-24 20:36:25 -0700436 RT6_TRACE("%s() => %p\n",
437 __FUNCTION__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800439 net = rt0->rt6i_dev->nd_net;
440 return (match ? match : net->ipv6.ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441}
442
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800443#ifdef CONFIG_IPV6_ROUTE_INFO
444int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
445 struct in6_addr *gwaddr)
446{
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800447 struct net *net = dev->nd_net;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800448 struct route_info *rinfo = (struct route_info *) opt;
449 struct in6_addr prefix_buf, *prefix;
450 unsigned int pref;
451 u32 lifetime;
452 struct rt6_info *rt;
453
454 if (len < sizeof(struct route_info)) {
455 return -EINVAL;
456 }
457
458 /* Sanity check for prefix_len and length */
459 if (rinfo->length > 3) {
460 return -EINVAL;
461 } else if (rinfo->prefix_len > 128) {
462 return -EINVAL;
463 } else if (rinfo->prefix_len > 64) {
464 if (rinfo->length < 2) {
465 return -EINVAL;
466 }
467 } else if (rinfo->prefix_len > 0) {
468 if (rinfo->length < 1) {
469 return -EINVAL;
470 }
471 }
472
473 pref = rinfo->route_pref;
474 if (pref == ICMPV6_ROUTER_PREF_INVALID)
475 pref = ICMPV6_ROUTER_PREF_MEDIUM;
476
Al Viroe69a4ad2006-11-14 20:56:00 -0800477 lifetime = ntohl(rinfo->lifetime);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800478 if (lifetime == 0xffffffff) {
479 /* infinity */
480 } else if (lifetime > 0x7fffffff/HZ) {
481 /* Avoid arithmetic overflow */
482 lifetime = 0x7fffffff/HZ - 1;
483 }
484
485 if (rinfo->length == 3)
486 prefix = (struct in6_addr *)rinfo->prefix;
487 else {
488 /* this function is safe */
489 ipv6_addr_prefix(&prefix_buf,
490 (struct in6_addr *)rinfo->prefix,
491 rinfo->prefix_len);
492 prefix = &prefix_buf;
493 }
494
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800495 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
496 dev->ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800497
498 if (rt && !lifetime) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700499 ip6_del_rt(rt);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800500 rt = NULL;
501 }
502
503 if (!rt && lifetime)
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800504 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800505 pref);
506 else if (rt)
507 rt->rt6i_flags = RTF_ROUTEINFO |
508 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
509
510 if (rt) {
511 if (lifetime == 0xffffffff) {
512 rt->rt6i_flags &= ~RTF_EXPIRES;
513 } else {
514 rt->rt6i_expires = jiffies + HZ * lifetime;
515 rt->rt6i_flags |= RTF_EXPIRES;
516 }
517 dst_release(&rt->u.dst);
518 }
519 return 0;
520}
521#endif
522
/*
 * Used inside lookup functions that have locals named "rt" and "fn" and
 * labels "out" and "restart".  When rt is the null entry of __net, climb
 * from fn towards the root: at each step either descend into the parent's
 * subtree (looked up by saddr) or move to the parent itself.  Jumps to
 * "restart" at the first node flagged RTN_RTINFO, or to "out" once fn is
 * flagged RTN_TL_ROOT.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			fn = (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) ? \
				fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr) : \
				pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700540
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800541static struct rt6_info *ip6_pol_route_lookup(struct net *net,
542 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700543 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544{
545 struct fib6_node *fn;
546 struct rt6_info *rt;
547
Thomas Grafc71099a2006-08-04 23:20:06 -0700548 read_lock_bh(&table->tb6_lock);
549 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
550restart:
551 rt = fn->leaf;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800552 rt = rt6_device_match(net, rt, fl->oif, flags);
553 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700554out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800555 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700556 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700557 return rt;
558
559}
560
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800561struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
562 struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700563{
564 struct flowi fl = {
565 .oif = oif,
566 .nl_u = {
567 .ip6_u = {
568 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700569 },
570 },
571 };
572 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700573 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700574
Thomas Grafadaa70b2006-10-13 15:01:03 -0700575 if (saddr) {
576 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577 flags |= RT6_LOOKUP_F_HAS_SADDR;
578 }
579
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800580 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700581 if (dst->error == 0)
582 return (struct rt6_info *) dst;
583
584 dst_release(dst);
585
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 return NULL;
587}
588
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900589EXPORT_SYMBOL(rt6_lookup);
590
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason the
   route is freed. In either case, unless the caller holds its own
   reference to the route, it may be destroyed.
 */
596
Thomas Graf86872cb2006-08-22 00:01:08 -0700597static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598{
599 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700600 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601
Thomas Grafc71099a2006-08-04 23:20:06 -0700602 table = rt->rt6i_table;
603 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700604 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700605 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606
607 return err;
608}
609
Thomas Graf40e22e82006-08-22 00:00:45 -0700610int ip6_ins_rt(struct rt6_info *rt)
611{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800612 struct nl_info info = {
Daniel Lezcano55786892008-03-04 13:47:47 -0800613 .nl_net = rt->rt6i_dev->nd_net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800614 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800615 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700616}
617
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800618static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
619 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 struct rt6_info *rt;
622
623 /*
624 * Clone the route.
625 */
626
627 rt = ip6_rt_copy(ort);
628
629 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900630 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
631 if (rt->rt6i_dst.plen != 128 &&
632 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
633 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900635 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900637 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638 rt->rt6i_dst.plen = 128;
639 rt->rt6i_flags |= RTF_CACHE;
640 rt->u.dst.flags |= DST_HOST;
641
642#ifdef CONFIG_IPV6_SUBTREES
643 if (rt->rt6i_src.plen && saddr) {
644 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
645 rt->rt6i_src.plen = 128;
646 }
647#endif
648
649 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
650
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800651 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800653 return rt;
654}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800656static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
657{
658 struct rt6_info *rt = ip6_rt_copy(ort);
659 if (rt) {
660 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
661 rt->rt6i_dst.plen = 128;
662 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800663 rt->u.dst.flags |= DST_HOST;
664 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
665 }
666 return rt;
667}
668
/*
 * Core per-table route lookup shared by the input and output paths.
 *
 * Selects a route for flow @fl in @table with rt6_select(), using @oif as
 * the interface to match.  If the selected route has no nexthop and is not
 * RTF_NONEXTHOP, a per-destination copy is created with rt6_alloc_cow()
 * and inserted with ip6_ins_rt(); a failed insert triggers a fresh lookup,
 * bounded by "attempts".
 *
 * Every exit path takes a dst reference on the returned route and updates
 * its lastuse/__use fields.
 *
 * The labels "restart" and "out" and the locals "rt" and "fn" are also
 * used by the BACKTRACK() macro.
 */
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800669static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
670 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671{
672 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800673 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700674 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700675 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800676 int err;
/* Reachability is only requested when forwarding is disabled. */
YOSHIFUJI Hideakiea659e02006-11-06 09:45:45 -0800677 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700678
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700679 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680
681relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700682 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800684restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700685 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686
687restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700688 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800689
690 BACKTRACK(net, &fl->fl6_src);
/* Null entry or an already-cached route: nothing to copy. */
691 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800692 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800693 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694
/* Pin rt before dropping the table lock. */
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800695 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700696 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800697
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800698 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800699 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800700 else {
701#if CLONE_OFFLINK_ROUTE
702 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
703#else
704 goto out2;
705#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800707
/* Swap the reference from the original route to the copy (or null entry). */
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800708 dst_release(&rt->u.dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800709 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800710
711 dst_hold(&rt->u.dst);
712 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700713 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800714 if (!err)
715 goto out2;
716 }
717
718 if (--attempts <= 0)
719 goto out2;
720
721 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700722 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800723 * released someone could insert this route. Relookup.
724 */
725 dst_release(&rt->u.dst);
726 goto relookup;
727
728out:
/* Reached with the table lock held; retry once without the reachability flag. */
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800729 if (reachable) {
730 reachable = 0;
731 goto restart_2;
732 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800733 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700734 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735out2:
736 rt->u.dst.lastuse = jiffies;
737 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700738
739 return rt;
740}
741
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800742static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700743 struct flowi *fl, int flags)
744{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800745 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700746}
747
Thomas Grafc71099a2006-08-04 23:20:06 -0700748void ip6_route_input(struct sk_buff *skb)
749{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700750 struct ipv6hdr *iph = ipv6_hdr(skb);
Daniel Lezcano55786892008-03-04 13:47:47 -0800751 struct net *net = skb->dev->nd_net;
Thomas Grafadaa70b2006-10-13 15:01:03 -0700752 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700753 struct flowi fl = {
754 .iif = skb->dev->ifindex,
755 .nl_u = {
756 .ip6_u = {
757 .daddr = iph->daddr,
758 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800759 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700760 },
761 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900762 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700763 .proto = iph->nexthdr,
764 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700765
766 if (rt6_need_strict(&iph->daddr))
767 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700768
Daniel Lezcano55786892008-03-04 13:47:47 -0800769 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
Thomas Grafc71099a2006-08-04 23:20:06 -0700770}
771
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800772static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700773 struct flowi *fl, int flags)
774{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800775 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700776}
777
778struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
779{
780 int flags = 0;
781
782 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700783 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700784
Thomas Grafadaa70b2006-10-13 15:01:03 -0700785 if (!ipv6_addr_any(&fl->fl6_src))
786 flags |= RT6_LOOKUP_F_HAS_SADDR;
787
Daniel Lezcano58f09b72008-03-03 23:25:27 -0800788 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789}
790
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900791EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792
David S. Miller14e50e52007-05-24 18:17:54 -0700793int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
794{
795 struct rt6_info *ort = (struct rt6_info *) *dstp;
796 struct rt6_info *rt = (struct rt6_info *)
797 dst_alloc(&ip6_dst_blackhole_ops);
798 struct dst_entry *new = NULL;
799
800 if (rt) {
801 new = &rt->u.dst;
802
803 atomic_set(&new->__refcnt, 1);
804 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800805 new->input = dst_discard;
806 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700807
808 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
809 new->dev = ort->u.dst.dev;
810 if (new->dev)
811 dev_hold(new->dev);
812 rt->rt6i_idev = ort->rt6i_idev;
813 if (rt->rt6i_idev)
814 in6_dev_hold(rt->rt6i_idev);
815 rt->rt6i_expires = 0;
816
817 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
818 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
819 rt->rt6i_metric = 0;
820
821 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
822#ifdef CONFIG_IPV6_SUBTREES
823 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
824#endif
825
826 dst_free(new);
827 }
828
829 dst_release(*dstp);
830 *dstp = new;
831 return (new ? 0 : -ENOMEM);
832}
833EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
834
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835/*
836 * Destination cache support functions
837 */
838
839static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
840{
841 struct rt6_info *rt;
842
843 rt = (struct rt6_info *) dst;
844
845 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
846 return dst;
847
848 return NULL;
849}
850
851static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
852{
853 struct rt6_info *rt = (struct rt6_info *) dst;
854
855 if (rt) {
856 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700857 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858 else
859 dst_release(dst);
860 }
861 return NULL;
862}
863
864static void ip6_link_failure(struct sk_buff *skb)
865{
866 struct rt6_info *rt;
867
868 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
869
870 rt = (struct rt6_info *) skb->dst;
871 if (rt) {
872 if (rt->rt6i_flags&RTF_CACHE) {
873 dst_set_expires(&rt->u.dst, 0);
874 rt->rt6i_flags |= RTF_EXPIRES;
875 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
876 rt->rt6i_node->fn_sernum = -1;
877 }
878}
879
880static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
881{
882 struct rt6_info *rt6 = (struct rt6_info*)dst;
883
884 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
885 rt6->rt6i_flags |= RTF_MODIFIED;
886 if (mtu < IPV6_MIN_MTU) {
887 mtu = IPV6_MIN_MTU;
888 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
889 }
890 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700891 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700892 }
893}
894
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895static int ipv6_get_mtu(struct net_device *dev);
896
Daniel Lezcano55786892008-03-04 13:47:47 -0800897static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898{
899 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
900
Daniel Lezcano55786892008-03-04 13:47:47 -0800901 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
902 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903
904 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900905 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
906 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
907 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908 * rely only on pmtu discovery"
909 */
910 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
911 mtu = IPV6_MAXPLEN;
912 return mtu;
913}
914
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800915static struct dst_entry *icmp6_dst_gc_list;
916static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700917
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800918struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 struct neighbour *neigh,
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800920 struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921{
922 struct rt6_info *rt;
923 struct inet6_dev *idev = in6_dev_get(dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800924 struct net *net = dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925
926 if (unlikely(idev == NULL))
927 return NULL;
928
929 rt = ip6_dst_alloc();
930 if (unlikely(rt == NULL)) {
931 in6_dev_put(idev);
932 goto out;
933 }
934
935 dev_hold(dev);
936 if (neigh)
937 neigh_hold(neigh);
938 else
939 neigh = ndisc_get_neigh(dev, addr);
940
941 rt->rt6i_dev = dev;
942 rt->rt6i_idev = idev;
943 rt->rt6i_nexthop = neigh;
944 atomic_set(&rt->u.dst.__refcnt, 1);
945 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
946 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800947 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800948 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949
950#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900951 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
952 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 : 0;
954 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
955 rt->rt6i_dst.plen = 128;
956#endif
957
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800958 spin_lock_bh(&icmp6_dst_lock);
959 rt->u.dst.next = icmp6_dst_gc_list;
960 icmp6_dst_gc_list = &rt->u.dst;
961 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962
Daniel Lezcano55786892008-03-04 13:47:47 -0800963 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964
965out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +0900966 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967}
968
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800969int icmp6_dst_gc(int *more)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970{
971 struct dst_entry *dst, *next, **pprev;
972 int freed;
973
974 next = NULL;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900975 freed = 0;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700976
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800977 spin_lock_bh(&icmp6_dst_lock);
978 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700979
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980 while ((dst = *pprev) != NULL) {
981 if (!atomic_read(&dst->__refcnt)) {
982 *pprev = dst->next;
983 dst_free(dst);
984 freed++;
985 } else {
986 pprev = &dst->next;
987 (*more)++;
988 }
989 }
990
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800991 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700992
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 return freed;
994}
995
Daniel Lezcano569d3642008-01-18 03:56:57 -0800996static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997{
998 static unsigned expire = 30*HZ;
999 static unsigned long last_gc;
1000 unsigned long now = jiffies;
1001
Daniel Lezcano49905092008-01-10 03:01:01 -08001002 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1003 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004 goto out;
1005
1006 expire++;
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08001007 fib6_run_gc(expire, &init_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 last_gc = now;
1009 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
Daniel Lezcano49905092008-01-10 03:01:01 -08001010 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011
1012out:
Daniel Lezcano49905092008-01-10 03:01:01 -08001013 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1014 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015}
1016
1017/* Clean host part of a prefix. Not necessary in radix tree,
1018 but results in cleaner routing tables.
1019
1020 Remove it only when all the things will work!
1021 */
1022
1023static int ipv6_get_mtu(struct net_device *dev)
1024{
1025 int mtu = IPV6_MIN_MTU;
1026 struct inet6_dev *idev;
1027
1028 idev = in6_dev_get(dev);
1029 if (idev) {
1030 mtu = idev->cnf.mtu6;
1031 in6_dev_put(idev);
1032 }
1033 return mtu;
1034}
1035
1036int ipv6_get_hoplimit(struct net_device *dev)
1037{
1038 int hoplimit = ipv6_devconf.hop_limit;
1039 struct inet6_dev *idev;
1040
1041 idev = in6_dev_get(dev);
1042 if (idev) {
1043 hoplimit = idev->cnf.hop_limit;
1044 in6_dev_put(idev);
1045 }
1046 return hoplimit;
1047}
1048
1049/*
1050 *
1051 */
1052
Thomas Graf86872cb2006-08-22 00:01:08 -07001053int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054{
1055 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001056 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 struct rt6_info *rt = NULL;
1058 struct net_device *dev = NULL;
1059 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001060 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 int addr_type;
1062
Thomas Graf86872cb2006-08-22 00:01:08 -07001063 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064 return -EINVAL;
1065#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001066 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 return -EINVAL;
1068#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001069 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001071 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 if (!dev)
1073 goto out;
1074 idev = in6_dev_get(dev);
1075 if (!idev)
1076 goto out;
1077 }
1078
Thomas Graf86872cb2006-08-22 00:01:08 -07001079 if (cfg->fc_metric == 0)
1080 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081
Daniel Lezcano55786892008-03-04 13:47:47 -08001082 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001083 if (table == NULL) {
1084 err = -ENOBUFS;
1085 goto out;
1086 }
1087
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088 rt = ip6_dst_alloc();
1089
1090 if (rt == NULL) {
1091 err = -ENOMEM;
1092 goto out;
1093 }
1094
1095 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001096 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001097
Thomas Graf86872cb2006-08-22 00:01:08 -07001098 if (cfg->fc_protocol == RTPROT_UNSPEC)
1099 cfg->fc_protocol = RTPROT_BOOT;
1100 rt->rt6i_protocol = cfg->fc_protocol;
1101
1102 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103
1104 if (addr_type & IPV6_ADDR_MULTICAST)
1105 rt->u.dst.input = ip6_mc_input;
1106 else
1107 rt->u.dst.input = ip6_forward;
1108
1109 rt->u.dst.output = ip6_output;
1110
Thomas Graf86872cb2006-08-22 00:01:08 -07001111 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1112 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113 if (rt->rt6i_dst.plen == 128)
1114 rt->u.dst.flags = DST_HOST;
1115
1116#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001117 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1118 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119#endif
1120
Thomas Graf86872cb2006-08-22 00:01:08 -07001121 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122
1123 /* We cannot add true routes via loopback here,
1124 they would result in kernel looping; promote them to reject routes
1125 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001126 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1128 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001129 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 if (dev) {
1131 dev_put(dev);
1132 in6_dev_put(idev);
1133 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001134 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 dev_hold(dev);
1136 idev = in6_dev_get(dev);
1137 if (!idev) {
1138 err = -ENODEV;
1139 goto out;
1140 }
1141 }
1142 rt->u.dst.output = ip6_pkt_discard_out;
1143 rt->u.dst.input = ip6_pkt_discard;
1144 rt->u.dst.error = -ENETUNREACH;
1145 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1146 goto install_route;
1147 }
1148
Thomas Graf86872cb2006-08-22 00:01:08 -07001149 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 struct in6_addr *gw_addr;
1151 int gwa_type;
1152
Thomas Graf86872cb2006-08-22 00:01:08 -07001153 gw_addr = &cfg->fc_gateway;
1154 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155 gwa_type = ipv6_addr_type(gw_addr);
1156
1157 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1158 struct rt6_info *grt;
1159
1160 /* IPv6 strictly inhibits using not link-local
1161 addresses as nexthop address.
1162 Otherwise, router will not able to send redirects.
1163 It is very good, but in some (rare!) circumstances
1164 (SIT, PtP, NBMA NOARP links) it is handy to allow
1165 some exceptions. --ANK
1166 */
1167 err = -EINVAL;
1168 if (!(gwa_type&IPV6_ADDR_UNICAST))
1169 goto out;
1170
Daniel Lezcano55786892008-03-04 13:47:47 -08001171 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001172
1173 err = -EHOSTUNREACH;
1174 if (grt == NULL)
1175 goto out;
1176 if (dev) {
1177 if (dev != grt->rt6i_dev) {
1178 dst_release(&grt->u.dst);
1179 goto out;
1180 }
1181 } else {
1182 dev = grt->rt6i_dev;
1183 idev = grt->rt6i_idev;
1184 dev_hold(dev);
1185 in6_dev_hold(grt->rt6i_idev);
1186 }
1187 if (!(grt->rt6i_flags&RTF_GATEWAY))
1188 err = 0;
1189 dst_release(&grt->u.dst);
1190
1191 if (err)
1192 goto out;
1193 }
1194 err = -EINVAL;
1195 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1196 goto out;
1197 }
1198
1199 err = -ENODEV;
1200 if (dev == NULL)
1201 goto out;
1202
Thomas Graf86872cb2006-08-22 00:01:08 -07001203 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001204 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1205 if (IS_ERR(rt->rt6i_nexthop)) {
1206 err = PTR_ERR(rt->rt6i_nexthop);
1207 rt->rt6i_nexthop = NULL;
1208 goto out;
1209 }
1210 }
1211
Thomas Graf86872cb2006-08-22 00:01:08 -07001212 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213
1214install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001215 if (cfg->fc_mx) {
1216 struct nlattr *nla;
1217 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218
Thomas Graf86872cb2006-08-22 00:01:08 -07001219 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001220 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001221
1222 if (type) {
1223 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224 err = -EINVAL;
1225 goto out;
1226 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001227
1228 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001230 }
1231 }
1232
1233 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1234 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1235 if (!rt->u.dst.metrics[RTAX_MTU-1])
1236 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1237 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
Daniel Lezcano55786892008-03-04 13:47:47 -08001238 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001239 rt->u.dst.dev = dev;
1240 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001241 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001242
1243 cfg->fc_nlinfo.nl_net = dev->nd_net;
1244
Thomas Graf86872cb2006-08-22 00:01:08 -07001245 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246
1247out:
1248 if (dev)
1249 dev_put(dev);
1250 if (idev)
1251 in6_dev_put(idev);
1252 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001253 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254 return err;
1255}
1256
Thomas Graf86872cb2006-08-22 00:01:08 -07001257static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258{
1259 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001260 struct fib6_table *table;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001261 struct net *net = rt->rt6i_dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001263 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001264 return -ENOENT;
1265
Thomas Grafc71099a2006-08-04 23:20:06 -07001266 table = rt->rt6i_table;
1267 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268
Thomas Graf86872cb2006-08-22 00:01:08 -07001269 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 dst_release(&rt->u.dst);
1271
Thomas Grafc71099a2006-08-04 23:20:06 -07001272 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273
1274 return err;
1275}
1276
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001277int ip6_del_rt(struct rt6_info *rt)
1278{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001279 struct nl_info info = {
Daniel Lezcano55786892008-03-04 13:47:47 -08001280 .nl_net = rt->rt6i_dev->nd_net,
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001281 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001282 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001283}
1284
Thomas Graf86872cb2006-08-22 00:01:08 -07001285static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286{
Thomas Grafc71099a2006-08-04 23:20:06 -07001287 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001288 struct fib6_node *fn;
1289 struct rt6_info *rt;
1290 int err = -ESRCH;
1291
Daniel Lezcano55786892008-03-04 13:47:47 -08001292 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001293 if (table == NULL)
1294 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295
Thomas Grafc71099a2006-08-04 23:20:06 -07001296 read_lock_bh(&table->tb6_lock);
1297
1298 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001299 &cfg->fc_dst, cfg->fc_dst_len,
1300 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001301
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001303 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001304 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001306 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001308 if (cfg->fc_flags & RTF_GATEWAY &&
1309 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001311 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001312 continue;
1313 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001314 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315
Thomas Graf86872cb2006-08-22 00:01:08 -07001316 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001317 }
1318 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001319 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320
1321 return err;
1322}
1323
1324/*
1325 * Handle redirects
1326 */
/*
 * Lookup key used for redirect handling: a flow plus the address of the
 * router the redirect came from.  The flow has to stay the first member,
 * because a pointer to this structure is passed to fib6_rule_lookup() as
 * a struct flowi pointer and __ip6_route_redirect() casts it back to
 * reach the gateway field.
 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001327struct ip6rd_flowi {
1328 struct flowi fl;
1329 struct in6_addr gateway;
1330};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001332static struct rt6_info *__ip6_route_redirect(struct net *net,
1333 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001334 struct flowi *fl,
1335 int flags)
1336{
1337 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1338 struct rt6_info *rt;
1339 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001340
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001342 * Get the "current" route for this destination and
1343 * check if the redirect has come from approriate router.
1344 *
1345 * RFC 2461 specifies that redirects should only be
1346 * accepted if they come from the nexthop to the target.
1347 * Due to the way the routes are chosen, this notion
1348 * is a bit fuzzy and one might need to check all possible
1349 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351
Thomas Grafc71099a2006-08-04 23:20:06 -07001352 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001353 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001354restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001355 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001356 /*
1357 * Current route is on-link; redirect is always invalid.
1358 *
1359 * Seems, previous statement is not true. It could
1360 * be node, which looks for us as on-link (f.e. proxy ndisc)
1361 * But then router serving it might decide, that we should
1362 * know truth 8)8) --ANK (980726).
1363 */
1364 if (rt6_check_expired(rt))
1365 continue;
1366 if (!(rt->rt6i_flags & RTF_GATEWAY))
1367 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001368 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001369 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001370 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001371 continue;
1372 break;
1373 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001374
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001375 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001376 rt = net->ipv6.ip6_null_entry;
1377 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001378out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001379 dst_hold(&rt->u.dst);
1380
1381 read_unlock_bh(&table->tb6_lock);
1382
1383 return rt;
1384};
1385
1386static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1387 struct in6_addr *src,
1388 struct in6_addr *gateway,
1389 struct net_device *dev)
1390{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001391 int flags = RT6_LOOKUP_F_HAS_SADDR;
Daniel Lezcano55786892008-03-04 13:47:47 -08001392 struct net *net = dev->nd_net;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001393 struct ip6rd_flowi rdfl = {
1394 .fl = {
1395 .oif = dev->ifindex,
1396 .nl_u = {
1397 .ip6_u = {
1398 .daddr = *dest,
1399 .saddr = *src,
1400 },
1401 },
1402 },
1403 .gateway = *gateway,
1404 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001405
1406 if (rt6_need_strict(dest))
1407 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001408
Daniel Lezcano55786892008-03-04 13:47:47 -08001409 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001410 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001411}
1412
1413void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1414 struct in6_addr *saddr,
1415 struct neighbour *neigh, u8 *lladdr, int on_link)
1416{
1417 struct rt6_info *rt, *nrt = NULL;
1418 struct netevent_redirect netevent;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001419 struct net *net = neigh->dev->nd_net;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001420
1421 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1422
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001423 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424 if (net_ratelimit())
1425 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1426 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001427 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428 }
1429
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 /*
1431 * We have finally decided to accept it.
1432 */
1433
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001434 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1436 NEIGH_UPDATE_F_OVERRIDE|
1437 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1438 NEIGH_UPDATE_F_ISROUTER))
1439 );
1440
1441 /*
1442 * Redirect received -> path was valid.
1443 * Look, redirects are sent only in response to data packets,
1444 * so that this nexthop apparently is reachable. --ANK
1445 */
1446 dst_confirm(&rt->u.dst);
1447
1448 /* Duplicate redirect: silently ignore. */
1449 if (neigh == rt->u.dst.neighbour)
1450 goto out;
1451
1452 nrt = ip6_rt_copy(rt);
1453 if (nrt == NULL)
1454 goto out;
1455
1456 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1457 if (on_link)
1458 nrt->rt6i_flags &= ~RTF_GATEWAY;
1459
1460 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1461 nrt->rt6i_dst.plen = 128;
1462 nrt->u.dst.flags |= DST_HOST;
1463
1464 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1465 nrt->rt6i_nexthop = neigh_clone(neigh);
1466 /* Reset pmtu, it may be better */
1467 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001468 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1469 dst_mtu(&nrt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470
Thomas Graf40e22e82006-08-22 00:00:45 -07001471 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472 goto out;
1473
Tom Tucker8d717402006-07-30 20:43:36 -07001474 netevent.old = &rt->u.dst;
1475 netevent.new = &nrt->u.dst;
1476 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1477
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001479 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 return;
1481 }
1482
1483out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001484 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485 return;
1486}
1487
1488/*
1489 * Handle ICMP "packet too big" messages
1490 * i.e. Path MTU discovery
1491 */
1492
1493void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1494 struct net_device *dev, u32 pmtu)
1495{
1496 struct rt6_info *rt, *nrt;
Daniel Lezcano55786892008-03-04 13:47:47 -08001497 struct net *net = dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 int allfrag = 0;
1499
Daniel Lezcano55786892008-03-04 13:47:47 -08001500 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 if (rt == NULL)
1502 return;
1503
1504 if (pmtu >= dst_mtu(&rt->u.dst))
1505 goto out;
1506
1507 if (pmtu < IPV6_MIN_MTU) {
1508 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001509 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 * MTU (1280) and a fragment header should always be included
1511 * after a node receiving Too Big message reporting PMTU is
1512 * less than the IPv6 Minimum Link MTU.
1513 */
1514 pmtu = IPV6_MIN_MTU;
1515 allfrag = 1;
1516 }
1517
1518 /* New mtu received -> path was valid.
1519 They are sent only in response to data packets,
1520 so that this nexthop apparently is reachable. --ANK
1521 */
1522 dst_confirm(&rt->u.dst);
1523
1524 /* Host route. If it is static, it would be better
1525 not to override it, but add new one, so that
1526 when cache entry will expire old pmtu
1527 would return automatically.
1528 */
1529 if (rt->rt6i_flags & RTF_CACHE) {
1530 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1531 if (allfrag)
1532 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano55786892008-03-04 13:47:47 -08001533 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1535 goto out;
1536 }
1537
1538 /* Network route.
1539 Two cases are possible:
1540 1. It is connected route. Action: COW
1541 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1542 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001543 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001544 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001545 else
1546 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001547
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001548 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001549 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1550 if (allfrag)
1551 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1552
1553 /* According to RFC 1981, detecting PMTU increase shouldn't be
1554 * happened within 5 mins, the recommended timer is 10 mins.
1555 * Here this route expiration time is set to ip6_rt_mtu_expires
1556 * which is 10 mins. After 10 mins the decreased pmtu is expired
1557 * and detecting PMTU increase will be automatically happened.
1558 */
Daniel Lezcano55786892008-03-04 13:47:47 -08001559 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001560 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1561
Thomas Graf40e22e82006-08-22 00:00:45 -07001562 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564out:
1565 dst_release(&rt->u.dst);
1566}
1567
1568/*
1569 * Misc support functions
1570 */
1571
1572static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1573{
1574 struct rt6_info *rt = ip6_dst_alloc();
1575
1576 if (rt) {
1577 rt->u.dst.input = ort->u.dst.input;
1578 rt->u.dst.output = ort->u.dst.output;
1579
1580 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001581 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582 rt->u.dst.dev = ort->u.dst.dev;
1583 if (rt->u.dst.dev)
1584 dev_hold(rt->u.dst.dev);
1585 rt->rt6i_idev = ort->rt6i_idev;
1586 if (rt->rt6i_idev)
1587 in6_dev_hold(rt->rt6i_idev);
1588 rt->u.dst.lastuse = jiffies;
1589 rt->rt6i_expires = 0;
1590
1591 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1592 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1593 rt->rt6i_metric = 0;
1594
1595 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1596#ifdef CONFIG_IPV6_SUBTREES
1597 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1598#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001599 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 }
1601 return rt;
1602}
1603
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001604#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001605static struct rt6_info *rt6_get_route_info(struct net *net,
1606 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001607 struct in6_addr *gwaddr, int ifindex)
1608{
1609 struct fib6_node *fn;
1610 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001611 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001612
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001613 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001614 if (table == NULL)
1615 return NULL;
1616
1617 write_lock_bh(&table->tb6_lock);
1618 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001619 if (!fn)
1620 goto out;
1621
Eric Dumazet7cc48262007-02-09 16:22:57 -08001622 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001623 if (rt->rt6i_dev->ifindex != ifindex)
1624 continue;
1625 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1626 continue;
1627 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1628 continue;
1629 dst_hold(&rt->u.dst);
1630 break;
1631 }
1632out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001633 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001634 return rt;
1635}
1636
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001637static struct rt6_info *rt6_add_route_info(struct net *net,
1638 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001639 struct in6_addr *gwaddr, int ifindex,
1640 unsigned pref)
1641{
Thomas Graf86872cb2006-08-22 00:01:08 -07001642 struct fib6_config cfg = {
1643 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001644 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001645 .fc_ifindex = ifindex,
1646 .fc_dst_len = prefixlen,
1647 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1648 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001649 .fc_nlinfo.pid = 0,
1650 .fc_nlinfo.nlh = NULL,
1651 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001652 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001653
Thomas Graf86872cb2006-08-22 00:01:08 -07001654 ipv6_addr_copy(&cfg.fc_dst, prefix);
1655 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1656
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001657 /* We should treat it as a default route if prefix length is 0. */
1658 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001659 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001660
Thomas Graf86872cb2006-08-22 00:01:08 -07001661 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001662
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001663 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001664}
1665#endif
1666
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001668{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001670 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671
Daniel Lezcano55786892008-03-04 13:47:47 -08001672 table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001673 if (table == NULL)
1674 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675
Thomas Grafc71099a2006-08-04 23:20:06 -07001676 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001677 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001679 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1681 break;
1682 }
1683 if (rt)
1684 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001685 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 return rt;
1687}
1688
Fred L. Templinc7dc89c2007-11-29 22:11:40 +11001689EXPORT_SYMBOL(rt6_get_dflt_router);
1690
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001692 struct net_device *dev,
1693 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694{
Thomas Graf86872cb2006-08-22 00:01:08 -07001695 struct fib6_config cfg = {
1696 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001697 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001698 .fc_ifindex = dev->ifindex,
1699 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1700 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001701 .fc_nlinfo.pid = 0,
1702 .fc_nlinfo.nlh = NULL,
1703 .fc_nlinfo.nl_net = dev->nd_net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001704 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705
Thomas Graf86872cb2006-08-22 00:01:08 -07001706 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707
Thomas Graf86872cb2006-08-22 00:01:08 -07001708 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 return rt6_get_dflt_router(gwaddr, dev);
1711}
1712
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001713void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714{
1715 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001716 struct fib6_table *table;
1717
1718 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001719 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001720 if (table == NULL)
1721 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001722
1723restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001724 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001725 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1727 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001728 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001729 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 goto restart;
1731 }
1732 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001733 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001734}
1735
Daniel Lezcano55786892008-03-04 13:47:47 -08001736static void rtmsg_to_fib6_config(struct net *net,
1737 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001738 struct fib6_config *cfg)
1739{
1740 memset(cfg, 0, sizeof(*cfg));
1741
1742 cfg->fc_table = RT6_TABLE_MAIN;
1743 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1744 cfg->fc_metric = rtmsg->rtmsg_metric;
1745 cfg->fc_expires = rtmsg->rtmsg_info;
1746 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1747 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1748 cfg->fc_flags = rtmsg->rtmsg_flags;
1749
Daniel Lezcano55786892008-03-04 13:47:47 -08001750 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001751
Thomas Graf86872cb2006-08-22 00:01:08 -07001752 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1753 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1754 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1755}
1756
Daniel Lezcano55786892008-03-04 13:47:47 -08001757int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758{
Thomas Graf86872cb2006-08-22 00:01:08 -07001759 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760 struct in6_rtmsg rtmsg;
1761 int err;
1762
1763 switch(cmd) {
1764 case SIOCADDRT: /* Add a route */
1765 case SIOCDELRT: /* Delete a route */
1766 if (!capable(CAP_NET_ADMIN))
1767 return -EPERM;
1768 err = copy_from_user(&rtmsg, arg,
1769 sizeof(struct in6_rtmsg));
1770 if (err)
1771 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001772
Daniel Lezcano55786892008-03-04 13:47:47 -08001773 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001774
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 rtnl_lock();
1776 switch (cmd) {
1777 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001778 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779 break;
1780 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001781 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782 break;
1783 default:
1784 err = -EINVAL;
1785 }
1786 rtnl_unlock();
1787
1788 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001789 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790
1791 return -EINVAL;
1792}
1793
1794/*
1795 * Drop the packet on the floor
1796 */
1797
Ilpo Järvinen50eb4312008-01-12 03:21:00 -08001798static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001799{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001800 int type;
1801 switch (ipstats_mib_noroutes) {
1802 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001803 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001804 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1805 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1806 break;
1807 }
1808 /* FALLTHROUGH */
1809 case IPSTATS_MIB_OUTNOROUTES:
1810 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1811 break;
1812 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001813 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 kfree_skb(skb);
1815 return 0;
1816}
1817
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001818static int ip6_pkt_discard(struct sk_buff *skb)
1819{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001820 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001821}
1822
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001823static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824{
1825 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001826 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827}
1828
David S. Miller6723ab52006-10-18 21:20:57 -07001829#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1830
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001831static int ip6_pkt_prohibit(struct sk_buff *skb)
1832{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001833 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001834}
1835
1836static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1837{
1838 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001839 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001840}
1841
David S. Miller6723ab52006-10-18 21:20:57 -07001842#endif
1843
Linus Torvalds1da177e2005-04-16 15:20:36 -07001844/*
1845 * Allocate a dst for local (unicast / anycast) address.
1846 */
1847
1848struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1849 const struct in6_addr *addr,
1850 int anycast)
1851{
Daniel Lezcano55786892008-03-04 13:47:47 -08001852 struct net *net = idev->dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853 struct rt6_info *rt = ip6_dst_alloc();
1854
1855 if (rt == NULL)
1856 return ERR_PTR(-ENOMEM);
1857
Daniel Lezcano55786892008-03-04 13:47:47 -08001858 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859 in6_dev_hold(idev);
1860
1861 rt->u.dst.flags = DST_HOST;
1862 rt->u.dst.input = ip6_input;
1863 rt->u.dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001864 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865 rt->rt6i_idev = idev;
1866 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001867 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1869 rt->u.dst.obsolete = -1;
1870
1871 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001872 if (anycast)
1873 rt->rt6i_flags |= RTF_ANYCAST;
1874 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875 rt->rt6i_flags |= RTF_LOCAL;
1876 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1877 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001878 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 return ERR_PTR(-ENOMEM);
1880 }
1881
1882 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1883 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001884 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885
1886 atomic_set(&rt->u.dst.__refcnt, 1);
1887
1888 return rt;
1889}
1890
/* Argument bundle handed to fib6_ifdown() through fib6_clean_all(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any device */
	struct net		*net;	/* namespace whose ip6_null_entry is spared */
};
1895
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896static int fib6_ifdown(struct rt6_info *rt, void *arg)
1897{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001898 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1899 struct net *net = ((struct arg_dev_net *)arg)->net;
1900
1901 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1902 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903 RT6_TRACE("deleted by ifdown %p\n", rt);
1904 return -1;
1905 }
1906 return 0;
1907}
1908
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001909void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001911 struct arg_dev_net adn = {
1912 .dev = dev,
1913 .net = net,
1914 };
1915
1916 fib6_clean_all(net, fib6_ifdown, 0, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001917}
1918
/* Argument bundle handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* the new MTU */
};
1924
1925static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1926{
1927 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1928 struct inet6_dev *idev;
Daniel Lezcano55786892008-03-04 13:47:47 -08001929 struct net *net = arg->dev->nd_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930
1931 /* In IPv6 pmtu discovery is not optional,
1932 so that RTAX_MTU lock cannot disable it.
1933 We still use this lock to block changes
1934 caused by addrconf/ndisc.
1935 */
1936
1937 idev = __in6_dev_get(arg->dev);
1938 if (idev == NULL)
1939 return 0;
1940
1941 /* For administrative MTU increase, there is no way to discover
1942 IPv6 PMTU increase, so PMTU increase should be updated here.
1943 Since RFC 1981 doesn't include administrative MTU increase
1944 update PMTU increase is a MUST. (i.e. jumbo frame)
1945 */
1946 /*
1947 If new MTU is less than route PMTU, this new MTU will be the
1948 lowest MTU in the path, update the route PMTU to reflect PMTU
1949 decreases; if new MTU is greater than route PMTU, and the
1950 old MTU is the lowest MTU in the path, update the route PMTU
1951 to reflect the increase. In this case if the other nodes' MTU
1952 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1953 PMTU discouvery.
1954 */
1955 if (rt->rt6i_dev == arg->dev &&
1956 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08001957 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001958 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001959 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Daniel Lezcano55786892008-03-04 13:47:47 -08001961 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07001962 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963 return 0;
1964}
1965
1966void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1967{
Thomas Grafc71099a2006-08-04 23:20:06 -07001968 struct rt6_mtu_change_arg arg = {
1969 .dev = dev,
1970 .mtu = mtu,
1971 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001973 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001974}
1975
Patrick McHardyef7c79e2007-06-05 12:38:30 -07001976static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07001977 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07001978 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07001979 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07001980 [RTA_PRIORITY] = { .type = NLA_U32 },
1981 [RTA_METRICS] = { .type = NLA_NESTED },
1982};
1983
1984static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1985 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001986{
Thomas Graf86872cb2006-08-22 00:01:08 -07001987 struct rtmsg *rtm;
1988 struct nlattr *tb[RTA_MAX+1];
1989 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990
Thomas Graf86872cb2006-08-22 00:01:08 -07001991 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1992 if (err < 0)
1993 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994
Thomas Graf86872cb2006-08-22 00:01:08 -07001995 err = -EINVAL;
1996 rtm = nlmsg_data(nlh);
1997 memset(cfg, 0, sizeof(*cfg));
1998
1999 cfg->fc_table = rtm->rtm_table;
2000 cfg->fc_dst_len = rtm->rtm_dst_len;
2001 cfg->fc_src_len = rtm->rtm_src_len;
2002 cfg->fc_flags = RTF_UP;
2003 cfg->fc_protocol = rtm->rtm_protocol;
2004
2005 if (rtm->rtm_type == RTN_UNREACHABLE)
2006 cfg->fc_flags |= RTF_REJECT;
2007
2008 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2009 cfg->fc_nlinfo.nlh = nlh;
Benjamin Thery2216b482008-01-30 19:09:35 -08002010 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
Thomas Graf86872cb2006-08-22 00:01:08 -07002011
2012 if (tb[RTA_GATEWAY]) {
2013 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2014 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002016
2017 if (tb[RTA_DST]) {
2018 int plen = (rtm->rtm_dst_len + 7) >> 3;
2019
2020 if (nla_len(tb[RTA_DST]) < plen)
2021 goto errout;
2022
2023 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002025
2026 if (tb[RTA_SRC]) {
2027 int plen = (rtm->rtm_src_len + 7) >> 3;
2028
2029 if (nla_len(tb[RTA_SRC]) < plen)
2030 goto errout;
2031
2032 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002034
2035 if (tb[RTA_OIF])
2036 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2037
2038 if (tb[RTA_PRIORITY])
2039 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2040
2041 if (tb[RTA_METRICS]) {
2042 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2043 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002044 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002045
2046 if (tb[RTA_TABLE])
2047 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2048
2049 err = 0;
2050errout:
2051 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052}
2053
Thomas Grafc127ea22007-03-22 11:58:32 -07002054static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055{
Thomas Graf86872cb2006-08-22 00:01:08 -07002056 struct fib6_config cfg;
2057 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058
Thomas Graf86872cb2006-08-22 00:01:08 -07002059 err = rtm_to_fib6_config(skb, nlh, &cfg);
2060 if (err < 0)
2061 return err;
2062
2063 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064}
2065
Thomas Grafc127ea22007-03-22 11:58:32 -07002066static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067{
Thomas Graf86872cb2006-08-22 00:01:08 -07002068 struct fib6_config cfg;
2069 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070
Thomas Graf86872cb2006-08-22 00:01:08 -07002071 err = rtm_to_fib6_config(skb, nlh, &cfg);
2072 if (err < 0)
2073 return err;
2074
2075 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076}
2077
Thomas Graf339bf982006-11-10 14:10:15 -08002078static inline size_t rt6_nlmsg_size(void)
2079{
2080 return NLMSG_ALIGN(sizeof(struct rtmsg))
2081 + nla_total_size(16) /* RTA_SRC */
2082 + nla_total_size(16) /* RTA_DST */
2083 + nla_total_size(16) /* RTA_GATEWAY */
2084 + nla_total_size(16) /* RTA_PREFSRC */
2085 + nla_total_size(4) /* RTA_TABLE */
2086 + nla_total_size(4) /* RTA_IIF */
2087 + nla_total_size(4) /* RTA_OIF */
2088 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002089 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002090 + nla_total_size(sizeof(struct rta_cacheinfo));
2091}
2092
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002094 struct in6_addr *dst, struct in6_addr *src,
2095 int iif, int type, u32 pid, u32 seq,
2096 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097{
2098 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002099 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002100 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002101 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002102
2103 if (prefix) { /* user wants prefix routes only */
2104 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2105 /* success since this is not a prefix route */
2106 return 1;
2107 }
2108 }
2109
Thomas Graf2d7202b2006-08-22 00:01:27 -07002110 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2111 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002112 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002113
2114 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115 rtm->rtm_family = AF_INET6;
2116 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2117 rtm->rtm_src_len = rt->rt6i_src.plen;
2118 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002119 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002120 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002121 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002122 table = RT6_TABLE_UNSPEC;
2123 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002124 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002125 if (rt->rt6i_flags&RTF_REJECT)
2126 rtm->rtm_type = RTN_UNREACHABLE;
2127 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2128 rtm->rtm_type = RTN_LOCAL;
2129 else
2130 rtm->rtm_type = RTN_UNICAST;
2131 rtm->rtm_flags = 0;
2132 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2133 rtm->rtm_protocol = rt->rt6i_protocol;
2134 if (rt->rt6i_flags&RTF_DYNAMIC)
2135 rtm->rtm_protocol = RTPROT_REDIRECT;
2136 else if (rt->rt6i_flags & RTF_ADDRCONF)
2137 rtm->rtm_protocol = RTPROT_KERNEL;
2138 else if (rt->rt6i_flags&RTF_DEFAULT)
2139 rtm->rtm_protocol = RTPROT_RA;
2140
2141 if (rt->rt6i_flags&RTF_CACHE)
2142 rtm->rtm_flags |= RTM_F_CLONED;
2143
2144 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002145 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002146 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002148 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149#ifdef CONFIG_IPV6_SUBTREES
2150 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002151 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002152 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002153 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002154 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155#endif
2156 if (iif)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002157 NLA_PUT_U32(skb, RTA_IIF, iif);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158 else if (dst) {
2159 struct in6_addr saddr_buf;
YOSHIFUJI Hideaki5e5f3f02008-03-03 21:44:34 +09002160 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2161 dst, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002162 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002163 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002164
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002166 goto nla_put_failure;
2167
Linus Torvalds1da177e2005-04-16 15:20:36 -07002168 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002169 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2170
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002172 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2173
2174 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002175
2176 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2177 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2178 expires, rt->u.dst.error) < 0)
2179 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180
Thomas Graf2d7202b2006-08-22 00:01:27 -07002181 return nlmsg_end(skb, nlh);
2182
2183nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002184 nlmsg_cancel(skb, nlh);
2185 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186}
2187
Patrick McHardy1b43af52006-08-10 23:11:17 -07002188int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002189{
2190 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2191 int prefix;
2192
Thomas Graf2d7202b2006-08-22 00:01:27 -07002193 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2194 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2196 } else
2197 prefix = 0;
2198
2199 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2200 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002201 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002202}
2203
Thomas Grafc127ea22007-03-22 11:58:32 -07002204static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205{
Denis V. Lunevb8542722007-12-01 00:21:31 +11002206 struct net *net = in_skb->sk->sk_net;
Thomas Grafab364a62006-08-22 00:01:47 -07002207 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002209 struct sk_buff *skb;
2210 struct rtmsg *rtm;
2211 struct flowi fl;
2212 int err, iif = 0;
2213
2214 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2215 if (err < 0)
2216 goto errout;
2217
2218 err = -EINVAL;
2219 memset(&fl, 0, sizeof(fl));
2220
2221 if (tb[RTA_SRC]) {
2222 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2223 goto errout;
2224
2225 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2226 }
2227
2228 if (tb[RTA_DST]) {
2229 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2230 goto errout;
2231
2232 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2233 }
2234
2235 if (tb[RTA_IIF])
2236 iif = nla_get_u32(tb[RTA_IIF]);
2237
2238 if (tb[RTA_OIF])
2239 fl.oif = nla_get_u32(tb[RTA_OIF]);
2240
2241 if (iif) {
2242 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002243 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002244 if (!dev) {
2245 err = -ENODEV;
2246 goto errout;
2247 }
2248 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249
2250 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002251 if (skb == NULL) {
2252 err = -ENOBUFS;
2253 goto errout;
2254 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002255
2256 /* Reserve room for dummy headers, this skb can pass
2257 through good chunk of routing engine.
2258 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002259 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2261
Thomas Grafab364a62006-08-22 00:01:47 -07002262 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002263 skb->dst = &rt->u.dst;
2264
Thomas Grafab364a62006-08-22 00:01:47 -07002265 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002267 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002269 kfree_skb(skb);
2270 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271 }
2272
Daniel Lezcano55786892008-03-04 13:47:47 -08002273 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002274errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002275 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276}
2277
Thomas Graf86872cb2006-08-22 00:01:08 -07002278void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279{
2280 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002281 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002282 u32 seq;
2283 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002284
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002285 err = -ENOBUFS;
2286 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002287
Thomas Graf339bf982006-11-10 14:10:15 -08002288 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002289 if (skb == NULL)
2290 goto errout;
2291
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002292 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2293 event, info->pid, seq, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002294 if (err < 0) {
2295 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2296 WARN_ON(err == -EMSGSIZE);
2297 kfree_skb(skb);
2298 goto errout;
2299 }
Daniel Lezcano55786892008-03-04 13:47:47 -08002300 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2301 info->nlh, gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002302errout:
2303 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002304 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305}
2306
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002307static int ip6_route_dev_notify(struct notifier_block *this,
2308 unsigned long event, void *data)
2309{
2310 struct net_device *dev = (struct net_device *)data;
2311 struct net *net = dev->nd_net;
2312
2313 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2314 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2315 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2316#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2317 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2318 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2319 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2320 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2321#endif
2322 }
2323
2324 return NOTIFY_OK;
2325}
2326
Linus Torvalds1da177e2005-04-16 15:20:36 -07002327/*
2328 * /proc
2329 */
2330
2331#ifdef CONFIG_PROC_FS
2332
/*
 * NOTE(review): not referenced in the visible part of this file; it looks
 * like a fixed per-route line length for a text dump -- confirm whether it
 * is still needed.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2334
/*
 * Bookkeeping for a buffer-based dump.
 *
 * NOTE(review): nothing in the visible part of this file uses this
 * structure (the /proc handlers below go through seq_file); confirm
 * whether it can be dropped.
 */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;
	int length;
	int skip;
	int len;
};
2343
2344static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2345{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002346 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002348 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2349 rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002350
2351#ifdef CONFIG_IPV6_SUBTREES
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002352 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2353 rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002354#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002355 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002356#endif
2357
2358 if (rt->rt6i_nexthop) {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002359 seq_printf(m, NIP6_SEQFMT,
2360 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002362 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002364 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2365 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2366 rt->u.dst.__use, rt->rt6i_flags,
2367 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368 return 0;
2369}
2370
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002371static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002372{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002373 struct net *net = (struct net *)m->private;
2374 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002375 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376}
2377
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002378static int ipv6_route_open(struct inode *inode, struct file *file)
2379{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002380 struct net *net = get_proc_net(inode);
2381 if (!net)
2382 return -ENXIO;
2383 return single_open(file, ipv6_route_show, net);
2384}
2385
2386static int ipv6_route_release(struct inode *inode, struct file *file)
2387{
2388 struct seq_file *seq = file->private_data;
2389 struct net *net = seq->private;
2390 put_net(net);
2391 return single_release(inode, file);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002392}
2393
2394static const struct file_operations ipv6_route_proc_fops = {
2395 .owner = THIS_MODULE,
2396 .open = ipv6_route_open,
2397 .read = seq_read,
2398 .llseek = seq_lseek,
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002399 .release = ipv6_route_release,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002400};
2401
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2403{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002404 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002405 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002406 net->ipv6.rt6_stats->fib_nodes,
2407 net->ipv6.rt6_stats->fib_route_nodes,
2408 net->ipv6.rt6_stats->fib_rt_alloc,
2409 net->ipv6.rt6_stats->fib_rt_entries,
2410 net->ipv6.rt6_stats->fib_rt_cache,
Benjamin Theryc5728722008-03-03 23:34:17 -08002411 atomic_read(&ip6_dst_ops.entries),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002412 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413
2414 return 0;
2415}
2416
2417static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2418{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002419 struct net *net = get_proc_net(inode);
2420 return single_open(file, rt6_stats_seq_show, net);
2421}
2422
2423static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2424{
2425 struct seq_file *seq = file->private_data;
2426 struct net *net = (struct net *)seq->private;
2427 put_net(net);
2428 return single_release(inode, file);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002429}
2430
Arjan van de Ven9a321442007-02-12 00:55:35 -08002431static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432 .owner = THIS_MODULE,
2433 .open = rt6_stats_seq_open,
2434 .read = seq_read,
2435 .llseek = seq_lseek,
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002436 .release = rt6_stats_seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002437};
2438#endif /* CONFIG_PROC_FS */
2439
2440#ifdef CONFIG_SYSCTL
2441
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442static
2443int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2444 void __user *buffer, size_t *lenp, loff_t *ppos)
2445{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002446 struct net *net = current->nsproxy->net_ns;
2447 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448 if (write) {
2449 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002450 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002451 return 0;
2452 } else
2453 return -EINVAL;
2454}
2455
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002456ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002457 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002458 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002459 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002460 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002461 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002462 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463 },
2464 {
2465 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2466 .procname = "gc_thresh",
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002467 .data = &ip6_dst_ops.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468 .maxlen = sizeof(int),
2469 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002470 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002471 },
2472 {
2473 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2474 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002475 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002476 .maxlen = sizeof(int),
2477 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002478 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002479 },
2480 {
2481 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2482 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002483 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002484 .maxlen = sizeof(int),
2485 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002486 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002487 .strategy = &sysctl_jiffies,
2488 },
2489 {
2490 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2491 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002492 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493 .maxlen = sizeof(int),
2494 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002495 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496 .strategy = &sysctl_jiffies,
2497 },
2498 {
2499 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2500 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002501 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002502 .maxlen = sizeof(int),
2503 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002504 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002505 .strategy = &sysctl_jiffies,
2506 },
2507 {
2508 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2509 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002510 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511 .maxlen = sizeof(int),
2512 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002513 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002514 .strategy = &sysctl_jiffies,
2515 },
2516 {
2517 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2518 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002519 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002520 .maxlen = sizeof(int),
2521 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002522 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002523 .strategy = &sysctl_jiffies,
2524 },
2525 {
2526 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2527 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002528 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002529 .maxlen = sizeof(int),
2530 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002531 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532 .strategy = &sysctl_jiffies,
2533 },
2534 {
2535 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2536 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002537 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002538 .maxlen = sizeof(int),
2539 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002540 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002541 .strategy = &sysctl_ms_jiffies,
2542 },
2543 { .ctl_name = 0 }
2544};
2545
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002546struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2547{
2548 struct ctl_table *table;
2549
2550 table = kmemdup(ipv6_route_table_template,
2551 sizeof(ipv6_route_table_template),
2552 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002553
2554 if (table) {
2555 table[0].data = &net->ipv6.sysctl.flush_delay;
2556 /* table[1].data will be handled when we have
2557 routes per namespace */
2558 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2559 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2560 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2561 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2562 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2563 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2564 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2565 }
2566
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002567 return table;
2568}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569#endif
2570
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002571static int ip6_route_net_init(struct net *net)
2572{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002573 int ret = 0;
2574
2575 ret = -ENOMEM;
2576 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2577 sizeof(*net->ipv6.ip6_null_entry),
2578 GFP_KERNEL);
2579 if (!net->ipv6.ip6_null_entry)
2580 goto out;
2581 net->ipv6.ip6_null_entry->u.dst.path =
2582 (struct dst_entry *)net->ipv6.ip6_null_entry;
2583
2584#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2585 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2586 sizeof(*net->ipv6.ip6_prohibit_entry),
2587 GFP_KERNEL);
2588 if (!net->ipv6.ip6_prohibit_entry) {
2589 kfree(net->ipv6.ip6_null_entry);
2590 goto out;
2591 }
2592 net->ipv6.ip6_prohibit_entry->u.dst.path =
2593 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2594
2595 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2596 sizeof(*net->ipv6.ip6_blk_hole_entry),
2597 GFP_KERNEL);
2598 if (!net->ipv6.ip6_blk_hole_entry) {
2599 kfree(net->ipv6.ip6_null_entry);
2600 kfree(net->ipv6.ip6_prohibit_entry);
2601 goto out;
2602 }
2603 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2604 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2605#endif
2606
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002607#ifdef CONFIG_PROC_FS
2608 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2609 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2610#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002611 ret = 0;
2612out:
2613 return ret;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002614}
2615
2616static void ip6_route_net_exit(struct net *net)
2617{
2618#ifdef CONFIG_PROC_FS
2619 proc_net_remove(net, "ipv6_route");
2620 proc_net_remove(net, "rt6_stats");
2621#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002622 kfree(net->ipv6.ip6_null_entry);
2623#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2624 kfree(net->ipv6.ip6_prohibit_entry);
2625 kfree(net->ipv6.ip6_blk_hole_entry);
2626#endif
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002627}
2628
2629static struct pernet_operations ip6_route_net_ops = {
2630 .init = ip6_route_net_init,
2631 .exit = ip6_route_net_exit,
2632};
2633
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002634static struct notifier_block ip6_route_dev_notifier = {
2635 .notifier_call = ip6_route_dev_notify,
2636 .priority = 0,
2637};
2638
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002639int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002640{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002641 int ret;
2642
Alexey Dobriyane5d679f332006-08-26 19:25:52 -07002643 ip6_dst_ops.kmem_cachep =
2644 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
Daniel Lezcanof845ab62007-12-07 00:45:16 -08002645 SLAB_HWCACHE_ALIGN, NULL);
2646 if (!ip6_dst_ops.kmem_cachep)
2647 return -ENOMEM;
2648
David S. Miller14e50e52007-05-24 18:17:54 -07002649 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2650
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002651 ret = register_pernet_subsys(&ip6_route_net_ops);
2652 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002653 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002654
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002655 /* Registering of the loopback is done before this portion of code,
2656 * the loopback reference in rt6_info will not be taken, do it
2657 * manually for init_net */
2658 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2659 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2660 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2661 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2662 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2663 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2664 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2665 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002666 ret = fib6_init();
2667 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002668 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002669
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002670 ret = xfrm6_init();
2671 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002672 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002673
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002674 ret = fib6_rules_init();
2675 if (ret)
2676 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002677
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002678 ret = -ENOBUFS;
2679 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2680 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2681 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2682 goto fib6_rules_init;
2683
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002684 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002685 if (ret)
2686 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002687
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002688out:
2689 return ret;
2690
2691fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002692 fib6_rules_cleanup();
2693xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002694 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002695out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002696 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002697out_register_subsys:
2698 unregister_pernet_subsys(&ip6_route_net_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002699out_kmem_cache:
2700 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2701 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002702}
2703
2704void ip6_route_cleanup(void)
2705{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002706 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002707 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002708 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002709 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002710 unregister_pernet_subsys(&ip6_route_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002711 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002712}