blob: fb7ff8f0c6dbf3c4b16d0fc2a048ca3732964dbe [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020043#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070054#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070055#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

/*
 * Debug helpers.  Both expand to printk() calls only when RT6_DEBUG >= 3;
 * otherwise they compile to nothing.
 *
 * RDBG(x)       - x must be a complete, parenthesised printk argument list.
 * RT6_TRACE(..) - printk at KERN_DEBUG level.
 */
#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
73
/*
 * Compile-time switch tested in ip6_pol_route(): when non-zero, a matched
 * route that already has a next hop (or is RTF_NONEXTHOP) is cloned with
 * rt6_alloc_clone(); when zero, the matched route itself is returned.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
/*
 * Forward declarations for helpers and dst_ops callbacks that are
 * referenced by the static tables below before being defined.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

/* Packet handlers installed as .input/.output of the reject routes. */
static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

/* Route Information Option support (used by rt6_route_rcv()). */
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex);
#endif
98
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -080099static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100 .family = AF_INET6,
101 .protocol = __constant_htons(ETH_P_IPV6),
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700110 .local_out = __ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800112 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700113};
114
David S. Miller14e50e52007-05-24 18:17:54 -0700115static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
116{
117}
118
119static struct dst_ops ip6_dst_blackhole_ops = {
120 .family = AF_INET6,
121 .protocol = __constant_htons(ETH_P_IPV6),
122 .destroy = ip6_dst_destroy,
123 .check = ip6_dst_check,
124 .update_pmtu = ip6_rt_blackhole_update_pmtu,
125 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800126 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700127};
128
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800129static struct rt6_info ip6_null_entry_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130 .u = {
131 .dst = {
132 .__refcnt = ATOMIC_INIT(1),
133 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 .obsolete = -1,
135 .error = -ENETUNREACH,
136 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
137 .input = ip6_pkt_discard,
138 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 }
140 },
141 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
142 .rt6i_metric = ~(u32) 0,
143 .rt6i_ref = ATOMIC_INIT(1),
144};
145
Thomas Graf101367c2006-08-04 03:39:02 -0700146#ifdef CONFIG_IPV6_MULTIPLE_TABLES
147
/* Input/output handlers installed on ip6_prohibit_entry_template below. */
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700150
Adrian Bunk280a34c2008-04-21 02:29:32 -0700151static struct rt6_info ip6_prohibit_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700152 .u = {
153 .dst = {
154 .__refcnt = ATOMIC_INIT(1),
155 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700156 .obsolete = -1,
157 .error = -EACCES,
158 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Thomas Graf9ce8ade2006-10-18 20:46:54 -0700159 .input = ip6_pkt_prohibit,
160 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700161 }
162 },
163 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
164 .rt6i_metric = ~(u32) 0,
165 .rt6i_ref = ATOMIC_INIT(1),
166};
167
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800168static struct rt6_info ip6_blk_hole_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700169 .u = {
170 .dst = {
171 .__refcnt = ATOMIC_INIT(1),
172 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700173 .obsolete = -1,
174 .error = -EINVAL,
175 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Herbert Xu352e5122007-11-13 21:34:06 -0800176 .input = dst_discard,
177 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700178 }
179 },
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
185#endif
186
/*
 * Allocate a dst from @ops and hand it back typed as an rt6_info.
 * Returns whatever dst_alloc() returned, cast to struct rt6_info *.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *dst = dst_alloc(ops);

	return (struct rt6_info *)dst;
}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900201 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202}
203
204static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
206{
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800209 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900210 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800212 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
213 struct inet6_dev *loopback_idev =
214 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 if (loopback_idev != NULL) {
216 rt->rt6i_idev = loopback_idev;
217 in6_dev_put(idev);
218 }
219 }
220}
221
222static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223{
224 return (rt->rt6i_flags & RTF_EXPIRES &&
225 time_after(jiffies, rt->rt6i_expires));
226}
227
Thomas Grafc71099a2006-08-04 23:20:06 -0700228static inline int rt6_need_strict(struct in6_addr *daddr)
229{
230 return (ipv6_addr_type(daddr) &
YOSHIFUJI Hideaki5ce83af2008-06-25 16:58:17 +0900231 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
Thomas Grafc71099a2006-08-04 23:20:06 -0700232}
233
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700235 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236 */
237
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800238static inline struct rt6_info *rt6_device_match(struct net *net,
239 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900240 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700242 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243{
244 struct rt6_info *local = NULL;
245 struct rt6_info *sprt;
246
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900247 if (!oif && ipv6_addr_any(saddr))
248 goto out;
249
250 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
251 struct net_device *dev = sprt->rt6i_dev;
252
253 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254 if (dev->ifindex == oif)
255 return sprt;
256 if (dev->flags & IFF_LOOPBACK) {
257 if (sprt->rt6i_idev == NULL ||
258 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700259 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900261 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 local->rt6i_idev->dev->ifindex == oif))
263 continue;
264 }
265 local = sprt;
266 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900267 } else {
268 if (ipv6_chk_addr(net, saddr, dev,
269 flags & RT6_LOOKUP_F_IFACE))
270 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900272 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900274 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 if (local)
276 return local;
277
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700278 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800279 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900281out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 return rt;
283}
284
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a
 * NUD_VALID state and the last update of the neighbour entry is older
 * than the interface's rtr_probe_interval, send a Neighbour Solicitation
 * to the solicited-node multicast address of the next hop.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.  Router Reachability Probe MUST be
 * rate-limited to no more than one per minute.
 *
 * A NULL @rt or a route without next hop is silently ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* cheap unlocked pre-check */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * NOTE(review): neigh->updated is written while only the read side
	 * of neigh->lock is held -- confirm this is intended.
	 */
	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* the solicitation is sent after the lock has been dropped */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
320
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800322 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700324static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800326 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700327 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800328 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700329 if ((dev->flags & IFF_LOOPBACK) &&
330 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331 return 1;
332 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333}
334
Dave Jonesb6f99a22007-03-22 12:27:49 -0700335static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800337 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800338 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700339 if (rt->rt6i_flags & RTF_NONEXTHOP ||
340 !(rt->rt6i_flags & RTF_GATEWAY))
341 m = 1;
342 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800343 read_lock_bh(&neigh->lock);
344 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700345 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800346#ifdef CONFIG_IPV6_ROUTER_PREF
347 else if (neigh->nud_state & NUD_FAILED)
348 m = 0;
349#endif
350 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800351 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800352 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800353 } else
354 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800355 return m;
356}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800358static int rt6_score_route(struct rt6_info *rt, int oif,
359 int strict)
360{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700361 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900362
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700363 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700364 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800365 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800366#ifdef CONFIG_IPV6_ROUTER_PREF
367 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
368#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700369 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800370 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800371 return -1;
372 return m;
373}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374
David S. Millerf11e6652007-03-24 20:36:25 -0700375static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
376 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800377{
David S. Millerf11e6652007-03-24 20:36:25 -0700378 int m;
379
380 if (rt6_check_expired(rt))
381 goto out;
382
383 m = rt6_score_route(rt, oif, strict);
384 if (m < 0)
385 goto out;
386
387 if (m > *mpri) {
388 if (strict & RT6_LOOKUP_F_REACHABLE)
389 rt6_probe(match);
390 *mpri = m;
391 match = rt;
392 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
393 rt6_probe(rt);
394 }
395
396out:
397 return match;
398}
399
400static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
401 struct rt6_info *rr_head,
402 u32 metric, int oif, int strict)
403{
404 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800405 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406
David S. Millerf11e6652007-03-24 20:36:25 -0700407 match = NULL;
408 for (rt = rr_head; rt && rt->rt6i_metric == metric;
409 rt = rt->u.dst.rt6_next)
410 match = find_match(rt, oif, strict, &mpri, match);
411 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
412 rt = rt->u.dst.rt6_next)
413 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800414
David S. Millerf11e6652007-03-24 20:36:25 -0700415 return match;
416}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800417
David S. Millerf11e6652007-03-24 20:36:25 -0700418static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
419{
420 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800421 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
David S. Millerf11e6652007-03-24 20:36:25 -0700423 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800424 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425
David S. Millerf11e6652007-03-24 20:36:25 -0700426 rt0 = fn->rr_ptr;
427 if (!rt0)
428 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429
David S. Millerf11e6652007-03-24 20:36:25 -0700430 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800432 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700433 (strict & RT6_LOOKUP_F_REACHABLE)) {
434 struct rt6_info *next = rt0->u.dst.rt6_next;
435
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800436 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700437 if (!next || next->rt6i_metric != rt0->rt6i_metric)
438 next = fn->leaf;
439
440 if (next != rt0)
441 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 }
443
David S. Millerf11e6652007-03-24 20:36:25 -0700444 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800445 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900447 net = dev_net(rt0->rt6i_dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800448 return (match ? match : net->ipv6.ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449}
450
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information Option received on @dev.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Depending on the advertised lifetime the matching route is deleted
 * (lifetime 0), created (no route yet) or refreshed (preference and
 * expiry updated).
 *
 * Returns 0 on success and -EINVAL when the option is malformed: too
 * short, inconsistent length/prefix_len, or carrying the reserved
 * preference value.  RFC 4191 section 2.3 requires an option with the
 * reserved preference to be ignored, so it is rejected here rather than
 * being treated as medium preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/* RFC 4191 2.3: the reserved preference means "ignore the option" */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * Shorter option: copy only prefix_len bits into a local
		 * buffer (this function is safe).
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* presumably drops the reference taken by get/add above */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
524
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed match.
 *
 * Must be expanded inside a function that provides:
 *   - a local `struct rt6_info *rt` holding the current lookup result,
 *   - a local `struct fib6_node *fn` holding the current node,
 *   - labels `restart` (re-run the match on the new fn) and `out`.
 *
 * If rt is the namespace's null entry, walk towards the root: at each
 * step either descend into the parent's source-address subtree (when it
 * exists and is not the node we came from) or move to the parent.  Jumps
 * to `restart` at the first node carrying RTN_RTINFO, and to `out` when
 * the top-level root is reached.  Does nothing when rt is a real route.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700542
/*
 * Lookup callback handed to fib6_rule_lookup() by rt6_lookup().
 *
 * Finds the node for the flow's destination/source in @table, picks an
 * entry with rt6_device_match() and, if that yields the null entry,
 * backtracks up the tree via BACKTRACK().  The whole walk runs under the
 * read side of table->tb6_lock.  The returned route has been passed
 * through dst_use() before the lock is dropped.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:	/* BACKTRACK jumps here with fn moved to another node */
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
	BACKTRACK(net, &fl->fl6_src);
out:		/* reached by fall-through or from BACKTRACK at the root */
	dst_use(&rt->u.dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}
562
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900563struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
564 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700565{
566 struct flowi fl = {
567 .oif = oif,
568 .nl_u = {
569 .ip6_u = {
570 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700571 },
572 },
573 };
574 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700575 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700576
Thomas Grafadaa70b2006-10-13 15:01:03 -0700577 if (saddr) {
578 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579 flags |= RT6_LOOKUP_F_HAS_SADDR;
580 }
581
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800582 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700583 if (dst->error == 0)
584 return (struct rt6_info *) dst;
585
586 dst_release(dst);
587
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 return NULL;
589}
590
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900591EXPORT_SYMBOL(rt6_lookup);
592
Thomas Grafc71099a2006-08-04 23:20:06 -0700593/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 It takes new route entry, the addition fails by any reason the
595 route is freed. In any case, if caller does not hold it, it may
596 be destroyed.
597 */
598
Thomas Graf86872cb2006-08-22 00:01:08 -0700599static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600{
601 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700602 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
Thomas Grafc71099a2006-08-04 23:20:06 -0700604 table = rt->rt6i_table;
605 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700606 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700607 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608
609 return err;
610}
611
Thomas Graf40e22e82006-08-22 00:00:45 -0700612int ip6_ins_rt(struct rt6_info *rt)
613{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800614 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900615 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800616 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800617 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700618}
619
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800620static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
621 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 struct rt6_info *rt;
624
625 /*
626 * Clone the route.
627 */
628
629 rt = ip6_rt_copy(ort);
630
631 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900632 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
633 if (rt->rt6i_dst.plen != 128 &&
634 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
635 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900637 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900639 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 rt->rt6i_dst.plen = 128;
641 rt->rt6i_flags |= RTF_CACHE;
642 rt->u.dst.flags |= DST_HOST;
643
644#ifdef CONFIG_IPV6_SUBTREES
645 if (rt->rt6i_src.plen && saddr) {
646 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
647 rt->rt6i_src.plen = 128;
648 }
649#endif
650
651 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
652
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800653 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800655 return rt;
656}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800658static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
659{
660 struct rt6_info *rt = ip6_rt_copy(ort);
661 if (rt) {
662 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
663 rt->rt6i_dst.plen = 128;
664 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800665 rt->u.dst.flags |= DST_HOST;
666 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
667 }
668 return rt;
669}
670
/*
 * Core policy-routing lookup shared by the input and output paths.
 *
 * @net:   namespace whose null entry is used as the "no route" result
 * @table: FIB table to search
 * @oif:   interface index passed to rt6_select()
 * @fl:    flow providing destination and source addresses
 * @flags: only RT6_LOOKUP_F_IFACE is consumed here
 *
 * Selects a route with rt6_select(), backtracking through the tree when
 * nothing matches.  A matched route that is neither the null entry nor an
 * RTF_CACHE entry and that has no next hop (and is not RTF_NONEXTHOP) is
 * copied with rt6_alloc_cow() and the copy is inserted into the table;
 * on insertion failure the lookup is retried, at most three attempts in
 * total.  The returned route has had dst_hold() applied on every path and
 * its lastuse/__use fields updated.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	/*
	 * Require a reachable router on the first pass unless forwarding is
	 * enabled.  NOTE(review): this reads the global ipv6_devconf rather
	 * than anything reached through @net -- confirm that is intended.
	 */
	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);

restart:	/* also the re-entry point used by BACKTRACK */
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl->fl6_src);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* pin the route, then drop the table lock before allocating */
	dst_hold(&rt->u.dst);
	read_unlock_bh(&table->tb6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
	else {
#if CLONE_OFFLINK_ROUTE
		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
		/* return the matched route itself, still held */
		goto out2;
#endif
	}

	/* switch from the original to the copy (or the null entry) */
	dst_release(&rt->u.dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->u.dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->u.dst);
	goto relookup;

out:
	/*
	 * Reached with the table lock held, either from the check above or
	 * from BACKTRACK at the tree root.  If this pass required a
	 * reachable router, repeat the lookup once without that requirement
	 * (this also happens when the pass found an RTF_CACHE entry).
	 */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->u.dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;

	return rt;
}
743
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800744static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700745 struct flowi *fl, int flags)
746{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800747 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700748}
749
Thomas Grafc71099a2006-08-04 23:20:06 -0700750void ip6_route_input(struct sk_buff *skb)
751{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700752 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900753 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700754 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700755 struct flowi fl = {
756 .iif = skb->dev->ifindex,
757 .nl_u = {
758 .ip6_u = {
759 .daddr = iph->daddr,
760 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800761 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700762 },
763 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900764 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700765 .proto = iph->nexthdr,
766 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700767
768 if (rt6_need_strict(&iph->daddr))
769 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700770
Daniel Lezcano55786892008-03-04 13:47:47 -0800771 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
Thomas Grafc71099a2006-08-04 23:20:06 -0700772}
773
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800774static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700775 struct flowi *fl, int flags)
776{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800777 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700778}
779
Daniel Lezcano4591db42008-03-05 10:48:10 -0800780struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
781 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700782{
783 int flags = 0;
784
785 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700786 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700787
Thomas Grafadaa70b2006-10-13 15:01:03 -0700788 if (!ipv6_addr_any(&fl->fl6_src))
789 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +0900790 else if (sk) {
791 unsigned int prefs = inet6_sk(sk)->srcprefs;
792 if (prefs & IPV6_PREFER_SRC_TMP)
793 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
794 if (prefs & IPV6_PREFER_SRC_PUBLIC)
795 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
796 if (prefs & IPV6_PREFER_SRC_COA)
797 flags |= RT6_LOOKUP_F_SRCPREF_COA;
798 }
Thomas Grafadaa70b2006-10-13 15:01:03 -0700799
Daniel Lezcano4591db42008-03-05 10:48:10 -0800800 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801}
802
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900803EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804
David S. Miller14e50e52007-05-24 18:17:54 -0700805int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
806{
807 struct rt6_info *ort = (struct rt6_info *) *dstp;
808 struct rt6_info *rt = (struct rt6_info *)
809 dst_alloc(&ip6_dst_blackhole_ops);
810 struct dst_entry *new = NULL;
811
812 if (rt) {
813 new = &rt->u.dst;
814
815 atomic_set(&new->__refcnt, 1);
816 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800817 new->input = dst_discard;
818 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700819
820 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
821 new->dev = ort->u.dst.dev;
822 if (new->dev)
823 dev_hold(new->dev);
824 rt->rt6i_idev = ort->rt6i_idev;
825 if (rt->rt6i_idev)
826 in6_dev_hold(rt->rt6i_idev);
827 rt->rt6i_expires = 0;
828
829 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
830 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
831 rt->rt6i_metric = 0;
832
833 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
834#ifdef CONFIG_IPV6_SUBTREES
835 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
836#endif
837
838 dst_free(new);
839 }
840
841 dst_release(*dstp);
842 *dstp = new;
843 return (new ? 0 : -ENOMEM);
844}
845EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
846
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847/*
848 * Destination cache support functions
849 */
850
851static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
852{
853 struct rt6_info *rt;
854
855 rt = (struct rt6_info *) dst;
856
857 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
858 return dst;
859
860 return NULL;
861}
862
863static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
864{
865 struct rt6_info *rt = (struct rt6_info *) dst;
866
867 if (rt) {
868 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700869 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870 else
871 dst_release(dst);
872 }
873 return NULL;
874}
875
876static void ip6_link_failure(struct sk_buff *skb)
877{
878 struct rt6_info *rt;
879
880 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
881
882 rt = (struct rt6_info *) skb->dst;
883 if (rt) {
884 if (rt->rt6i_flags&RTF_CACHE) {
885 dst_set_expires(&rt->u.dst, 0);
886 rt->rt6i_flags |= RTF_EXPIRES;
887 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
888 rt->rt6i_node->fn_sernum = -1;
889 }
890}
891
892static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
893{
894 struct rt6_info *rt6 = (struct rt6_info*)dst;
895
896 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
897 rt6->rt6i_flags |= RTF_MODIFIED;
898 if (mtu < IPV6_MIN_MTU) {
899 mtu = IPV6_MIN_MTU;
900 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
901 }
902 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700903 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 }
905}
906
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907static int ipv6_get_mtu(struct net_device *dev);
908
Daniel Lezcano55786892008-03-04 13:47:47 -0800909static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910{
911 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
912
Daniel Lezcano55786892008-03-04 13:47:47 -0800913 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
914 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915
916 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900917 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
918 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
919 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920 * rely only on pmtu discovery"
921 */
922 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
923 mtu = IPV6_MAXPLEN;
924 return mtu;
925}
926
/*
 * List of dst entries created by icmp6_dst_alloc(), chained through
 * dst->next.  icmp6_dst_gc() unlinks and frees entries whose reference
 * count has dropped to zero.  Both functions take icmp6_dst_lock around
 * every access to the list.
 */
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800927static struct dst_entry *icmp6_dst_gc_list;
928static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700929
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800930struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900932 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933{
934 struct rt6_info *rt;
935 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900936 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937
938 if (unlikely(idev == NULL))
939 return NULL;
940
Benjamin Theryf2fc6a52008-03-04 13:49:23 -0800941 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 if (unlikely(rt == NULL)) {
943 in6_dev_put(idev);
944 goto out;
945 }
946
947 dev_hold(dev);
948 if (neigh)
949 neigh_hold(neigh);
950 else
951 neigh = ndisc_get_neigh(dev, addr);
952
953 rt->rt6i_dev = dev;
954 rt->rt6i_idev = idev;
955 rt->rt6i_nexthop = neigh;
956 atomic_set(&rt->u.dst.__refcnt, 1);
957 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
958 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800959 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800960 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961
962#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900963 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
964 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 : 0;
966 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
967 rt->rt6i_dst.plen = 128;
968#endif
969
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800970 spin_lock_bh(&icmp6_dst_lock);
971 rt->u.dst.next = icmp6_dst_gc_list;
972 icmp6_dst_gc_list = &rt->u.dst;
973 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974
Daniel Lezcano55786892008-03-04 13:47:47 -0800975 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976
977out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +0900978 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979}
980
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800981int icmp6_dst_gc(int *more)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982{
983 struct dst_entry *dst, *next, **pprev;
984 int freed;
985
986 next = NULL;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900987 freed = 0;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700988
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800989 spin_lock_bh(&icmp6_dst_lock);
990 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700991
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 while ((dst = *pprev) != NULL) {
993 if (!atomic_read(&dst->__refcnt)) {
994 *pprev = dst->next;
995 dst_free(dst);
996 freed++;
997 } else {
998 pprev = &dst->next;
999 (*more)++;
1000 }
1001 }
1002
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001003 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001004
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 return freed;
1006}
1007
Daniel Lezcano569d3642008-01-18 03:56:57 -08001008static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 unsigned long now = jiffies;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001011 struct net *net = ops->dst_net;
1012 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1013 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1014 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1015 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1016 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017
Daniel Lezcano7019b782008-03-04 13:50:14 -08001018 if (time_after(rt_last_gc + rt_min_interval, now) &&
1019 atomic_read(&ops->entries) <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 goto out;
1021
Benjamin Thery6891a342008-03-04 13:49:47 -08001022 net->ipv6.ip6_rt_gc_expire++;
1023 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1024 net->ipv6.ip6_rt_last_gc = now;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001025 if (atomic_read(&ops->entries) < ops->gc_thresh)
1026 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001028 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1029 return (atomic_read(&ops->entries) > rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030}
1031
1032/* Clean host part of a prefix. Not necessary in radix tree,
1033 but results in cleaner routing tables.
1034
1035 Remove it only when all the things will work!
1036 */
1037
1038static int ipv6_get_mtu(struct net_device *dev)
1039{
1040 int mtu = IPV6_MIN_MTU;
1041 struct inet6_dev *idev;
1042
1043 idev = in6_dev_get(dev);
1044 if (idev) {
1045 mtu = idev->cnf.mtu6;
1046 in6_dev_put(idev);
1047 }
1048 return mtu;
1049}
1050
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001051int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052{
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001053 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1054 if (hoplimit < 0) {
1055 struct net_device *dev = dst->dev;
1056 struct inet6_dev *idev = in6_dev_get(dev);
1057 if (idev) {
1058 hoplimit = idev->cnf.hop_limit;
1059 in6_dev_put(idev);
1060 } else
1061 hoplimit = ipv6_devconf.hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 }
1063 return hoplimit;
1064}
1065
1066/*
1067 *
1068 */
1069
/*
 * ip6_route_add - build a route from @cfg and insert it via __ip6_ins_rt().
 *
 * Side effects on @cfg: a zero fc_metric is replaced by IP6_RT_PRIO_USER,
 * an RTPROT_UNSPEC fc_protocol by RTPROT_BOOT, and fc_nlinfo.nl_net is
 * overwritten with the namespace of the device finally chosen.
 *
 * Returns the result of __ip6_ins_rt() once the route has been built.
 * Earlier failures return -EINVAL (bad prefix length, source prefix
 * without CONFIG_IPV6_SUBTREES, unusable gateway, metric type above
 * RTAX_MAX), -ENODEV, -ENOBUFS, -ENOMEM, -EHOSTUNREACH or the error from
 * the neighbour lookup.  Failures that jump to "out" drop the device and
 * inet6_dev references taken so far and free the route.
 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001070int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071{
1072 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001073 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074 struct rt6_info *rt = NULL;
1075 struct net_device *dev = NULL;
1076 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001077 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 int addr_type;
1079
Thomas Graf86872cb2006-08-22 00:01:08 -07001080 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 return -EINVAL;
1082#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001083 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084 return -EINVAL;
1085#endif
/* An interface index was supplied: take references on the device and its inet6_dev. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001086 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001087 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001088 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 if (!dev)
1090 goto out;
1091 idev = in6_dev_get(dev);
1092 if (!idev)
1093 goto out;
1094 }
1095
/* A metric of 0 is replaced by IP6_RT_PRIO_USER. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001096 if (cfg->fc_metric == 0)
1097 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098
Daniel Lezcano55786892008-03-04 13:47:47 -08001099 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001100 if (table == NULL) {
1101 err = -ENOBUFS;
1102 goto out;
1103 }
1104
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001105 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106
1107 if (rt == NULL) {
1108 err = -ENOMEM;
1109 goto out;
1110 }
1111
1112 rt->u.dst.obsolete = -1;
/* Expiry is only set when RTF_EXPIRES was requested; fc_expires is converted from clock_t to jiffies. */
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001113 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1114 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1115 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116
Thomas Graf86872cb2006-08-22 00:01:08 -07001117 if (cfg->fc_protocol == RTPROT_UNSPEC)
1118 cfg->fc_protocol = RTPROT_BOOT;
1119 rt->rt6i_protocol = cfg->fc_protocol;
1120
1121 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122
/* Input handler depends on whether the destination is multicast. */
1123 if (addr_type & IPV6_ADDR_MULTICAST)
1124 rt->u.dst.input = ip6_mc_input;
1125 else
1126 rt->u.dst.input = ip6_forward;
1127
1128 rt->u.dst.output = ip6_output;
1129
Thomas Graf86872cb2006-08-22 00:01:08 -07001130 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1131 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 if (rt->rt6i_dst.plen == 128)
1133 rt->u.dst.flags = DST_HOST;
1134
1135#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001136 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1137 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138#endif
1139
Thomas Graf86872cb2006-08-22 00:01:08 -07001140 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141
1142 /* We cannot add true routes via loopback here,
1143 they would result in kernel looping; promote them to reject routes
1144 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001145 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1147 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001148 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 if (dev) {
1150 dev_put(dev);
1151 in6_dev_put(idev);
1152 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001153 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 dev_hold(dev);
1155 idev = in6_dev_get(dev);
1156 if (!idev) {
1157 err = -ENODEV;
1158 goto out;
1159 }
1160 }
/* Reject route: both handlers discard and the dst carries -ENETUNREACH. */
1161 rt->u.dst.output = ip6_pkt_discard_out;
1162 rt->u.dst.input = ip6_pkt_discard;
1163 rt->u.dst.error = -ENETUNREACH;
1164 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1165 goto install_route;
1166 }
1167
Thomas Graf86872cb2006-08-22 00:01:08 -07001168 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169 struct in6_addr *gw_addr;
1170 int gwa_type;
1171
Thomas Graf86872cb2006-08-22 00:01:08 -07001172 gw_addr = &cfg->fc_gateway;
1173 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 gwa_type = ipv6_addr_type(gw_addr);
1175
1176 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1177 struct rt6_info *grt;
1178
1179 /* IPv6 strictly inhibits using not link-local
1180 addresses as nexthop address.
1181 Otherwise, router will not able to send redirects.
1182 It is very good, but in some (rare!) circumstances
1183 (SIT, PtP, NBMA NOARP links) it is handy to allow
1184 some exceptions. --ANK
1185 */
1186 err = -EINVAL;
1187 if (!(gwa_type&IPV6_ADDR_UNICAST))
1188 goto out;
1189
/* Look up the existing route to the gateway itself. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001190 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191
1192 err = -EHOSTUNREACH;
1193 if (grt == NULL)
1194 goto out;
1195 if (dev) {
1196 if (dev != grt->rt6i_dev) {
1197 dst_release(&grt->u.dst);
1198 goto out;
1199 }
1200 } else {
/* No device given: adopt the gateway route's device and inet6_dev, taking references. */
1201 dev = grt->rt6i_dev;
1202 idev = grt->rt6i_idev;
1203 dev_hold(dev);
1204 in6_dev_hold(grt->rt6i_idev);
1205 }
/* err stays -EHOSTUNREACH if the route to the gateway is itself a gateway route. */
1206 if (!(grt->rt6i_flags&RTF_GATEWAY))
1207 err = 0;
1208 dst_release(&grt->u.dst);
1209
1210 if (err)
1211 goto out;
1212 }
1213 err = -EINVAL;
1214 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1215 goto out;
1216 }
1217
1218 err = -ENODEV;
1219 if (dev == NULL)
1220 goto out;
1221
Thomas Graf86872cb2006-08-22 00:01:08 -07001222 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1224 if (IS_ERR(rt->rt6i_nexthop)) {
1225 err = PTR_ERR(rt->rt6i_nexthop);
1226 rt->rt6i_nexthop = NULL;
1227 goto out;
1228 }
1229 }
1230
Thomas Graf86872cb2006-08-22 00:01:08 -07001231 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232
1233install_route:
/* Copy netlink-supplied metrics: attribute type N is stored in metrics[N - 1]; type 0 is skipped. */
Thomas Graf86872cb2006-08-22 00:01:08 -07001234 if (cfg->fc_mx) {
1235 struct nlattr *nla;
1236 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237
Thomas Graf86872cb2006-08-22 00:01:08 -07001238 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001239 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001240
1241 if (type) {
1242 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243 err = -EINVAL;
1244 goto out;
1245 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001246
1247 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249 }
1250 }
1251
/*
 * Defaults for metrics still at zero: hop limit becomes -1 (which
 * ip6_dst_hoplimit() resolves from the device), MTU comes from the
 * device, and advmss is derived from the resulting MTU.
 */
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001252 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001254 if (!dst_metric(&rt->u.dst, RTAX_MTU))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001256 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
Daniel Lezcano55786892008-03-04 13:47:47 -08001257 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
/* The dev/idev references held in the locals are stored in the route from here on. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001258 rt->u.dst.dev = dev;
1259 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001260 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001261
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001262 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001263
Thomas Graf86872cb2006-08-22 00:01:08 -07001264 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265
/* Error exit: release whatever references are still held in the locals and free the route. */
1266out:
1267 if (dev)
1268 dev_put(dev);
1269 if (idev)
1270 in6_dev_put(idev);
1271 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001272 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001273 return err;
1274}
1275
Thomas Graf86872cb2006-08-22 00:01:08 -07001276static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277{
1278 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001279 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001280 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001282 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001283 return -ENOENT;
1284
Thomas Grafc71099a2006-08-04 23:20:06 -07001285 table = rt->rt6i_table;
1286 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287
Thomas Graf86872cb2006-08-22 00:01:08 -07001288 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289 dst_release(&rt->u.dst);
1290
Thomas Grafc71099a2006-08-04 23:20:06 -07001291 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292
1293 return err;
1294}
1295
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001296int ip6_del_rt(struct rt6_info *rt)
1297{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001298 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001299 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001300 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001301 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001302}
1303
Thomas Graf86872cb2006-08-22 00:01:08 -07001304static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305{
Thomas Grafc71099a2006-08-04 23:20:06 -07001306 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001307 struct fib6_node *fn;
1308 struct rt6_info *rt;
1309 int err = -ESRCH;
1310
Daniel Lezcano55786892008-03-04 13:47:47 -08001311 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001312 if (table == NULL)
1313 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001314
Thomas Grafc71099a2006-08-04 23:20:06 -07001315 read_lock_bh(&table->tb6_lock);
1316
1317 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001318 &cfg->fc_dst, cfg->fc_dst_len,
1319 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001320
Linus Torvalds1da177e2005-04-16 15:20:36 -07001321 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001322 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001323 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001325 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001327 if (cfg->fc_flags & RTF_GATEWAY &&
1328 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001330 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331 continue;
1332 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001333 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001334
Thomas Graf86872cb2006-08-22 00:01:08 -07001335 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336 }
1337 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001338 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339
1340 return err;
1341}
1342
1343/*
1344 * Handle redirects
1345 */
/*
 * Flow key for redirect lookups: a struct flowi extended with the
 * gateway address that candidate routes are matched against.
 * 'fl' must remain the first member: the structure is handed to
 * fib6_rule_lookup() as a struct flowi * and cast back to
 * struct ip6rd_flowi * in __ip6_route_redirect().
 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001346struct ip6rd_flowi {
1347 struct flowi fl;
1348 struct in6_addr gateway;
1349};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001351static struct rt6_info *__ip6_route_redirect(struct net *net,
1352 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001353 struct flowi *fl,
1354 int flags)
1355{
1356 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1357 struct rt6_info *rt;
1358 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001359
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001361 * Get the "current" route for this destination and
1362 * check if the redirect has come from approriate router.
1363 *
1364 * RFC 2461 specifies that redirects should only be
1365 * accepted if they come from the nexthop to the target.
1366 * Due to the way the routes are chosen, this notion
1367 * is a bit fuzzy and one might need to check all possible
1368 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001370
Thomas Grafc71099a2006-08-04 23:20:06 -07001371 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001372 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001373restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001374 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001375 /*
1376 * Current route is on-link; redirect is always invalid.
1377 *
1378 * Seems, previous statement is not true. It could
1379 * be node, which looks for us as on-link (f.e. proxy ndisc)
1380 * But then router serving it might decide, that we should
1381 * know truth 8)8) --ANK (980726).
1382 */
1383 if (rt6_check_expired(rt))
1384 continue;
1385 if (!(rt->rt6i_flags & RTF_GATEWAY))
1386 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001387 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001388 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001389 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001390 continue;
1391 break;
1392 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001393
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001394 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001395 rt = net->ipv6.ip6_null_entry;
1396 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001397out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001398 dst_hold(&rt->u.dst);
1399
1400 read_unlock_bh(&table->tb6_lock);
1401
1402 return rt;
1403};
1404
1405static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1406 struct in6_addr *src,
1407 struct in6_addr *gateway,
1408 struct net_device *dev)
1409{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001410 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001411 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001412 struct ip6rd_flowi rdfl = {
1413 .fl = {
1414 .oif = dev->ifindex,
1415 .nl_u = {
1416 .ip6_u = {
1417 .daddr = *dest,
1418 .saddr = *src,
1419 },
1420 },
1421 },
1422 .gateway = *gateway,
1423 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001424
1425 if (rt6_need_strict(dest))
1426 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001427
Daniel Lezcano55786892008-03-04 13:47:47 -08001428 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001429 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001430}
1431
1432void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1433 struct in6_addr *saddr,
1434 struct neighbour *neigh, u8 *lladdr, int on_link)
1435{
1436 struct rt6_info *rt, *nrt = NULL;
1437 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001438 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001439
1440 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1441
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001442 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443 if (net_ratelimit())
1444 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1445 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001446 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447 }
1448
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 /*
1450 * We have finally decided to accept it.
1451 */
1452
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001453 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1455 NEIGH_UPDATE_F_OVERRIDE|
1456 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1457 NEIGH_UPDATE_F_ISROUTER))
1458 );
1459
1460 /*
1461 * Redirect received -> path was valid.
1462 * Look, redirects are sent only in response to data packets,
1463 * so that this nexthop apparently is reachable. --ANK
1464 */
1465 dst_confirm(&rt->u.dst);
1466
1467 /* Duplicate redirect: silently ignore. */
1468 if (neigh == rt->u.dst.neighbour)
1469 goto out;
1470
1471 nrt = ip6_rt_copy(rt);
1472 if (nrt == NULL)
1473 goto out;
1474
1475 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1476 if (on_link)
1477 nrt->rt6i_flags &= ~RTF_GATEWAY;
1478
1479 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1480 nrt->rt6i_dst.plen = 128;
1481 nrt->u.dst.flags |= DST_HOST;
1482
1483 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1484 nrt->rt6i_nexthop = neigh_clone(neigh);
1485 /* Reset pmtu, it may be better */
1486 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001487 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
Daniel Lezcano55786892008-03-04 13:47:47 -08001488 dst_mtu(&nrt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489
Thomas Graf40e22e82006-08-22 00:00:45 -07001490 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491 goto out;
1492
Tom Tucker8d717402006-07-30 20:43:36 -07001493 netevent.old = &rt->u.dst;
1494 netevent.new = &nrt->u.dst;
1495 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1496
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001498 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 return;
1500 }
1501
1502out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001503 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504 return;
1505}
1506
1507/*
1508 * Handle ICMP "packet too big" messages
1509 * i.e. Path MTU discovery
1510 */
1511
1512void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1513 struct net_device *dev, u32 pmtu)
1514{
1515 struct rt6_info *rt, *nrt;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001516 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001517 int allfrag = 0;
1518
Daniel Lezcano55786892008-03-04 13:47:47 -08001519 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520 if (rt == NULL)
1521 return;
1522
1523 if (pmtu >= dst_mtu(&rt->u.dst))
1524 goto out;
1525
1526 if (pmtu < IPV6_MIN_MTU) {
1527 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001528 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 * MTU (1280) and a fragment header should always be included
1530 * after a node receiving Too Big message reporting PMTU is
1531 * less than the IPv6 Minimum Link MTU.
1532 */
1533 pmtu = IPV6_MIN_MTU;
1534 allfrag = 1;
1535 }
1536
1537 /* New mtu received -> path was valid.
1538 They are sent only in response to data packets,
1539 so that this nexthop apparently is reachable. --ANK
1540 */
1541 dst_confirm(&rt->u.dst);
1542
1543 /* Host route. If it is static, it would be better
1544 not to override it, but add new one, so that
1545 when cache entry will expire old pmtu
1546 would return automatically.
1547 */
1548 if (rt->rt6i_flags & RTF_CACHE) {
1549 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1550 if (allfrag)
1551 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano55786892008-03-04 13:47:47 -08001552 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1554 goto out;
1555 }
1556
1557 /* Network route.
1558 Two cases are possible:
1559 1. It is connected route. Action: COW
1560 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1561 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001562 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001563 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001564 else
1565 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001566
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001567 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001568 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1569 if (allfrag)
1570 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1571
1572 /* According to RFC 1981, detecting PMTU increase shouldn't be
1573 * happened within 5 mins, the recommended timer is 10 mins.
1574 * Here this route expiration time is set to ip6_rt_mtu_expires
1575 * which is 10 mins. After 10 mins the decreased pmtu is expired
1576 * and detecting PMTU increase will be automatically happened.
1577 */
Daniel Lezcano55786892008-03-04 13:47:47 -08001578 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001579 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1580
Thomas Graf40e22e82006-08-22 00:00:45 -07001581 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583out:
1584 dst_release(&rt->u.dst);
1585}
1586
1587/*
1588 * Misc support functions
1589 */
1590
1591static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1592{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001593 struct net *net = dev_net(ort->rt6i_dev);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001594 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595
1596 if (rt) {
1597 rt->u.dst.input = ort->u.dst.input;
1598 rt->u.dst.output = ort->u.dst.output;
1599
1600 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001601 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602 rt->u.dst.dev = ort->u.dst.dev;
1603 if (rt->u.dst.dev)
1604 dev_hold(rt->u.dst.dev);
1605 rt->rt6i_idev = ort->rt6i_idev;
1606 if (rt->rt6i_idev)
1607 in6_dev_hold(rt->rt6i_idev);
1608 rt->u.dst.lastuse = jiffies;
1609 rt->rt6i_expires = 0;
1610
1611 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1612 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1613 rt->rt6i_metric = 0;
1614
1615 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1616#ifdef CONFIG_IPV6_SUBTREES
1617 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1618#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001619 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 }
1621 return rt;
1622}
1623
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001624#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001625static struct rt6_info *rt6_get_route_info(struct net *net,
1626 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001627 struct in6_addr *gwaddr, int ifindex)
1628{
1629 struct fib6_node *fn;
1630 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001631 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001632
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001633 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001634 if (table == NULL)
1635 return NULL;
1636
1637 write_lock_bh(&table->tb6_lock);
1638 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001639 if (!fn)
1640 goto out;
1641
Eric Dumazet7cc48262007-02-09 16:22:57 -08001642 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001643 if (rt->rt6i_dev->ifindex != ifindex)
1644 continue;
1645 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1646 continue;
1647 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1648 continue;
1649 dst_hold(&rt->u.dst);
1650 break;
1651 }
1652out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001653 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001654 return rt;
1655}
1656
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001657static struct rt6_info *rt6_add_route_info(struct net *net,
1658 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001659 struct in6_addr *gwaddr, int ifindex,
1660 unsigned pref)
1661{
Thomas Graf86872cb2006-08-22 00:01:08 -07001662 struct fib6_config cfg = {
1663 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001664 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001665 .fc_ifindex = ifindex,
1666 .fc_dst_len = prefixlen,
1667 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1668 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001669 .fc_nlinfo.pid = 0,
1670 .fc_nlinfo.nlh = NULL,
1671 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001672 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001673
Thomas Graf86872cb2006-08-22 00:01:08 -07001674 ipv6_addr_copy(&cfg.fc_dst, prefix);
1675 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1676
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001677 /* We should treat it as a default route if prefix length is 0. */
1678 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001679 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001680
Thomas Graf86872cb2006-08-22 00:01:08 -07001681 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001682
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001683 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001684}
1685#endif
1686
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001688{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001690 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001692 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001693 if (table == NULL)
1694 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695
Thomas Grafc71099a2006-08-04 23:20:06 -07001696 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001697 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001699 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1701 break;
1702 }
1703 if (rt)
1704 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001705 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 return rt;
1707}
1708
1709struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001710 struct net_device *dev,
1711 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712{
Thomas Graf86872cb2006-08-22 00:01:08 -07001713 struct fib6_config cfg = {
1714 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001715 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001716 .fc_ifindex = dev->ifindex,
1717 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1718 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001719 .fc_nlinfo.pid = 0,
1720 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001721 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001722 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723
Thomas Graf86872cb2006-08-22 00:01:08 -07001724 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725
Thomas Graf86872cb2006-08-22 00:01:08 -07001726 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 return rt6_get_dflt_router(gwaddr, dev);
1729}
1730
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001731void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732{
1733 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001734 struct fib6_table *table;
1735
1736 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001737 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001738 if (table == NULL)
1739 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740
1741restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001742 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001743 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1745 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001746 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001747 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748 goto restart;
1749 }
1750 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001751 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752}
1753
Daniel Lezcano55786892008-03-04 13:47:47 -08001754static void rtmsg_to_fib6_config(struct net *net,
1755 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001756 struct fib6_config *cfg)
1757{
1758 memset(cfg, 0, sizeof(*cfg));
1759
1760 cfg->fc_table = RT6_TABLE_MAIN;
1761 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1762 cfg->fc_metric = rtmsg->rtmsg_metric;
1763 cfg->fc_expires = rtmsg->rtmsg_info;
1764 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1765 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1766 cfg->fc_flags = rtmsg->rtmsg_flags;
1767
Daniel Lezcano55786892008-03-04 13:47:47 -08001768 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001769
Thomas Graf86872cb2006-08-22 00:01:08 -07001770 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1771 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1772 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1773}
1774
Daniel Lezcano55786892008-03-04 13:47:47 -08001775int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776{
Thomas Graf86872cb2006-08-22 00:01:08 -07001777 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778 struct in6_rtmsg rtmsg;
1779 int err;
1780
1781 switch(cmd) {
1782 case SIOCADDRT: /* Add a route */
1783 case SIOCDELRT: /* Delete a route */
1784 if (!capable(CAP_NET_ADMIN))
1785 return -EPERM;
1786 err = copy_from_user(&rtmsg, arg,
1787 sizeof(struct in6_rtmsg));
1788 if (err)
1789 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001790
Daniel Lezcano55786892008-03-04 13:47:47 -08001791 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001792
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793 rtnl_lock();
1794 switch (cmd) {
1795 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001796 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797 break;
1798 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001799 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 break;
1801 default:
1802 err = -EINVAL;
1803 }
1804 rtnl_unlock();
1805
1806 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001807 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808
1809 return -EINVAL;
1810}
1811
1812/*
1813 * Drop the packet on the floor
1814 */
1815
Ilpo Järvinen50eb4312008-01-12 03:21:00 -08001816static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001818 int type;
1819 switch (ipstats_mib_noroutes) {
1820 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001821 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001822 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1823 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1824 break;
1825 }
1826 /* FALLTHROUGH */
1827 case IPSTATS_MIB_OUTNOROUTES:
1828 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1829 break;
1830 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001831 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832 kfree_skb(skb);
1833 return 0;
1834}
1835
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001836static int ip6_pkt_discard(struct sk_buff *skb)
1837{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001838 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001839}
1840
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001841static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842{
1843 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001844 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001845}
1846
David S. Miller6723ab52006-10-18 21:20:57 -07001847#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1848
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001849static int ip6_pkt_prohibit(struct sk_buff *skb)
1850{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001851 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001852}
1853
1854static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1855{
1856 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001857 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001858}
1859
David S. Miller6723ab52006-10-18 21:20:57 -07001860#endif
1861
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862/*
1863 * Allocate a dst for local (unicast / anycast) address.
1864 */
1865
1866struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1867 const struct in6_addr *addr,
1868 int anycast)
1869{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001870 struct net *net = dev_net(idev->dev);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001871 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001872
1873 if (rt == NULL)
1874 return ERR_PTR(-ENOMEM);
1875
Daniel Lezcano55786892008-03-04 13:47:47 -08001876 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877 in6_dev_hold(idev);
1878
1879 rt->u.dst.flags = DST_HOST;
1880 rt->u.dst.input = ip6_input;
1881 rt->u.dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001882 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883 rt->rt6i_idev = idev;
1884 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001885 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001886 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1887 rt->u.dst.obsolete = -1;
1888
1889 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001890 if (anycast)
1891 rt->rt6i_flags |= RTF_ANYCAST;
1892 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893 rt->rt6i_flags |= RTF_LOCAL;
1894 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1895 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001896 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897 return ERR_PTR(-ENOMEM);
1898 }
1899
1900 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1901 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001902 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903
1904 atomic_set(&rt->u.dst.__refcnt, 1);
1905
1906 return rt;
1907}
1908
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001909struct arg_dev_net {
1910 struct net_device *dev;
1911 struct net *net;
1912};
1913
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914static int fib6_ifdown(struct rt6_info *rt, void *arg)
1915{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001916 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1917 struct net *net = ((struct arg_dev_net *)arg)->net;
1918
1919 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1920 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921 RT6_TRACE("deleted by ifdown %p\n", rt);
1922 return -1;
1923 }
1924 return 0;
1925}
1926
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001927void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001928{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001929 struct arg_dev_net adn = {
1930 .dev = dev,
1931 .net = net,
1932 };
1933
1934 fib6_clean_all(net, fib6_ifdown, 0, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935}
1936
1937struct rt6_mtu_change_arg
1938{
1939 struct net_device *dev;
1940 unsigned mtu;
1941};
1942
1943static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1944{
1945 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1946 struct inet6_dev *idev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001947 struct net *net = dev_net(arg->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948
1949 /* In IPv6 pmtu discovery is not optional,
1950 so that RTAX_MTU lock cannot disable it.
1951 We still use this lock to block changes
1952 caused by addrconf/ndisc.
1953 */
1954
1955 idev = __in6_dev_get(arg->dev);
1956 if (idev == NULL)
1957 return 0;
1958
1959 /* For administrative MTU increase, there is no way to discover
1960 IPv6 PMTU increase, so PMTU increase should be updated here.
1961 Since RFC 1981 doesn't include administrative MTU increase
1962 update PMTU increase is a MUST. (i.e. jumbo frame)
1963 */
1964 /*
1965 If new MTU is less than route PMTU, this new MTU will be the
1966 lowest MTU in the path, update the route PMTU to reflect PMTU
1967 decreases; if new MTU is greater than route PMTU, and the
1968 old MTU is the lowest MTU in the path, update the route PMTU
1969 to reflect the increase. In this case if the other nodes' MTU
1970 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1971 PMTU discouvery.
1972 */
1973 if (rt->rt6i_dev == arg->dev &&
1974 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08001975 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001976 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001977 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Daniel Lezcano55786892008-03-04 13:47:47 -08001979 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07001980 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981 return 0;
1982}
1983
1984void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1985{
Thomas Grafc71099a2006-08-04 23:20:06 -07001986 struct rt6_mtu_change_arg arg = {
1987 .dev = dev,
1988 .mtu = mtu,
1989 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001991 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992}
1993
Patrick McHardyef7c79e2007-06-05 12:38:30 -07001994static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07001995 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07001996 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07001997 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07001998 [RTA_PRIORITY] = { .type = NLA_U32 },
1999 [RTA_METRICS] = { .type = NLA_NESTED },
2000};
2001
2002static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2003 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004{
Thomas Graf86872cb2006-08-22 00:01:08 -07002005 struct rtmsg *rtm;
2006 struct nlattr *tb[RTA_MAX+1];
2007 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008
Thomas Graf86872cb2006-08-22 00:01:08 -07002009 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2010 if (err < 0)
2011 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002012
Thomas Graf86872cb2006-08-22 00:01:08 -07002013 err = -EINVAL;
2014 rtm = nlmsg_data(nlh);
2015 memset(cfg, 0, sizeof(*cfg));
2016
2017 cfg->fc_table = rtm->rtm_table;
2018 cfg->fc_dst_len = rtm->rtm_dst_len;
2019 cfg->fc_src_len = rtm->rtm_src_len;
2020 cfg->fc_flags = RTF_UP;
2021 cfg->fc_protocol = rtm->rtm_protocol;
2022
2023 if (rtm->rtm_type == RTN_UNREACHABLE)
2024 cfg->fc_flags |= RTF_REJECT;
2025
2026 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2027 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002028 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002029
2030 if (tb[RTA_GATEWAY]) {
2031 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2032 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002034
2035 if (tb[RTA_DST]) {
2036 int plen = (rtm->rtm_dst_len + 7) >> 3;
2037
2038 if (nla_len(tb[RTA_DST]) < plen)
2039 goto errout;
2040
2041 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002043
2044 if (tb[RTA_SRC]) {
2045 int plen = (rtm->rtm_src_len + 7) >> 3;
2046
2047 if (nla_len(tb[RTA_SRC]) < plen)
2048 goto errout;
2049
2050 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002052
2053 if (tb[RTA_OIF])
2054 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2055
2056 if (tb[RTA_PRIORITY])
2057 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2058
2059 if (tb[RTA_METRICS]) {
2060 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2061 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002063
2064 if (tb[RTA_TABLE])
2065 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2066
2067 err = 0;
2068errout:
2069 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070}
2071
Thomas Grafc127ea22007-03-22 11:58:32 -07002072static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073{
Thomas Graf86872cb2006-08-22 00:01:08 -07002074 struct fib6_config cfg;
2075 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076
Thomas Graf86872cb2006-08-22 00:01:08 -07002077 err = rtm_to_fib6_config(skb, nlh, &cfg);
2078 if (err < 0)
2079 return err;
2080
2081 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082}
2083
Thomas Grafc127ea22007-03-22 11:58:32 -07002084static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085{
Thomas Graf86872cb2006-08-22 00:01:08 -07002086 struct fib6_config cfg;
2087 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088
Thomas Graf86872cb2006-08-22 00:01:08 -07002089 err = rtm_to_fib6_config(skb, nlh, &cfg);
2090 if (err < 0)
2091 return err;
2092
2093 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094}
2095
Thomas Graf339bf982006-11-10 14:10:15 -08002096static inline size_t rt6_nlmsg_size(void)
2097{
2098 return NLMSG_ALIGN(sizeof(struct rtmsg))
2099 + nla_total_size(16) /* RTA_SRC */
2100 + nla_total_size(16) /* RTA_DST */
2101 + nla_total_size(16) /* RTA_GATEWAY */
2102 + nla_total_size(16) /* RTA_PREFSRC */
2103 + nla_total_size(4) /* RTA_TABLE */
2104 + nla_total_size(4) /* RTA_IIF */
2105 + nla_total_size(4) /* RTA_OIF */
2106 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002107 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002108 + nla_total_size(sizeof(struct rta_cacheinfo));
2109}
2110
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002112 struct in6_addr *dst, struct in6_addr *src,
2113 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002114 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115{
2116 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002117 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002118 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002119 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120
2121 if (prefix) { /* user wants prefix routes only */
2122 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2123 /* success since this is not a prefix route */
2124 return 1;
2125 }
2126 }
2127
Thomas Graf2d7202b2006-08-22 00:01:27 -07002128 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2129 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002130 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002131
2132 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133 rtm->rtm_family = AF_INET6;
2134 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2135 rtm->rtm_src_len = rt->rt6i_src.plen;
2136 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002137 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002138 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002139 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002140 table = RT6_TABLE_UNSPEC;
2141 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002142 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143 if (rt->rt6i_flags&RTF_REJECT)
2144 rtm->rtm_type = RTN_UNREACHABLE;
2145 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2146 rtm->rtm_type = RTN_LOCAL;
2147 else
2148 rtm->rtm_type = RTN_UNICAST;
2149 rtm->rtm_flags = 0;
2150 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2151 rtm->rtm_protocol = rt->rt6i_protocol;
2152 if (rt->rt6i_flags&RTF_DYNAMIC)
2153 rtm->rtm_protocol = RTPROT_REDIRECT;
2154 else if (rt->rt6i_flags & RTF_ADDRCONF)
2155 rtm->rtm_protocol = RTPROT_KERNEL;
2156 else if (rt->rt6i_flags&RTF_DEFAULT)
2157 rtm->rtm_protocol = RTPROT_RA;
2158
2159 if (rt->rt6i_flags&RTF_CACHE)
2160 rtm->rtm_flags |= RTM_F_CLONED;
2161
2162 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002163 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002164 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002166 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002167#ifdef CONFIG_IPV6_SUBTREES
2168 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002169 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002170 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002171 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002172 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002173#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002174 if (iif) {
2175#ifdef CONFIG_IPV6_MROUTE
2176 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2177 int err = ip6mr_get_route(skb, rtm, nowait);
2178 if (err <= 0) {
2179 if (!nowait) {
2180 if (err == 0)
2181 return 0;
2182 goto nla_put_failure;
2183 } else {
2184 if (err == -EMSGSIZE)
2185 goto nla_put_failure;
2186 }
2187 }
2188 } else
2189#endif
2190 NLA_PUT_U32(skb, RTA_IIF, iif);
2191 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192 struct in6_addr saddr_buf;
YOSHIFUJI Hideaki5e5f3f02008-03-03 21:44:34 +09002193 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002194 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002195 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002197
Linus Torvalds1da177e2005-04-16 15:20:36 -07002198 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002199 goto nla_put_failure;
2200
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002202 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2203
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002205 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2206
2207 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002208
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002209 if (!(rt->rt6i_flags & RTF_EXPIRES))
2210 expires = 0;
2211 else if (rt->rt6i_expires - jiffies < INT_MAX)
2212 expires = rt->rt6i_expires - jiffies;
2213 else
2214 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002215
Thomas Grafe3703b32006-11-27 09:27:07 -08002216 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2217 expires, rt->u.dst.error) < 0)
2218 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219
Thomas Graf2d7202b2006-08-22 00:01:27 -07002220 return nlmsg_end(skb, nlh);
2221
2222nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002223 nlmsg_cancel(skb, nlh);
2224 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225}
2226
Patrick McHardy1b43af52006-08-10 23:11:17 -07002227int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228{
2229 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2230 int prefix;
2231
Thomas Graf2d7202b2006-08-22 00:01:27 -07002232 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2233 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2235 } else
2236 prefix = 0;
2237
2238 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2239 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002240 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002241}
2242
Thomas Grafc127ea22007-03-22 11:58:32 -07002243static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002245 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002246 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002247 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002248 struct sk_buff *skb;
2249 struct rtmsg *rtm;
2250 struct flowi fl;
2251 int err, iif = 0;
2252
2253 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2254 if (err < 0)
2255 goto errout;
2256
2257 err = -EINVAL;
2258 memset(&fl, 0, sizeof(fl));
2259
2260 if (tb[RTA_SRC]) {
2261 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2262 goto errout;
2263
2264 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2265 }
2266
2267 if (tb[RTA_DST]) {
2268 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2269 goto errout;
2270
2271 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2272 }
2273
2274 if (tb[RTA_IIF])
2275 iif = nla_get_u32(tb[RTA_IIF]);
2276
2277 if (tb[RTA_OIF])
2278 fl.oif = nla_get_u32(tb[RTA_OIF]);
2279
2280 if (iif) {
2281 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002282 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002283 if (!dev) {
2284 err = -ENODEV;
2285 goto errout;
2286 }
2287 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002288
2289 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002290 if (skb == NULL) {
2291 err = -ENOBUFS;
2292 goto errout;
2293 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002294
2295 /* Reserve room for dummy headers, this skb can pass
2296 through good chunk of routing engine.
2297 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002298 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2300
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002301 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302 skb->dst = &rt->u.dst;
2303
Thomas Grafab364a62006-08-22 00:01:47 -07002304 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002306 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002307 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002308 kfree_skb(skb);
2309 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310 }
2311
Daniel Lezcano55786892008-03-04 13:47:47 -08002312 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002313errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002314 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315}
2316
Thomas Graf86872cb2006-08-22 00:01:08 -07002317void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318{
2319 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002320 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002321 u32 seq;
2322 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002324 err = -ENOBUFS;
2325 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002326
Thomas Graf339bf982006-11-10 14:10:15 -08002327 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002328 if (skb == NULL)
2329 goto errout;
2330
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002331 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002332 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002333 if (err < 0) {
2334 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2335 WARN_ON(err == -EMSGSIZE);
2336 kfree_skb(skb);
2337 goto errout;
2338 }
Daniel Lezcano55786892008-03-04 13:47:47 -08002339 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2340 info->nlh, gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002341errout:
2342 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002343 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344}
2345
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002346static int ip6_route_dev_notify(struct notifier_block *this,
2347 unsigned long event, void *data)
2348{
2349 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002350 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002351
2352 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2353 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2354 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2355#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2356 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2357 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2358 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2359 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2360#endif
2361 }
2362
2363 return NOTIFY_OK;
2364}
2365
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366/*
2367 * /proc
2368 */
2369
2370#ifdef CONFIG_PROC_FS
2371
/*
 * NOTE(review): neither RT6_INFO_LEN nor struct rt6_proc_arg is referenced
 * by the seq_file based /proc code that follows; they look like leftovers
 * from an older buffer-based implementation -- confirm before removing.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2382
2383static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2384{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002385 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002386
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002387 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2388 rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389
2390#ifdef CONFIG_IPV6_SUBTREES
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002391 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2392 rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002394 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002395#endif
2396
2397 if (rt->rt6i_nexthop) {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002398 seq_printf(m, NIP6_SEQFMT,
2399 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002401 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002403 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2404 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2405 rt->u.dst.__use, rt->rt6i_flags,
2406 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407 return 0;
2408}
2409
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002410static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002411{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002412 struct net *net = (struct net *)m->private;
2413 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002414 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002415}
2416
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002417static int ipv6_route_open(struct inode *inode, struct file *file)
2418{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002419 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002420}
2421
2422static int ipv6_route_release(struct inode *inode, struct file *file)
2423{
2424 struct seq_file *seq = file->private_data;
2425 struct net *net = seq->private;
2426 put_net(net);
2427 return single_release(inode, file);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002428}
2429
2430static const struct file_operations ipv6_route_proc_fops = {
2431 .owner = THIS_MODULE,
2432 .open = ipv6_route_open,
2433 .read = seq_read,
2434 .llseek = seq_lseek,
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002435 .release = ipv6_route_release,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002436};
2437
Linus Torvalds1da177e2005-04-16 15:20:36 -07002438static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2439{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002440 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002442 net->ipv6.rt6_stats->fib_nodes,
2443 net->ipv6.rt6_stats->fib_route_nodes,
2444 net->ipv6.rt6_stats->fib_rt_alloc,
2445 net->ipv6.rt6_stats->fib_rt_entries,
2446 net->ipv6.rt6_stats->fib_rt_cache,
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002447 atomic_read(&net->ipv6.ip6_dst_ops->entries),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002448 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449
2450 return 0;
2451}
2452
2453static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2454{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002455 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002456}
2457
2458static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2459{
2460 struct seq_file *seq = file->private_data;
2461 struct net *net = (struct net *)seq->private;
2462 put_net(net);
2463 return single_release(inode, file);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002464}
2465
Arjan van de Ven9a321442007-02-12 00:55:35 -08002466static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002467 .owner = THIS_MODULE,
2468 .open = rt6_stats_seq_open,
2469 .read = seq_read,
2470 .llseek = seq_lseek,
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002471 .release = rt6_stats_seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002472};
2473#endif /* CONFIG_PROC_FS */
2474
2475#ifdef CONFIG_SYSCTL
2476
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477static
2478int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2479 void __user *buffer, size_t *lenp, loff_t *ppos)
2480{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002481 struct net *net = current->nsproxy->net_ns;
2482 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483 if (write) {
2484 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002485 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486 return 0;
2487 } else
2488 return -EINVAL;
2489}
2490
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002491ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002492 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002493 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002494 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002496 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002497 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 },
2499 {
2500 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2501 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002502 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002503 .maxlen = sizeof(int),
2504 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002505 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002506 },
2507 {
2508 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2509 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002510 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511 .maxlen = sizeof(int),
2512 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002513 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002514 },
2515 {
2516 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2517 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002518 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519 .maxlen = sizeof(int),
2520 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002521 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522 .strategy = &sysctl_jiffies,
2523 },
2524 {
2525 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2526 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002527 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002528 .maxlen = sizeof(int),
2529 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002530 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002531 .strategy = &sysctl_jiffies,
2532 },
2533 {
2534 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2535 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002536 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537 .maxlen = sizeof(int),
2538 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002539 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540 .strategy = &sysctl_jiffies,
2541 },
2542 {
2543 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2544 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002545 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002546 .maxlen = sizeof(int),
2547 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002548 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549 .strategy = &sysctl_jiffies,
2550 },
2551 {
2552 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2553 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002554 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555 .maxlen = sizeof(int),
2556 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002557 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002558 .strategy = &sysctl_jiffies,
2559 },
2560 {
2561 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2562 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002563 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002564 .maxlen = sizeof(int),
2565 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002566 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567 .strategy = &sysctl_jiffies,
2568 },
2569 {
2570 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2571 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002572 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002573 .maxlen = sizeof(int),
2574 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002575 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002576 .strategy = &sysctl_ms_jiffies,
2577 },
2578 { .ctl_name = 0 }
2579};
2580
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002581struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2582{
2583 struct ctl_table *table;
2584
2585 table = kmemdup(ipv6_route_table_template,
2586 sizeof(ipv6_route_table_template),
2587 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002588
2589 if (table) {
2590 table[0].data = &net->ipv6.sysctl.flush_delay;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002591 table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002592 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2593 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2594 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2595 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2596 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2597 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2598 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2599 }
2600
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002601 return table;
2602}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603#endif
2604
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002605static int ip6_route_net_init(struct net *net)
2606{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002607 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002608
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002609 net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2610 sizeof(*net->ipv6.ip6_dst_ops),
2611 GFP_KERNEL);
2612 if (!net->ipv6.ip6_dst_ops)
2613 goto out;
Denis V. Lunev48115be2008-04-16 02:01:34 -07002614 net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002615
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002616 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2617 sizeof(*net->ipv6.ip6_null_entry),
2618 GFP_KERNEL);
2619 if (!net->ipv6.ip6_null_entry)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002620 goto out_ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002621 net->ipv6.ip6_null_entry->u.dst.path =
2622 (struct dst_entry *)net->ipv6.ip6_null_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002623 net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002624
2625#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2626 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2627 sizeof(*net->ipv6.ip6_prohibit_entry),
2628 GFP_KERNEL);
2629 if (!net->ipv6.ip6_prohibit_entry) {
2630 kfree(net->ipv6.ip6_null_entry);
2631 goto out;
2632 }
2633 net->ipv6.ip6_prohibit_entry->u.dst.path =
2634 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002635 net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002636
2637 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2638 sizeof(*net->ipv6.ip6_blk_hole_entry),
2639 GFP_KERNEL);
2640 if (!net->ipv6.ip6_blk_hole_entry) {
2641 kfree(net->ipv6.ip6_null_entry);
2642 kfree(net->ipv6.ip6_prohibit_entry);
2643 goto out;
2644 }
2645 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2646 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002647 net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002648#endif
2649
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002650#ifdef CONFIG_PROC_FS
2651 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2652 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2653#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002654 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2655
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002656 ret = 0;
2657out:
2658 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002659
2660out_ip6_dst_ops:
Denis V. Lunev48115be2008-04-16 02:01:34 -07002661 release_net(net->ipv6.ip6_dst_ops->dst_net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002662 kfree(net->ipv6.ip6_dst_ops);
2663 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002664}
2665
2666static void ip6_route_net_exit(struct net *net)
2667{
2668#ifdef CONFIG_PROC_FS
2669 proc_net_remove(net, "ipv6_route");
2670 proc_net_remove(net, "rt6_stats");
2671#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002672 kfree(net->ipv6.ip6_null_entry);
2673#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2674 kfree(net->ipv6.ip6_prohibit_entry);
2675 kfree(net->ipv6.ip6_blk_hole_entry);
2676#endif
Denis V. Lunev48115be2008-04-16 02:01:34 -07002677 release_net(net->ipv6.ip6_dst_ops->dst_net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002678 kfree(net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002679}
2680
2681static struct pernet_operations ip6_route_net_ops = {
2682 .init = ip6_route_net_init,
2683 .exit = ip6_route_net_exit,
2684};
2685
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002686static struct notifier_block ip6_route_dev_notifier = {
2687 .notifier_call = ip6_route_dev_notify,
2688 .priority = 0,
2689};
2690
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002691int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002692{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002693 int ret;
2694
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002695 ret = -ENOMEM;
2696 ip6_dst_ops_template.kmem_cachep =
2697 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2698 SLAB_HWCACHE_ALIGN, NULL);
2699 if (!ip6_dst_ops_template.kmem_cachep)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002700 goto out;;
David S. Miller14e50e52007-05-24 18:17:54 -07002701
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002702 ret = register_pernet_subsys(&ip6_route_net_ops);
2703 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002704 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002705
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002706 /* Registering of the loopback is done before this portion of code,
2707 * the loopback reference in rt6_info will not be taken, do it
2708 * manually for init_net */
2709 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2710 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2711 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2712 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2713 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2714 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2715 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2716 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002717 ret = fib6_init();
2718 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002719 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002720
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002721 ret = xfrm6_init();
2722 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002723 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002724
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002725 ret = fib6_rules_init();
2726 if (ret)
2727 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002728
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002729 ret = -ENOBUFS;
2730 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2731 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2732 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2733 goto fib6_rules_init;
2734
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002735 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002736 if (ret)
2737 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002738
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002739out:
2740 return ret;
2741
2742fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002743 fib6_rules_cleanup();
2744xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002745 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002746out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002747 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002748out_register_subsys:
2749 unregister_pernet_subsys(&ip6_route_net_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002750out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002751 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002752 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002753}
2754
2755void ip6_route_cleanup(void)
2756{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002757 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002758 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002759 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002760 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002761 unregister_pernet_subsys(&ip6_route_net_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002762 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002763}