blob: 635d97d54b0acf4833ba6daedfb9015179b9bd0d [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020043#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070054#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070055#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/*
 * Debug verbosity for this file.  Set RT6_DEBUG to 3 to get tracing:
 * RDBG() and RT6_TRACE() then forward to printk().  At any lower level
 * RDBG() expands to nothing and RT6_TRACE() to an empty statement.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, ip6_pol_route() clones routes that already have a
 * next hop via rt6_alloc_clone(); when zero it returns them as they are.
 */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
Linus Torvalds1da177e2005-04-16 15:20:36 -070076static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79static void ip6_dst_destroy(struct dst_entry *);
80static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080082static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
84static int ip6_pkt_discard(struct sk_buff *skb);
85static int ip6_pkt_discard_out(struct sk_buff *skb);
86static void ip6_link_failure(struct sk_buff *skb);
87static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080089#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080090static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080092 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080094static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080096 struct in6_addr *gwaddr, int ifindex);
97#endif
98
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -080099static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100 .family = AF_INET6,
101 .protocol = __constant_htons(ETH_P_IPV6),
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700110 .local_out = __ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800112 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700113};
114
David S. Miller14e50e52007-05-24 18:17:54 -0700115static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
116{
117}
118
119static struct dst_ops ip6_dst_blackhole_ops = {
120 .family = AF_INET6,
121 .protocol = __constant_htons(ETH_P_IPV6),
122 .destroy = ip6_dst_destroy,
123 .check = ip6_dst_check,
124 .update_pmtu = ip6_rt_blackhole_update_pmtu,
125 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800126 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700127};
128
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800129static struct rt6_info ip6_null_entry_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130 .u = {
131 .dst = {
132 .__refcnt = ATOMIC_INIT(1),
133 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 .obsolete = -1,
135 .error = -ENETUNREACH,
136 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
137 .input = ip6_pkt_discard,
138 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 }
140 },
141 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
142 .rt6i_metric = ~(u32) 0,
143 .rt6i_ref = ATOMIC_INIT(1),
144};
145
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject entry whose dst reports
 * -EACCES and hands packets to the ip6_pkt_prohibit*() handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
		},
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject entry whose dst reports
 * -EINVAL and passes packets to dst_discard() in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= dst_discard,
			.output		= dst_discard,
		},
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
186
/* Allocate a dst from @ops and hand it back typed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *dst = dst_alloc(ops);

	return (struct rt6_info *)dst;
}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900201 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202}
203
204static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
206{
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800209 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900210 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800212 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
213 struct inet6_dev *loopback_idev =
214 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 if (loopback_idev != NULL) {
216 rt->rt6i_idev = loopback_idev;
217 in6_dev_put(idev);
218 }
219 }
220}
221
222static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223{
224 return (rt->rt6i_flags & RTF_EXPIRES &&
225 time_after(jiffies, rt->rt6i_expires));
226}
227
Thomas Grafc71099a2006-08-04 23:20:06 -0700228static inline int rt6_need_strict(struct in6_addr *daddr)
229{
230 return (ipv6_addr_type(daddr) &
YOSHIFUJI Hideaki5ce83af2008-06-25 16:58:17 +0900231 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
Thomas Grafc71099a2006-08-04 23:20:06 -0700232}
233
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700235 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236 */
237
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800238static inline struct rt6_info *rt6_device_match(struct net *net,
239 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900240 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700242 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243{
244 struct rt6_info *local = NULL;
245 struct rt6_info *sprt;
246
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900247 if (!oif && ipv6_addr_any(saddr))
248 goto out;
249
250 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
251 struct net_device *dev = sprt->rt6i_dev;
252
253 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254 if (dev->ifindex == oif)
255 return sprt;
256 if (dev->flags & IFF_LOOPBACK) {
257 if (sprt->rt6i_idev == NULL ||
258 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700259 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900261 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 local->rt6i_idev->dev->ifindex == oif))
263 continue;
264 }
265 local = sprt;
266 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900267 } else {
268 if (ipv6_chk_addr(net, saddr, dev,
269 flags & RT6_LOOKUP_F_IFACE))
270 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900272 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900274 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 if (local)
276 return local;
277
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700278 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800279 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900281out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 return rt;
283}
284
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * rt6_probe - send a Neighbour Solicitation towards a route's next hop
 * @rt: route to probe; may be NULL, in which case nothing happens
 *
 * Nothing is sent when the route has no next-hop neighbour or the
 * neighbour is already in a NUD_VALID state.  Otherwise a solicitation
 * is sent at most once per rtr_probe_interval, using neigh->updated as
 * the timestamp of the last probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	/*
	 * neigh->updated is modified below, so take the lock for writing:
	 * with only the read side held, concurrent callers could all pass
	 * the time_after() test and each send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Send outside the lock, to the solicited-node multicast address. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
320
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800322 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700324static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800326 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700327 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800328 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700329 if ((dev->flags & IFF_LOOPBACK) &&
330 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331 return 1;
332 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333}
334
Dave Jonesb6f99a22007-03-22 12:27:49 -0700335static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800337 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800338 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700339 if (rt->rt6i_flags & RTF_NONEXTHOP ||
340 !(rt->rt6i_flags & RTF_GATEWAY))
341 m = 1;
342 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800343 read_lock_bh(&neigh->lock);
344 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700345 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800346#ifdef CONFIG_IPV6_ROUTER_PREF
347 else if (neigh->nud_state & NUD_FAILED)
348 m = 0;
349#endif
350 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800351 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800352 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800353 } else
354 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800355 return m;
356}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800358static int rt6_score_route(struct rt6_info *rt, int oif,
359 int strict)
360{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700361 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900362
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700363 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700364 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800365 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800366#ifdef CONFIG_IPV6_ROUTER_PREF
367 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
368#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700369 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800370 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800371 return -1;
372 return m;
373}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374
David S. Millerf11e6652007-03-24 20:36:25 -0700375static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
376 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800377{
David S. Millerf11e6652007-03-24 20:36:25 -0700378 int m;
379
380 if (rt6_check_expired(rt))
381 goto out;
382
383 m = rt6_score_route(rt, oif, strict);
384 if (m < 0)
385 goto out;
386
387 if (m > *mpri) {
388 if (strict & RT6_LOOKUP_F_REACHABLE)
389 rt6_probe(match);
390 *mpri = m;
391 match = rt;
392 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
393 rt6_probe(rt);
394 }
395
396out:
397 return match;
398}
399
400static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
401 struct rt6_info *rr_head,
402 u32 metric, int oif, int strict)
403{
404 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800405 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406
David S. Millerf11e6652007-03-24 20:36:25 -0700407 match = NULL;
408 for (rt = rr_head; rt && rt->rt6i_metric == metric;
409 rt = rt->u.dst.rt6_next)
410 match = find_match(rt, oif, strict, &mpri, match);
411 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
412 rt = rt->u.dst.rt6_next)
413 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800414
David S. Millerf11e6652007-03-24 20:36:25 -0700415 return match;
416}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800417
David S. Millerf11e6652007-03-24 20:36:25 -0700418static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
419{
420 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800421 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
David S. Millerf11e6652007-03-24 20:36:25 -0700423 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800424 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425
David S. Millerf11e6652007-03-24 20:36:25 -0700426 rt0 = fn->rr_ptr;
427 if (!rt0)
428 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429
David S. Millerf11e6652007-03-24 20:36:25 -0700430 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800432 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700433 (strict & RT6_LOOKUP_F_REACHABLE)) {
434 struct rt6_info *next = rt0->u.dst.rt6_next;
435
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800436 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700437 if (!next || next->rt6i_metric != rt0->rt6i_metric)
438 next = fn->leaf;
439
440 if (next != rt0)
441 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 }
443
David S. Millerf11e6652007-03-24 20:36:25 -0700444 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800445 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900447 net = dev_net(rt0->rt6i_dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800448 return (match ? match : net->ipv6.ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449}
450
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a received Route Information option
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as the gateway
 *
 * Validates the option, then adds, updates or (for a zero lifetime)
 * deletes the matching route and sets its expiry from the lifetime.
 *
 * Returns 0 on success (including when no route results) and -EINVAL
 * for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/* Signed comparison so that a negative @len is rejected as well. */
	if (len < (int)sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length.
	 *
	 * rinfo->length counts 8-octet units including the 8-octet fixed
	 * part, so it leaves room for 0, 8 or 16 prefix octets.  Per
	 * RFC 4191 2.3 a prefix longer than 64 bits needs length 3 and
	 * any non-empty prefix needs length 2 or 3; accepting shorter
	 * options would make ipv6_addr_prefix() below read prefix bytes
	 * that are not part of the option.
	 */
	if (rinfo->length > 3)
		return -EINVAL;
	if ((rinfo->length << 3) > len)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* Copies only prefix_len bits, which the checks above bound. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
524
/*
 * BACKTRACK - climb the fib6 tree after a lookup yielded ip6_null_entry
 * @__net: namespace whose ip6_null_entry marks "no route"
 * @saddr: source address used when descending into a source subtree
 *
 * Not self-contained: it reads and updates the caller's local variables
 * "rt" and "fn" and jumps to the caller's labels "out" (top-level root
 * reached) and "restart" (a node carrying route info was found).
 * Arguments are parenthesised so that any expression may be passed.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700542
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800543static struct rt6_info *ip6_pol_route_lookup(struct net *net,
544 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700545 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546{
547 struct fib6_node *fn;
548 struct rt6_info *rt;
549
Thomas Grafc71099a2006-08-04 23:20:06 -0700550 read_lock_bh(&table->tb6_lock);
551 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
552restart:
553 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900554 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800555 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700556out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800557 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700558 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700559 return rt;
560
561}
562
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900563struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
564 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700565{
566 struct flowi fl = {
567 .oif = oif,
568 .nl_u = {
569 .ip6_u = {
570 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700571 },
572 },
573 };
574 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700575 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700576
Thomas Grafadaa70b2006-10-13 15:01:03 -0700577 if (saddr) {
578 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579 flags |= RT6_LOOKUP_F_HAS_SADDR;
580 }
581
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800582 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700583 if (dst->error == 0)
584 return (struct rt6_info *) dst;
585
586 dst_release(dst);
587
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 return NULL;
589}
590
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900591EXPORT_SYMBOL(rt6_lookup);
592
Thomas Grafc71099a2006-08-04 23:20:06 -0700593/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 It takes new route entry, the addition fails by any reason the
595 route is freed. In any case, if caller does not hold it, it may
596 be destroyed.
597 */
598
Thomas Graf86872cb2006-08-22 00:01:08 -0700599static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600{
601 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700602 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
Thomas Grafc71099a2006-08-04 23:20:06 -0700604 table = rt->rt6i_table;
605 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700606 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700607 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608
609 return err;
610}
611
Thomas Graf40e22e82006-08-22 00:00:45 -0700612int ip6_ins_rt(struct rt6_info *rt)
613{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800614 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900615 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800616 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800617 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700618}
619
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800620static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
621 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 struct rt6_info *rt;
624
625 /*
626 * Clone the route.
627 */
628
629 rt = ip6_rt_copy(ort);
630
631 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900632 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
633 if (rt->rt6i_dst.plen != 128 &&
634 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
635 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900637 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900639 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 rt->rt6i_dst.plen = 128;
641 rt->rt6i_flags |= RTF_CACHE;
642 rt->u.dst.flags |= DST_HOST;
643
644#ifdef CONFIG_IPV6_SUBTREES
645 if (rt->rt6i_src.plen && saddr) {
646 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
647 rt->rt6i_src.plen = 128;
648 }
649#endif
650
651 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
652
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800653 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800655 return rt;
656}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800658static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
659{
660 struct rt6_info *rt = ip6_rt_copy(ort);
661 if (rt) {
662 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
663 rt->rt6i_dst.plen = 128;
664 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800665 rt->u.dst.flags |= DST_HOST;
666 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
667 }
668 return rt;
669}
670
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800671static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
672 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673{
674 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800675 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700676 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800678 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700679 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700681 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
683relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700684 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800686restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700687 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688
689restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700690 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800691
692 BACKTRACK(net, &fl->fl6_src);
693 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800694 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800695 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800697 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700698 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800699
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800700 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800701 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800702 else {
703#if CLONE_OFFLINK_ROUTE
704 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
705#else
706 goto out2;
707#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800709
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800710 dst_release(&rt->u.dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800711 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800712
713 dst_hold(&rt->u.dst);
714 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700715 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800716 if (!err)
717 goto out2;
718 }
719
720 if (--attempts <= 0)
721 goto out2;
722
723 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700724 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800725 * released someone could insert this route. Relookup.
726 */
727 dst_release(&rt->u.dst);
728 goto relookup;
729
730out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800731 if (reachable) {
732 reachable = 0;
733 goto restart_2;
734 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800735 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700736 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737out2:
738 rt->u.dst.lastuse = jiffies;
739 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700740
741 return rt;
742}
743
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800744static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700745 struct flowi *fl, int flags)
746{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800747 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700748}
749
Thomas Grafc71099a2006-08-04 23:20:06 -0700750void ip6_route_input(struct sk_buff *skb)
751{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700752 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900753 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700754 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700755 struct flowi fl = {
756 .iif = skb->dev->ifindex,
757 .nl_u = {
758 .ip6_u = {
759 .daddr = iph->daddr,
760 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800761 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700762 },
763 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900764 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700765 .proto = iph->nexthdr,
766 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700767
768 if (rt6_need_strict(&iph->daddr))
769 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700770
Daniel Lezcano55786892008-03-04 13:47:47 -0800771 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
Thomas Grafc71099a2006-08-04 23:20:06 -0700772}
773
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800774static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700775 struct flowi *fl, int flags)
776{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800777 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700778}
779
Daniel Lezcano4591db42008-03-05 10:48:10 -0800780struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
781 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700782{
783 int flags = 0;
784
785 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700786 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700787
Thomas Grafadaa70b2006-10-13 15:01:03 -0700788 if (!ipv6_addr_any(&fl->fl6_src))
789 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +0900790 else if (sk) {
791 unsigned int prefs = inet6_sk(sk)->srcprefs;
792 if (prefs & IPV6_PREFER_SRC_TMP)
793 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
794 if (prefs & IPV6_PREFER_SRC_PUBLIC)
795 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
796 if (prefs & IPV6_PREFER_SRC_COA)
797 flags |= RT6_LOOKUP_F_SRCPREF_COA;
798 }
Thomas Grafadaa70b2006-10-13 15:01:03 -0700799
Daniel Lezcano4591db42008-03-05 10:48:10 -0800800 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801}
802
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900803EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804
David S. Miller14e50e52007-05-24 18:17:54 -0700805int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
806{
807 struct rt6_info *ort = (struct rt6_info *) *dstp;
808 struct rt6_info *rt = (struct rt6_info *)
809 dst_alloc(&ip6_dst_blackhole_ops);
810 struct dst_entry *new = NULL;
811
812 if (rt) {
813 new = &rt->u.dst;
814
815 atomic_set(&new->__refcnt, 1);
816 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800817 new->input = dst_discard;
818 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700819
820 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
821 new->dev = ort->u.dst.dev;
822 if (new->dev)
823 dev_hold(new->dev);
824 rt->rt6i_idev = ort->rt6i_idev;
825 if (rt->rt6i_idev)
826 in6_dev_hold(rt->rt6i_idev);
827 rt->rt6i_expires = 0;
828
829 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
830 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
831 rt->rt6i_metric = 0;
832
833 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
834#ifdef CONFIG_IPV6_SUBTREES
835 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
836#endif
837
838 dst_free(new);
839 }
840
841 dst_release(*dstp);
842 *dstp = new;
843 return (new ? 0 : -ENOMEM);
844}
845EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
846
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847/*
848 * Destination cache support functions
849 */
850
851static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
852{
853 struct rt6_info *rt;
854
855 rt = (struct rt6_info *) dst;
856
857 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
858 return dst;
859
860 return NULL;
861}
862
863static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
864{
865 struct rt6_info *rt = (struct rt6_info *) dst;
866
867 if (rt) {
868 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700869 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870 else
871 dst_release(dst);
872 }
873 return NULL;
874}
875
876static void ip6_link_failure(struct sk_buff *skb)
877{
878 struct rt6_info *rt;
879
880 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
881
882 rt = (struct rt6_info *) skb->dst;
883 if (rt) {
884 if (rt->rt6i_flags&RTF_CACHE) {
885 dst_set_expires(&rt->u.dst, 0);
886 rt->rt6i_flags |= RTF_EXPIRES;
887 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
888 rt->rt6i_node->fn_sernum = -1;
889 }
890}
891
892static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
893{
894 struct rt6_info *rt6 = (struct rt6_info*)dst;
895
896 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
897 rt6->rt6i_flags |= RTF_MODIFIED;
898 if (mtu < IPV6_MIN_MTU) {
899 mtu = IPV6_MIN_MTU;
900 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
901 }
902 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700903 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 }
905}
906
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907static int ipv6_get_mtu(struct net_device *dev);
908
Daniel Lezcano55786892008-03-04 13:47:47 -0800909static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910{
911 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
912
Daniel Lezcano55786892008-03-04 13:47:47 -0800913 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
914 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915
916 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900917 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
918 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
919 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920 * rely only on pmtu discovery"
921 */
922 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
923 mtu = IPV6_MAXPLEN;
924 return mtu;
925}
926
/*
 * Singly linked list (via dst->next) of the entries handed out by
 * icmp6_dst_alloc(), and the spinlock that guards every walk or update
 * of that list.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700929
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800930struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900932 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933{
934 struct rt6_info *rt;
935 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900936 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937
938 if (unlikely(idev == NULL))
939 return NULL;
940
Benjamin Theryf2fc6a52008-03-04 13:49:23 -0800941 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 if (unlikely(rt == NULL)) {
943 in6_dev_put(idev);
944 goto out;
945 }
946
947 dev_hold(dev);
948 if (neigh)
949 neigh_hold(neigh);
950 else
951 neigh = ndisc_get_neigh(dev, addr);
952
953 rt->rt6i_dev = dev;
954 rt->rt6i_idev = idev;
955 rt->rt6i_nexthop = neigh;
956 atomic_set(&rt->u.dst.__refcnt, 1);
957 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
958 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800959 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800960 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961
962#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900963 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
964 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 : 0;
966 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
967 rt->rt6i_dst.plen = 128;
968#endif
969
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800970 spin_lock_bh(&icmp6_dst_lock);
971 rt->u.dst.next = icmp6_dst_gc_list;
972 icmp6_dst_gc_list = &rt->u.dst;
973 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974
Daniel Lezcano55786892008-03-04 13:47:47 -0800975 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976
977out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +0900978 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979}
980
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -0700981int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982{
983 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -0700984 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985
986 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700987
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800988 spin_lock_bh(&icmp6_dst_lock);
989 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700990
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991 while ((dst = *pprev) != NULL) {
992 if (!atomic_read(&dst->__refcnt)) {
993 *pprev = dst->next;
994 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 } else {
996 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -0700997 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 }
999 }
1000
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001001 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001002
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001003 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004}
1005
David S. Miller1e493d12008-09-10 17:27:15 -07001006static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1007 void *arg)
1008{
1009 struct dst_entry *dst, **pprev;
1010
1011 spin_lock_bh(&icmp6_dst_lock);
1012 pprev = &icmp6_dst_gc_list;
1013 while ((dst = *pprev) != NULL) {
1014 struct rt6_info *rt = (struct rt6_info *) dst;
1015 if (func(rt, arg)) {
1016 *pprev = dst->next;
1017 dst_free(dst);
1018 } else {
1019 pprev = &dst->next;
1020 }
1021 }
1022 spin_unlock_bh(&icmp6_dst_lock);
1023}
1024
Daniel Lezcano569d3642008-01-18 03:56:57 -08001025static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027 unsigned long now = jiffies;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001028 struct net *net = ops->dst_net;
1029 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1030 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1031 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1032 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1033 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034
Daniel Lezcano7019b782008-03-04 13:50:14 -08001035 if (time_after(rt_last_gc + rt_min_interval, now) &&
1036 atomic_read(&ops->entries) <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 goto out;
1038
Benjamin Thery6891a342008-03-04 13:49:47 -08001039 net->ipv6.ip6_rt_gc_expire++;
1040 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1041 net->ipv6.ip6_rt_last_gc = now;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001042 if (atomic_read(&ops->entries) < ops->gc_thresh)
1043 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001045 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1046 return (atomic_read(&ops->entries) > rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047}
1048
1049/* Clean host part of a prefix. Not necessary in radix tree,
1050 but results in cleaner routing tables.
1051
1052 Remove it only when all the things will work!
1053 */
1054
1055static int ipv6_get_mtu(struct net_device *dev)
1056{
1057 int mtu = IPV6_MIN_MTU;
1058 struct inet6_dev *idev;
1059
1060 idev = in6_dev_get(dev);
1061 if (idev) {
1062 mtu = idev->cnf.mtu6;
1063 in6_dev_put(idev);
1064 }
1065 return mtu;
1066}
1067
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001068int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069{
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001070 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1071 if (hoplimit < 0) {
1072 struct net_device *dev = dst->dev;
1073 struct inet6_dev *idev = in6_dev_get(dev);
1074 if (idev) {
1075 hoplimit = idev->cnf.hop_limit;
1076 in6_dev_put(idev);
1077 } else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001078 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079 }
1080 return hoplimit;
1081}
1082
1083/*
1084 *
1085 */
1086
Thomas Graf86872cb2006-08-22 00:01:08 -07001087int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088{
1089 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001090 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 struct rt6_info *rt = NULL;
1092 struct net_device *dev = NULL;
1093 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001094 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095 int addr_type;
1096
Thomas Graf86872cb2006-08-22 00:01:08 -07001097 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098 return -EINVAL;
1099#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001100 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101 return -EINVAL;
1102#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001103 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001105 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 if (!dev)
1107 goto out;
1108 idev = in6_dev_get(dev);
1109 if (!idev)
1110 goto out;
1111 }
1112
Thomas Graf86872cb2006-08-22 00:01:08 -07001113 if (cfg->fc_metric == 0)
1114 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115
Daniel Lezcano55786892008-03-04 13:47:47 -08001116 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001117 if (table == NULL) {
1118 err = -ENOBUFS;
1119 goto out;
1120 }
1121
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001122 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123
1124 if (rt == NULL) {
1125 err = -ENOMEM;
1126 goto out;
1127 }
1128
1129 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001130 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1131 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1132 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133
Thomas Graf86872cb2006-08-22 00:01:08 -07001134 if (cfg->fc_protocol == RTPROT_UNSPEC)
1135 cfg->fc_protocol = RTPROT_BOOT;
1136 rt->rt6i_protocol = cfg->fc_protocol;
1137
1138 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139
1140 if (addr_type & IPV6_ADDR_MULTICAST)
1141 rt->u.dst.input = ip6_mc_input;
1142 else
1143 rt->u.dst.input = ip6_forward;
1144
1145 rt->u.dst.output = ip6_output;
1146
Thomas Graf86872cb2006-08-22 00:01:08 -07001147 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1148 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 if (rt->rt6i_dst.plen == 128)
1150 rt->u.dst.flags = DST_HOST;
1151
1152#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001153 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1154 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155#endif
1156
Thomas Graf86872cb2006-08-22 00:01:08 -07001157 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158
1159 /* We cannot add true routes via loopback here,
1160 they would result in kernel looping; promote them to reject routes
1161 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001162 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1164 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001165 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166 if (dev) {
1167 dev_put(dev);
1168 in6_dev_put(idev);
1169 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001170 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171 dev_hold(dev);
1172 idev = in6_dev_get(dev);
1173 if (!idev) {
1174 err = -ENODEV;
1175 goto out;
1176 }
1177 }
1178 rt->u.dst.output = ip6_pkt_discard_out;
1179 rt->u.dst.input = ip6_pkt_discard;
1180 rt->u.dst.error = -ENETUNREACH;
1181 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1182 goto install_route;
1183 }
1184
Thomas Graf86872cb2006-08-22 00:01:08 -07001185 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 struct in6_addr *gw_addr;
1187 int gwa_type;
1188
Thomas Graf86872cb2006-08-22 00:01:08 -07001189 gw_addr = &cfg->fc_gateway;
1190 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 gwa_type = ipv6_addr_type(gw_addr);
1192
1193 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1194 struct rt6_info *grt;
1195
1196 /* IPv6 strictly inhibits using not link-local
1197 addresses as nexthop address.
1198 Otherwise, router will not able to send redirects.
1199 It is very good, but in some (rare!) circumstances
1200 (SIT, PtP, NBMA NOARP links) it is handy to allow
1201 some exceptions. --ANK
1202 */
1203 err = -EINVAL;
1204 if (!(gwa_type&IPV6_ADDR_UNICAST))
1205 goto out;
1206
Daniel Lezcano55786892008-03-04 13:47:47 -08001207 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208
1209 err = -EHOSTUNREACH;
1210 if (grt == NULL)
1211 goto out;
1212 if (dev) {
1213 if (dev != grt->rt6i_dev) {
1214 dst_release(&grt->u.dst);
1215 goto out;
1216 }
1217 } else {
1218 dev = grt->rt6i_dev;
1219 idev = grt->rt6i_idev;
1220 dev_hold(dev);
1221 in6_dev_hold(grt->rt6i_idev);
1222 }
1223 if (!(grt->rt6i_flags&RTF_GATEWAY))
1224 err = 0;
1225 dst_release(&grt->u.dst);
1226
1227 if (err)
1228 goto out;
1229 }
1230 err = -EINVAL;
1231 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1232 goto out;
1233 }
1234
1235 err = -ENODEV;
1236 if (dev == NULL)
1237 goto out;
1238
Thomas Graf86872cb2006-08-22 00:01:08 -07001239 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1241 if (IS_ERR(rt->rt6i_nexthop)) {
1242 err = PTR_ERR(rt->rt6i_nexthop);
1243 rt->rt6i_nexthop = NULL;
1244 goto out;
1245 }
1246 }
1247
Thomas Graf86872cb2006-08-22 00:01:08 -07001248 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249
1250install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001251 if (cfg->fc_mx) {
1252 struct nlattr *nla;
1253 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254
Thomas Graf86872cb2006-08-22 00:01:08 -07001255 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001256 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001257
1258 if (type) {
1259 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 err = -EINVAL;
1261 goto out;
1262 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001263
1264 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266 }
1267 }
1268
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001269 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
Rami Rosen1ca615f2008-08-06 02:34:21 -07001271 if (!dst_mtu(&rt->u.dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001273 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
Daniel Lezcano55786892008-03-04 13:47:47 -08001274 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 rt->u.dst.dev = dev;
1276 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001277 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001278
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001279 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001280
Thomas Graf86872cb2006-08-22 00:01:08 -07001281 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282
1283out:
1284 if (dev)
1285 dev_put(dev);
1286 if (idev)
1287 in6_dev_put(idev);
1288 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001289 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 return err;
1291}
1292
Thomas Graf86872cb2006-08-22 00:01:08 -07001293static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294{
1295 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001296 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001297 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001299 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001300 return -ENOENT;
1301
Thomas Grafc71099a2006-08-04 23:20:06 -07001302 table = rt->rt6i_table;
1303 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304
Thomas Graf86872cb2006-08-22 00:01:08 -07001305 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 dst_release(&rt->u.dst);
1307
Thomas Grafc71099a2006-08-04 23:20:06 -07001308 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309
1310 return err;
1311}
1312
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001313int ip6_del_rt(struct rt6_info *rt)
1314{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001315 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001316 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001317 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001318 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001319}
1320
Thomas Graf86872cb2006-08-22 00:01:08 -07001321static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322{
Thomas Grafc71099a2006-08-04 23:20:06 -07001323 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 struct fib6_node *fn;
1325 struct rt6_info *rt;
1326 int err = -ESRCH;
1327
Daniel Lezcano55786892008-03-04 13:47:47 -08001328 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001329 if (table == NULL)
1330 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331
Thomas Grafc71099a2006-08-04 23:20:06 -07001332 read_lock_bh(&table->tb6_lock);
1333
1334 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001335 &cfg->fc_dst, cfg->fc_dst_len,
1336 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001337
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001339 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001340 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001342 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001344 if (cfg->fc_flags & RTF_GATEWAY &&
1345 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001347 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348 continue;
1349 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001350 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351
Thomas Graf86872cb2006-08-22 00:01:08 -07001352 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001353 }
1354 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001355 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356
1357 return err;
1358}
1359
1360/*
1361 * Handle redirects
1362 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001363struct ip6rd_flowi {
1364 struct flowi fl;
1365 struct in6_addr gateway;
1366};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001368static struct rt6_info *__ip6_route_redirect(struct net *net,
1369 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001370 struct flowi *fl,
1371 int flags)
1372{
1373 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1374 struct rt6_info *rt;
1375 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001376
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001378 * Get the "current" route for this destination and
1379 * check if the redirect has come from approriate router.
1380 *
1381 * RFC 2461 specifies that redirects should only be
1382 * accepted if they come from the nexthop to the target.
1383 * Due to the way the routes are chosen, this notion
1384 * is a bit fuzzy and one might need to check all possible
1385 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387
Thomas Grafc71099a2006-08-04 23:20:06 -07001388 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001389 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001390restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001391 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001392 /*
1393 * Current route is on-link; redirect is always invalid.
1394 *
1395 * Seems, previous statement is not true. It could
1396 * be node, which looks for us as on-link (f.e. proxy ndisc)
1397 * But then router serving it might decide, that we should
1398 * know truth 8)8) --ANK (980726).
1399 */
1400 if (rt6_check_expired(rt))
1401 continue;
1402 if (!(rt->rt6i_flags & RTF_GATEWAY))
1403 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001404 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001405 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001406 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001407 continue;
1408 break;
1409 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001410
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001411 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001412 rt = net->ipv6.ip6_null_entry;
1413 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001414out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001415 dst_hold(&rt->u.dst);
1416
1417 read_unlock_bh(&table->tb6_lock);
1418
1419 return rt;
1420};
1421
1422static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1423 struct in6_addr *src,
1424 struct in6_addr *gateway,
1425 struct net_device *dev)
1426{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001427 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001428 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001429 struct ip6rd_flowi rdfl = {
1430 .fl = {
1431 .oif = dev->ifindex,
1432 .nl_u = {
1433 .ip6_u = {
1434 .daddr = *dest,
1435 .saddr = *src,
1436 },
1437 },
1438 },
1439 .gateway = *gateway,
1440 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001441
1442 if (rt6_need_strict(dest))
1443 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001444
Daniel Lezcano55786892008-03-04 13:47:47 -08001445 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001446 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001447}
1448
1449void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1450 struct in6_addr *saddr,
1451 struct neighbour *neigh, u8 *lladdr, int on_link)
1452{
1453 struct rt6_info *rt, *nrt = NULL;
1454 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001455 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001456
1457 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1458
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001459 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 if (net_ratelimit())
1461 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1462 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001463 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464 }
1465
Linus Torvalds1da177e2005-04-16 15:20:36 -07001466 /*
1467 * We have finally decided to accept it.
1468 */
1469
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001470 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1472 NEIGH_UPDATE_F_OVERRIDE|
1473 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1474 NEIGH_UPDATE_F_ISROUTER))
1475 );
1476
1477 /*
1478 * Redirect received -> path was valid.
1479 * Look, redirects are sent only in response to data packets,
1480 * so that this nexthop apparently is reachable. --ANK
1481 */
1482 dst_confirm(&rt->u.dst);
1483
1484 /* Duplicate redirect: silently ignore. */
1485 if (neigh == rt->u.dst.neighbour)
1486 goto out;
1487
1488 nrt = ip6_rt_copy(rt);
1489 if (nrt == NULL)
1490 goto out;
1491
1492 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1493 if (on_link)
1494 nrt->rt6i_flags &= ~RTF_GATEWAY;
1495
1496 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1497 nrt->rt6i_dst.plen = 128;
1498 nrt->u.dst.flags |= DST_HOST;
1499
1500 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1501 nrt->rt6i_nexthop = neigh_clone(neigh);
1502 /* Reset pmtu, it may be better */
1503 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001504 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
Daniel Lezcano55786892008-03-04 13:47:47 -08001505 dst_mtu(&nrt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506
Thomas Graf40e22e82006-08-22 00:00:45 -07001507 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 goto out;
1509
Tom Tucker8d717402006-07-30 20:43:36 -07001510 netevent.old = &rt->u.dst;
1511 netevent.new = &nrt->u.dst;
1512 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1513
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001515 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 return;
1517 }
1518
1519out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001520 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 return;
1522}
1523
1524/*
1525 * Handle ICMP "packet too big" messages
1526 * i.e. Path MTU discovery
1527 */
1528
1529void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1530 struct net_device *dev, u32 pmtu)
1531{
1532 struct rt6_info *rt, *nrt;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001533 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 int allfrag = 0;
1535
Daniel Lezcano55786892008-03-04 13:47:47 -08001536 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537 if (rt == NULL)
1538 return;
1539
1540 if (pmtu >= dst_mtu(&rt->u.dst))
1541 goto out;
1542
1543 if (pmtu < IPV6_MIN_MTU) {
1544 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001545 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 * MTU (1280) and a fragment header should always be included
1547 * after a node receiving Too Big message reporting PMTU is
1548 * less than the IPv6 Minimum Link MTU.
1549 */
1550 pmtu = IPV6_MIN_MTU;
1551 allfrag = 1;
1552 }
1553
1554 /* New mtu received -> path was valid.
1555 They are sent only in response to data packets,
1556 so that this nexthop apparently is reachable. --ANK
1557 */
1558 dst_confirm(&rt->u.dst);
1559
1560 /* Host route. If it is static, it would be better
1561 not to override it, but add new one, so that
1562 when cache entry will expire old pmtu
1563 would return automatically.
1564 */
1565 if (rt->rt6i_flags & RTF_CACHE) {
1566 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1567 if (allfrag)
1568 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano55786892008-03-04 13:47:47 -08001569 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1571 goto out;
1572 }
1573
1574 /* Network route.
1575 Two cases are possible:
1576 1. It is connected route. Action: COW
1577 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1578 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001579 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001580 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001581 else
1582 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001583
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001584 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001585 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1586 if (allfrag)
1587 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1588
1589 /* According to RFC 1981, detecting PMTU increase shouldn't be
1590 * happened within 5 mins, the recommended timer is 10 mins.
1591 * Here this route expiration time is set to ip6_rt_mtu_expires
1592 * which is 10 mins. After 10 mins the decreased pmtu is expired
1593 * and detecting PMTU increase will be automatically happened.
1594 */
Daniel Lezcano55786892008-03-04 13:47:47 -08001595 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001596 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1597
Thomas Graf40e22e82006-08-22 00:00:45 -07001598 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600out:
1601 dst_release(&rt->u.dst);
1602}
1603
1604/*
1605 * Misc support functions
1606 */
1607
1608static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1609{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001610 struct net *net = dev_net(ort->rt6i_dev);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001611 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612
1613 if (rt) {
1614 rt->u.dst.input = ort->u.dst.input;
1615 rt->u.dst.output = ort->u.dst.output;
1616
1617 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001618 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001619 rt->u.dst.dev = ort->u.dst.dev;
1620 if (rt->u.dst.dev)
1621 dev_hold(rt->u.dst.dev);
1622 rt->rt6i_idev = ort->rt6i_idev;
1623 if (rt->rt6i_idev)
1624 in6_dev_hold(rt->rt6i_idev);
1625 rt->u.dst.lastuse = jiffies;
1626 rt->rt6i_expires = 0;
1627
1628 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1629 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1630 rt->rt6i_metric = 0;
1631
1632 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1633#ifdef CONFIG_IPV6_SUBTREES
1634 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1635#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001636 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001637 }
1638 return rt;
1639}
1640
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001641#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001642static struct rt6_info *rt6_get_route_info(struct net *net,
1643 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001644 struct in6_addr *gwaddr, int ifindex)
1645{
1646 struct fib6_node *fn;
1647 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001648 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001649
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001650 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001651 if (table == NULL)
1652 return NULL;
1653
1654 write_lock_bh(&table->tb6_lock);
1655 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001656 if (!fn)
1657 goto out;
1658
Eric Dumazet7cc48262007-02-09 16:22:57 -08001659 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001660 if (rt->rt6i_dev->ifindex != ifindex)
1661 continue;
1662 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1663 continue;
1664 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1665 continue;
1666 dst_hold(&rt->u.dst);
1667 break;
1668 }
1669out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001670 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001671 return rt;
1672}
1673
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001674static struct rt6_info *rt6_add_route_info(struct net *net,
1675 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001676 struct in6_addr *gwaddr, int ifindex,
1677 unsigned pref)
1678{
Thomas Graf86872cb2006-08-22 00:01:08 -07001679 struct fib6_config cfg = {
1680 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001681 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001682 .fc_ifindex = ifindex,
1683 .fc_dst_len = prefixlen,
1684 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1685 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001686 .fc_nlinfo.pid = 0,
1687 .fc_nlinfo.nlh = NULL,
1688 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001689 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001690
Thomas Graf86872cb2006-08-22 00:01:08 -07001691 ipv6_addr_copy(&cfg.fc_dst, prefix);
1692 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1693
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001694 /* We should treat it as a default route if prefix length is 0. */
1695 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001696 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001697
Thomas Graf86872cb2006-08-22 00:01:08 -07001698 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001699
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001700 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001701}
1702#endif
1703
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001705{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001707 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001709 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001710 if (table == NULL)
1711 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712
Thomas Grafc71099a2006-08-04 23:20:06 -07001713 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001714 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001716 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1718 break;
1719 }
1720 if (rt)
1721 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001722 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 return rt;
1724}
1725
1726struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001727 struct net_device *dev,
1728 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729{
Thomas Graf86872cb2006-08-22 00:01:08 -07001730 struct fib6_config cfg = {
1731 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001732 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001733 .fc_ifindex = dev->ifindex,
1734 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1735 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001736 .fc_nlinfo.pid = 0,
1737 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001738 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001739 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740
Thomas Graf86872cb2006-08-22 00:01:08 -07001741 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742
Thomas Graf86872cb2006-08-22 00:01:08 -07001743 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745 return rt6_get_dflt_router(gwaddr, dev);
1746}
1747
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001748void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749{
1750 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001751 struct fib6_table *table;
1752
1753 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001754 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001755 if (table == NULL)
1756 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757
1758restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001759 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001760 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1762 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001763 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001764 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765 goto restart;
1766 }
1767 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001768 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769}
1770
Daniel Lezcano55786892008-03-04 13:47:47 -08001771static void rtmsg_to_fib6_config(struct net *net,
1772 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001773 struct fib6_config *cfg)
1774{
1775 memset(cfg, 0, sizeof(*cfg));
1776
1777 cfg->fc_table = RT6_TABLE_MAIN;
1778 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1779 cfg->fc_metric = rtmsg->rtmsg_metric;
1780 cfg->fc_expires = rtmsg->rtmsg_info;
1781 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1782 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1783 cfg->fc_flags = rtmsg->rtmsg_flags;
1784
Daniel Lezcano55786892008-03-04 13:47:47 -08001785 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001786
Thomas Graf86872cb2006-08-22 00:01:08 -07001787 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1788 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1789 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1790}
1791
Daniel Lezcano55786892008-03-04 13:47:47 -08001792int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793{
Thomas Graf86872cb2006-08-22 00:01:08 -07001794 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795 struct in6_rtmsg rtmsg;
1796 int err;
1797
1798 switch(cmd) {
1799 case SIOCADDRT: /* Add a route */
1800 case SIOCDELRT: /* Delete a route */
1801 if (!capable(CAP_NET_ADMIN))
1802 return -EPERM;
1803 err = copy_from_user(&rtmsg, arg,
1804 sizeof(struct in6_rtmsg));
1805 if (err)
1806 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001807
Daniel Lezcano55786892008-03-04 13:47:47 -08001808 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001809
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810 rtnl_lock();
1811 switch (cmd) {
1812 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001813 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 break;
1815 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001816 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817 break;
1818 default:
1819 err = -EINVAL;
1820 }
1821 rtnl_unlock();
1822
1823 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001824 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825
1826 return -EINVAL;
1827}
1828
1829/*
1830 * Drop the packet on the floor
1831 */
1832
Ilpo Järvinen50eb4312008-01-12 03:21:00 -08001833static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001835 int type;
1836 switch (ipstats_mib_noroutes) {
1837 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001838 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001839 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1840 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1841 break;
1842 }
1843 /* FALLTHROUGH */
1844 case IPSTATS_MIB_OUTNOROUTES:
1845 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1846 break;
1847 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001848 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849 kfree_skb(skb);
1850 return 0;
1851}
1852
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001853static int ip6_pkt_discard(struct sk_buff *skb)
1854{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001855 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001856}
1857
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001858static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859{
1860 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001861 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862}
1863
David S. Miller6723ab52006-10-18 21:20:57 -07001864#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1865
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001866static int ip6_pkt_prohibit(struct sk_buff *skb)
1867{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001868 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001869}
1870
1871static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1872{
1873 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001874 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001875}
1876
David S. Miller6723ab52006-10-18 21:20:57 -07001877#endif
1878
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879/*
1880 * Allocate a dst for local (unicast / anycast) address.
1881 */
1882
1883struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1884 const struct in6_addr *addr,
1885 int anycast)
1886{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001887 struct net *net = dev_net(idev->dev);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001888 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889
1890 if (rt == NULL)
1891 return ERR_PTR(-ENOMEM);
1892
Daniel Lezcano55786892008-03-04 13:47:47 -08001893 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 in6_dev_hold(idev);
1895
1896 rt->u.dst.flags = DST_HOST;
1897 rt->u.dst.input = ip6_input;
1898 rt->u.dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001899 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900 rt->rt6i_idev = idev;
1901 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001902 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1904 rt->u.dst.obsolete = -1;
1905
1906 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001907 if (anycast)
1908 rt->rt6i_flags |= RTF_ANYCAST;
1909 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910 rt->rt6i_flags |= RTF_LOCAL;
1911 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1912 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001913 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001914 return ERR_PTR(-ENOMEM);
1915 }
1916
1917 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1918 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001919 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920
1921 atomic_set(&rt->u.dst.__refcnt, 1);
1922
1923 return rt;
1924}
1925
/* Argument bundle passed to fib6_ifdown() through its void *arg. */
struct arg_dev_net {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace providing ip6_null_entry */
};
1930
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931static int fib6_ifdown(struct rt6_info *rt, void *arg)
1932{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001933 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1934 struct net *net = ((struct arg_dev_net *)arg)->net;
1935
1936 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1937 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001938 RT6_TRACE("deleted by ifdown %p\n", rt);
1939 return -1;
1940 }
1941 return 0;
1942}
1943
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001944void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001946 struct arg_dev_net adn = {
1947 .dev = dev,
1948 .net = net,
1949 };
1950
1951 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07001952 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953}
1954
/* Argument bundle passed to rt6_mtu_change_route() through its void *p_arg. */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU value */
};
1960
1961static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1962{
1963 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1964 struct inet6_dev *idev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001965 struct net *net = dev_net(arg->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966
1967 /* In IPv6 pmtu discovery is not optional,
1968 so that RTAX_MTU lock cannot disable it.
1969 We still use this lock to block changes
1970 caused by addrconf/ndisc.
1971 */
1972
1973 idev = __in6_dev_get(arg->dev);
1974 if (idev == NULL)
1975 return 0;
1976
1977 /* For administrative MTU increase, there is no way to discover
1978 IPv6 PMTU increase, so PMTU increase should be updated here.
1979 Since RFC 1981 doesn't include administrative MTU increase
1980 update PMTU increase is a MUST. (i.e. jumbo frame)
1981 */
1982 /*
1983 If new MTU is less than route PMTU, this new MTU will be the
1984 lowest MTU in the path, update the route PMTU to reflect PMTU
1985 decreases; if new MTU is greater than route PMTU, and the
1986 old MTU is the lowest MTU in the path, update the route PMTU
1987 to reflect the increase. In this case if the other nodes' MTU
1988 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1989 PMTU discouvery.
1990 */
1991 if (rt->rt6i_dev == arg->dev &&
1992 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08001993 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001994 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001995 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Daniel Lezcano55786892008-03-04 13:47:47 -08001997 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07001998 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999 return 0;
2000}
2001
2002void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2003{
Thomas Grafc71099a2006-08-04 23:20:06 -07002004 struct rt6_mtu_change_arg arg = {
2005 .dev = dev,
2006 .mtu = mtu,
2007 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002009 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010}
2011
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002012static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002013 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002014 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002015 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002016 [RTA_PRIORITY] = { .type = NLA_U32 },
2017 [RTA_METRICS] = { .type = NLA_NESTED },
2018};
2019
2020static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2021 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022{
Thomas Graf86872cb2006-08-22 00:01:08 -07002023 struct rtmsg *rtm;
2024 struct nlattr *tb[RTA_MAX+1];
2025 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002026
Thomas Graf86872cb2006-08-22 00:01:08 -07002027 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2028 if (err < 0)
2029 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030
Thomas Graf86872cb2006-08-22 00:01:08 -07002031 err = -EINVAL;
2032 rtm = nlmsg_data(nlh);
2033 memset(cfg, 0, sizeof(*cfg));
2034
2035 cfg->fc_table = rtm->rtm_table;
2036 cfg->fc_dst_len = rtm->rtm_dst_len;
2037 cfg->fc_src_len = rtm->rtm_src_len;
2038 cfg->fc_flags = RTF_UP;
2039 cfg->fc_protocol = rtm->rtm_protocol;
2040
2041 if (rtm->rtm_type == RTN_UNREACHABLE)
2042 cfg->fc_flags |= RTF_REJECT;
2043
2044 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2045 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002046 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002047
2048 if (tb[RTA_GATEWAY]) {
2049 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2050 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002051 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002052
2053 if (tb[RTA_DST]) {
2054 int plen = (rtm->rtm_dst_len + 7) >> 3;
2055
2056 if (nla_len(tb[RTA_DST]) < plen)
2057 goto errout;
2058
2059 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002061
2062 if (tb[RTA_SRC]) {
2063 int plen = (rtm->rtm_src_len + 7) >> 3;
2064
2065 if (nla_len(tb[RTA_SRC]) < plen)
2066 goto errout;
2067
2068 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002070
2071 if (tb[RTA_OIF])
2072 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2073
2074 if (tb[RTA_PRIORITY])
2075 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2076
2077 if (tb[RTA_METRICS]) {
2078 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2079 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002081
2082 if (tb[RTA_TABLE])
2083 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2084
2085 err = 0;
2086errout:
2087 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088}
2089
Thomas Grafc127ea22007-03-22 11:58:32 -07002090static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091{
Thomas Graf86872cb2006-08-22 00:01:08 -07002092 struct fib6_config cfg;
2093 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094
Thomas Graf86872cb2006-08-22 00:01:08 -07002095 err = rtm_to_fib6_config(skb, nlh, &cfg);
2096 if (err < 0)
2097 return err;
2098
2099 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100}
2101
Thomas Grafc127ea22007-03-22 11:58:32 -07002102static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103{
Thomas Graf86872cb2006-08-22 00:01:08 -07002104 struct fib6_config cfg;
2105 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106
Thomas Graf86872cb2006-08-22 00:01:08 -07002107 err = rtm_to_fib6_config(skb, nlh, &cfg);
2108 if (err < 0)
2109 return err;
2110
2111 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002112}
2113
Thomas Graf339bf982006-11-10 14:10:15 -08002114static inline size_t rt6_nlmsg_size(void)
2115{
2116 return NLMSG_ALIGN(sizeof(struct rtmsg))
2117 + nla_total_size(16) /* RTA_SRC */
2118 + nla_total_size(16) /* RTA_DST */
2119 + nla_total_size(16) /* RTA_GATEWAY */
2120 + nla_total_size(16) /* RTA_PREFSRC */
2121 + nla_total_size(4) /* RTA_TABLE */
2122 + nla_total_size(4) /* RTA_IIF */
2123 + nla_total_size(4) /* RTA_OIF */
2124 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002125 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002126 + nla_total_size(sizeof(struct rta_cacheinfo));
2127}
2128
Brian Haley191cd582008-08-14 15:33:21 -07002129static int rt6_fill_node(struct net *net,
2130 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002131 struct in6_addr *dst, struct in6_addr *src,
2132 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002133 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134{
2135 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002136 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002137 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002138 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139
2140 if (prefix) { /* user wants prefix routes only */
2141 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2142 /* success since this is not a prefix route */
2143 return 1;
2144 }
2145 }
2146
Thomas Graf2d7202b2006-08-22 00:01:27 -07002147 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2148 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002149 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002150
2151 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152 rtm->rtm_family = AF_INET6;
2153 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2154 rtm->rtm_src_len = rt->rt6i_src.plen;
2155 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002156 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002157 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002158 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002159 table = RT6_TABLE_UNSPEC;
2160 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002161 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162 if (rt->rt6i_flags&RTF_REJECT)
2163 rtm->rtm_type = RTN_UNREACHABLE;
2164 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2165 rtm->rtm_type = RTN_LOCAL;
2166 else
2167 rtm->rtm_type = RTN_UNICAST;
2168 rtm->rtm_flags = 0;
2169 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2170 rtm->rtm_protocol = rt->rt6i_protocol;
2171 if (rt->rt6i_flags&RTF_DYNAMIC)
2172 rtm->rtm_protocol = RTPROT_REDIRECT;
2173 else if (rt->rt6i_flags & RTF_ADDRCONF)
2174 rtm->rtm_protocol = RTPROT_KERNEL;
2175 else if (rt->rt6i_flags&RTF_DEFAULT)
2176 rtm->rtm_protocol = RTPROT_RA;
2177
2178 if (rt->rt6i_flags&RTF_CACHE)
2179 rtm->rtm_flags |= RTM_F_CLONED;
2180
2181 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002182 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002183 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002184 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002185 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186#ifdef CONFIG_IPV6_SUBTREES
2187 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002188 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002189 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002191 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002192#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002193 if (iif) {
2194#ifdef CONFIG_IPV6_MROUTE
2195 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2196 int err = ip6mr_get_route(skb, rtm, nowait);
2197 if (err <= 0) {
2198 if (!nowait) {
2199 if (err == 0)
2200 return 0;
2201 goto nla_put_failure;
2202 } else {
2203 if (err == -EMSGSIZE)
2204 goto nla_put_failure;
2205 }
2206 }
2207 } else
2208#endif
2209 NLA_PUT_U32(skb, RTA_IIF, iif);
2210 } else if (dst) {
Brian Haley5e0115e2008-08-13 01:58:57 -07002211 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002213 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002214 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002215 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002217
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002219 goto nla_put_failure;
2220
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002222 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2223
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002225 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2226
2227 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002228
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002229 if (!(rt->rt6i_flags & RTF_EXPIRES))
2230 expires = 0;
2231 else if (rt->rt6i_expires - jiffies < INT_MAX)
2232 expires = rt->rt6i_expires - jiffies;
2233 else
2234 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002235
Thomas Grafe3703b32006-11-27 09:27:07 -08002236 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2237 expires, rt->u.dst.error) < 0)
2238 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002239
Thomas Graf2d7202b2006-08-22 00:01:27 -07002240 return nlmsg_end(skb, nlh);
2241
2242nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002243 nlmsg_cancel(skb, nlh);
2244 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245}
2246
Patrick McHardy1b43af52006-08-10 23:11:17 -07002247int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248{
2249 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2250 int prefix;
2251
Thomas Graf2d7202b2006-08-22 00:01:27 -07002252 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2253 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2255 } else
2256 prefix = 0;
2257
Brian Haley191cd582008-08-14 15:33:21 -07002258 return rt6_fill_node(arg->net,
2259 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002261 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262}
2263
Thomas Grafc127ea22007-03-22 11:58:32 -07002264static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002266 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002267 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002269 struct sk_buff *skb;
2270 struct rtmsg *rtm;
2271 struct flowi fl;
2272 int err, iif = 0;
2273
2274 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2275 if (err < 0)
2276 goto errout;
2277
2278 err = -EINVAL;
2279 memset(&fl, 0, sizeof(fl));
2280
2281 if (tb[RTA_SRC]) {
2282 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2283 goto errout;
2284
2285 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2286 }
2287
2288 if (tb[RTA_DST]) {
2289 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2290 goto errout;
2291
2292 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2293 }
2294
2295 if (tb[RTA_IIF])
2296 iif = nla_get_u32(tb[RTA_IIF]);
2297
2298 if (tb[RTA_OIF])
2299 fl.oif = nla_get_u32(tb[RTA_OIF]);
2300
2301 if (iif) {
2302 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002303 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002304 if (!dev) {
2305 err = -ENODEV;
2306 goto errout;
2307 }
2308 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002309
2310 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002311 if (skb == NULL) {
2312 err = -ENOBUFS;
2313 goto errout;
2314 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002315
2316 /* Reserve room for dummy headers, this skb can pass
2317 through good chunk of routing engine.
2318 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002319 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002320 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2321
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002322 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323 skb->dst = &rt->u.dst;
2324
Brian Haley191cd582008-08-14 15:33:21 -07002325 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002327 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002329 kfree_skb(skb);
2330 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331 }
2332
Daniel Lezcano55786892008-03-04 13:47:47 -08002333 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002334errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002335 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336}
2337
Thomas Graf86872cb2006-08-22 00:01:08 -07002338void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002339{
2340 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002341 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002342 u32 seq;
2343 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002345 err = -ENOBUFS;
2346 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002347
Thomas Graf339bf982006-11-10 14:10:15 -08002348 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002349 if (skb == NULL)
2350 goto errout;
2351
Brian Haley191cd582008-08-14 15:33:21 -07002352 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002353 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002354 if (err < 0) {
2355 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2356 WARN_ON(err == -EMSGSIZE);
2357 kfree_skb(skb);
2358 goto errout;
2359 }
Daniel Lezcano55786892008-03-04 13:47:47 -08002360 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2361 info->nlh, gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002362errout:
2363 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002364 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365}
2366
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002367static int ip6_route_dev_notify(struct notifier_block *this,
2368 unsigned long event, void *data)
2369{
2370 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002371 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002372
2373 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2374 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2375 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2376#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2377 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2378 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2379 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2380 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2381#endif
2382 }
2383
2384 return NOTIFY_OK;
2385}
2386
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387/*
2388 * /proc
2389 */
2390
2391#ifdef CONFIG_PROC_FS
2392
/*
 * Sum of the widths of the address, prefix-length, counter and device-name
 * columns plus a trailing character.
 * NOTE(review): appears to describe the length of one /proc route line;
 * it is not referenced by the code that follows -- confirm before relying
 * on it.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * Buffer-cursor bookkeeping for a /proc reader.
 * NOTE(review): not referenced by the seq_file based code below; looks
 * like a leftover from a buffer-based implementation -- confirm.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2403
2404static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2405{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002406 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002407
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002408 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2409 rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410
2411#ifdef CONFIG_IPV6_SUBTREES
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002412 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2413 rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002414#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002415 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002416#endif
2417
2418 if (rt->rt6i_nexthop) {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002419 seq_printf(m, NIP6_SEQFMT,
2420 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002421 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002422 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002424 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2425 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2426 rt->u.dst.__use, rt->rt6i_flags,
2427 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428 return 0;
2429}
2430
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002431static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002433 struct net *net = (struct net *)m->private;
2434 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002435 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436}
2437
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002438static int ipv6_route_open(struct inode *inode, struct file *file)
2439{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002440 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002441}
2442
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002443static const struct file_operations ipv6_route_proc_fops = {
2444 .owner = THIS_MODULE,
2445 .open = ipv6_route_open,
2446 .read = seq_read,
2447 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002448 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002449};
2450
Linus Torvalds1da177e2005-04-16 15:20:36 -07002451static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2452{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002453 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002454 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002455 net->ipv6.rt6_stats->fib_nodes,
2456 net->ipv6.rt6_stats->fib_route_nodes,
2457 net->ipv6.rt6_stats->fib_rt_alloc,
2458 net->ipv6.rt6_stats->fib_rt_entries,
2459 net->ipv6.rt6_stats->fib_rt_cache,
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002460 atomic_read(&net->ipv6.ip6_dst_ops->entries),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002461 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462
2463 return 0;
2464}
2465
2466static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2467{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002468 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002469}
2470
Arjan van de Ven9a321442007-02-12 00:55:35 -08002471static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002472 .owner = THIS_MODULE,
2473 .open = rt6_stats_seq_open,
2474 .read = seq_read,
2475 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002476 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477};
2478#endif /* CONFIG_PROC_FS */
2479
2480#ifdef CONFIG_SYSCTL
2481
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482static
2483int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2484 void __user *buffer, size_t *lenp, loff_t *ppos)
2485{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002486 struct net *net = current->nsproxy->net_ns;
2487 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488 if (write) {
2489 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002490 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491 return 0;
2492 } else
2493 return -EINVAL;
2494}
2495
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002496ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002497 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002499 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002501 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002502 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002503 },
2504 {
2505 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2506 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002507 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002508 .maxlen = sizeof(int),
2509 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002510 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511 },
2512 {
2513 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2514 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002515 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002516 .maxlen = sizeof(int),
2517 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002518 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519 },
2520 {
2521 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2522 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002523 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002524 .maxlen = sizeof(int),
2525 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002526 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002527 .strategy = &sysctl_jiffies,
2528 },
2529 {
2530 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2531 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002532 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533 .maxlen = sizeof(int),
2534 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002535 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536 .strategy = &sysctl_jiffies,
2537 },
2538 {
2539 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2540 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002541 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002542 .maxlen = sizeof(int),
2543 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002544 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002545 .strategy = &sysctl_jiffies,
2546 },
2547 {
2548 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2549 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002550 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 .maxlen = sizeof(int),
2552 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002553 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554 .strategy = &sysctl_jiffies,
2555 },
2556 {
2557 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2558 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002559 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560 .maxlen = sizeof(int),
2561 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002562 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 .strategy = &sysctl_jiffies,
2564 },
2565 {
2566 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2567 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002568 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569 .maxlen = sizeof(int),
2570 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002571 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002572 .strategy = &sysctl_jiffies,
2573 },
2574 {
2575 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2576 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002577 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578 .maxlen = sizeof(int),
2579 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002580 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 .strategy = &sysctl_ms_jiffies,
2582 },
2583 { .ctl_name = 0 }
2584};
2585
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002586struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2587{
2588 struct ctl_table *table;
2589
2590 table = kmemdup(ipv6_route_table_template,
2591 sizeof(ipv6_route_table_template),
2592 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002593
2594 if (table) {
2595 table[0].data = &net->ipv6.sysctl.flush_delay;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002596 table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002597 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2598 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2599 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2600 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2601 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2602 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2603 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2604 }
2605
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002606 return table;
2607}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608#endif
2609
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002610static int ip6_route_net_init(struct net *net)
2611{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002612 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002613
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002614 net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2615 sizeof(*net->ipv6.ip6_dst_ops),
2616 GFP_KERNEL);
2617 if (!net->ipv6.ip6_dst_ops)
2618 goto out;
Denis V. Lunev48115be2008-04-16 02:01:34 -07002619 net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002620
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002621 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2622 sizeof(*net->ipv6.ip6_null_entry),
2623 GFP_KERNEL);
2624 if (!net->ipv6.ip6_null_entry)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002625 goto out_ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002626 net->ipv6.ip6_null_entry->u.dst.path =
2627 (struct dst_entry *)net->ipv6.ip6_null_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002628 net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002629
2630#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2631 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2632 sizeof(*net->ipv6.ip6_prohibit_entry),
2633 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002634 if (!net->ipv6.ip6_prohibit_entry)
2635 goto out_ip6_null_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002636 net->ipv6.ip6_prohibit_entry->u.dst.path =
2637 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002638 net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002639
2640 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2641 sizeof(*net->ipv6.ip6_blk_hole_entry),
2642 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002643 if (!net->ipv6.ip6_blk_hole_entry)
2644 goto out_ip6_prohibit_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002645 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2646 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002647 net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002648#endif
2649
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002650#ifdef CONFIG_PROC_FS
2651 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2652 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2653#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002654 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2655
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002656 ret = 0;
2657out:
2658 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002659
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002660#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2661out_ip6_prohibit_entry:
2662 kfree(net->ipv6.ip6_prohibit_entry);
2663out_ip6_null_entry:
2664 kfree(net->ipv6.ip6_null_entry);
2665#endif
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002666out_ip6_dst_ops:
Denis V. Lunev48115be2008-04-16 02:01:34 -07002667 release_net(net->ipv6.ip6_dst_ops->dst_net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002668 kfree(net->ipv6.ip6_dst_ops);
2669 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002670}
2671
2672static void ip6_route_net_exit(struct net *net)
2673{
2674#ifdef CONFIG_PROC_FS
2675 proc_net_remove(net, "ipv6_route");
2676 proc_net_remove(net, "rt6_stats");
2677#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002678 kfree(net->ipv6.ip6_null_entry);
2679#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2680 kfree(net->ipv6.ip6_prohibit_entry);
2681 kfree(net->ipv6.ip6_blk_hole_entry);
2682#endif
Denis V. Lunev48115be2008-04-16 02:01:34 -07002683 release_net(net->ipv6.ip6_dst_ops->dst_net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002684 kfree(net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002685}
2686
2687static struct pernet_operations ip6_route_net_ops = {
2688 .init = ip6_route_net_init,
2689 .exit = ip6_route_net_exit,
2690};
2691
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002692static struct notifier_block ip6_route_dev_notifier = {
2693 .notifier_call = ip6_route_dev_notify,
2694 .priority = 0,
2695};
2696
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002697int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002698{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002699 int ret;
2700
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002701 ret = -ENOMEM;
2702 ip6_dst_ops_template.kmem_cachep =
2703 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2704 SLAB_HWCACHE_ALIGN, NULL);
2705 if (!ip6_dst_ops_template.kmem_cachep)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002706 goto out;;
David S. Miller14e50e52007-05-24 18:17:54 -07002707
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002708 ret = register_pernet_subsys(&ip6_route_net_ops);
2709 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002710 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002711
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002712 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2713
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002714 /* Registering of the loopback is done before this portion of code,
2715 * the loopback reference in rt6_info will not be taken, do it
2716 * manually for init_net */
2717 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2718 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2719 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2720 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2721 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2722 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2723 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2724 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002725 ret = fib6_init();
2726 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002727 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002728
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002729 ret = xfrm6_init();
2730 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002731 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002732
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002733 ret = fib6_rules_init();
2734 if (ret)
2735 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002736
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002737 ret = -ENOBUFS;
2738 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2739 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2740 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2741 goto fib6_rules_init;
2742
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002743 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002744 if (ret)
2745 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002746
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002747out:
2748 return ret;
2749
2750fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002751 fib6_rules_cleanup();
2752xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002753 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002754out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002755 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002756out_register_subsys:
2757 unregister_pernet_subsys(&ip6_route_net_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002758out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002759 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002760 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002761}
2762
2763void ip6_route_cleanup(void)
2764{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002765 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002766 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002767 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002768 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002769 unregister_pernet_subsys(&ip6_route_net_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002770 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002771}