blob: d69fa462d3f038fc346bb0ebcfc8aa40bac8a1c1 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020043#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070054#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070055#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
63/* Set to 3 to get tracing. */
64#define RT6_DEBUG 2
65
66#if RT6_DEBUG >= 3
67#define RDBG(x) printk x
68#define RT6_TRACE(x...) printk(KERN_DEBUG x)
69#else
70#define RDBG(x)
71#define RT6_TRACE(x...) do { ; } while (0)
72#endif
73
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -080074#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
Linus Torvalds1da177e2005-04-16 15:20:36 -070076static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79static void ip6_dst_destroy(struct dst_entry *);
80static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080082static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
84static int ip6_pkt_discard(struct sk_buff *skb);
85static int ip6_pkt_discard_out(struct sk_buff *skb);
86static void ip6_link_failure(struct sk_buff *skb);
87static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080089#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080090static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080092 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080094static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080096 struct in6_addr *gwaddr, int ifindex);
97#endif
98
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -080099static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100 .family = AF_INET6,
101 .protocol = __constant_htons(ETH_P_IPV6),
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700110 .local_out = __ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800112 .entries = ATOMIC_INIT(0),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700113};
114
David S. Miller14e50e52007-05-24 18:17:54 -0700115static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
116{
117}
118
119static struct dst_ops ip6_dst_blackhole_ops = {
120 .family = AF_INET6,
121 .protocol = __constant_htons(ETH_P_IPV6),
122 .destroy = ip6_dst_destroy,
123 .check = ip6_dst_check,
124 .update_pmtu = ip6_rt_blackhole_update_pmtu,
125 .entry_size = sizeof(struct rt6_info),
Eric Dumazete2422972008-01-30 20:07:45 -0800126 .entries = ATOMIC_INIT(0),
David S. Miller14e50e52007-05-24 18:17:54 -0700127};
128
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800129static struct rt6_info ip6_null_entry_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700130 .u = {
131 .dst = {
132 .__refcnt = ATOMIC_INIT(1),
133 .__use = 1,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 .obsolete = -1,
135 .error = -ENETUNREACH,
136 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
137 .input = ip6_pkt_discard,
138 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 }
140 },
141 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
142 .rt6i_metric = ~(u32) 0,
143 .rt6i_ref = ATOMIC_INIT(1),
144};
145
Thomas Graf101367c2006-08-04 03:39:02 -0700146#ifdef CONFIG_IPV6_MULTIPLE_TABLES
147
David S. Miller6723ab52006-10-18 21:20:57 -0700148static int ip6_pkt_prohibit(struct sk_buff *skb);
149static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700150
Adrian Bunk280a34c2008-04-21 02:29:32 -0700151static struct rt6_info ip6_prohibit_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700152 .u = {
153 .dst = {
154 .__refcnt = ATOMIC_INIT(1),
155 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700156 .obsolete = -1,
157 .error = -EACCES,
158 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Thomas Graf9ce8ade2006-10-18 20:46:54 -0700159 .input = ip6_pkt_prohibit,
160 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700161 }
162 },
163 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
164 .rt6i_metric = ~(u32) 0,
165 .rt6i_ref = ATOMIC_INIT(1),
166};
167
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800168static struct rt6_info ip6_blk_hole_entry_template = {
Thomas Graf101367c2006-08-04 03:39:02 -0700169 .u = {
170 .dst = {
171 .__refcnt = ATOMIC_INIT(1),
172 .__use = 1,
Thomas Graf101367c2006-08-04 03:39:02 -0700173 .obsolete = -1,
174 .error = -EINVAL,
175 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
Herbert Xu352e5122007-11-13 21:34:06 -0800176 .input = dst_discard,
177 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700178 }
179 },
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
185#endif
186
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187/* allocate dst with ip6_dst_ops */
Benjamin Theryf2fc6a52008-03-04 13:49:23 -0800188static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189{
Benjamin Theryf2fc6a52008-03-04 13:49:23 -0800190 return (struct rt6_info *)dst_alloc(ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900201 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202}
203
204static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
206{
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800209 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900210 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800212 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
213 struct inet6_dev *loopback_idev =
214 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 if (loopback_idev != NULL) {
216 rt->rt6i_idev = loopback_idev;
217 in6_dev_put(idev);
218 }
219 }
220}
221
222static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223{
224 return (rt->rt6i_flags & RTF_EXPIRES &&
225 time_after(jiffies, rt->rt6i_expires));
226}
227
Thomas Grafc71099a2006-08-04 23:20:06 -0700228static inline int rt6_need_strict(struct in6_addr *daddr)
229{
230 return (ipv6_addr_type(daddr) &
YOSHIFUJI Hideaki5ce83af2008-06-25 16:58:17 +0900231 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
Thomas Grafc71099a2006-08-04 23:20:06 -0700232}
233
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700235 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700236 */
237
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800238static inline struct rt6_info *rt6_device_match(struct net *net,
239 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900240 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700242 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243{
244 struct rt6_info *local = NULL;
245 struct rt6_info *sprt;
246
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900247 if (!oif && ipv6_addr_any(saddr))
248 goto out;
249
250 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
251 struct net_device *dev = sprt->rt6i_dev;
252
253 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254 if (dev->ifindex == oif)
255 return sprt;
256 if (dev->flags & IFF_LOOPBACK) {
257 if (sprt->rt6i_idev == NULL ||
258 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700259 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900261 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 local->rt6i_idev->dev->ifindex == oif))
263 continue;
264 }
265 local = sprt;
266 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900267 } else {
268 if (ipv6_chk_addr(net, saddr, dev,
269 flags & RT6_LOOKUP_F_IFACE))
270 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900272 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900274 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 if (local)
276 return local;
277
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700278 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800279 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900281out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282 return rt;
283}
284
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800285#ifdef CONFIG_IPV6_ROUTER_PREF
286static void rt6_probe(struct rt6_info *rt)
287{
288 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
289 /*
290 * Okay, this does not seem to be appropriate
291 * for now, however, we need to check if it
292 * is really so; aka Router Reachability Probing.
293 *
294 * Router Reachability Probe MUST be rate-limited
295 * to no more than one per minute.
296 */
297 if (!neigh || (neigh->nud_state & NUD_VALID))
298 return;
299 read_lock_bh(&neigh->lock);
300 if (!(neigh->nud_state & NUD_VALID) &&
YOSHIFUJI Hideaki52e163562006-03-20 17:05:47 -0800301 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800302 struct in6_addr mcaddr;
303 struct in6_addr *target;
304
305 neigh->updated = jiffies;
306 read_unlock_bh(&neigh->lock);
307
308 target = (struct in6_addr *)&neigh->primary_key;
309 addrconf_addr_solict_mult(target, &mcaddr);
310 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
311 } else
312 read_unlock_bh(&neigh->lock);
313}
314#else
315static inline void rt6_probe(struct rt6_info *rt)
316{
317 return;
318}
319#endif
320
Linus Torvalds1da177e2005-04-16 15:20:36 -0700321/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800322 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700324static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800326 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700327 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800328 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700329 if ((dev->flags & IFF_LOOPBACK) &&
330 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331 return 1;
332 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333}
334
Dave Jonesb6f99a22007-03-22 12:27:49 -0700335static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700336{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800337 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800338 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700339 if (rt->rt6i_flags & RTF_NONEXTHOP ||
340 !(rt->rt6i_flags & RTF_GATEWAY))
341 m = 1;
342 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800343 read_lock_bh(&neigh->lock);
344 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700345 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800346#ifdef CONFIG_IPV6_ROUTER_PREF
347 else if (neigh->nud_state & NUD_FAILED)
348 m = 0;
349#endif
350 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800351 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800352 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800353 } else
354 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800355 return m;
356}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800358static int rt6_score_route(struct rt6_info *rt, int oif,
359 int strict)
360{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700361 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900362
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700363 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700364 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800365 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800366#ifdef CONFIG_IPV6_ROUTER_PREF
367 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
368#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700369 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800370 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800371 return -1;
372 return m;
373}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374
David S. Millerf11e6652007-03-24 20:36:25 -0700375static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
376 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800377{
David S. Millerf11e6652007-03-24 20:36:25 -0700378 int m;
379
380 if (rt6_check_expired(rt))
381 goto out;
382
383 m = rt6_score_route(rt, oif, strict);
384 if (m < 0)
385 goto out;
386
387 if (m > *mpri) {
388 if (strict & RT6_LOOKUP_F_REACHABLE)
389 rt6_probe(match);
390 *mpri = m;
391 match = rt;
392 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
393 rt6_probe(rt);
394 }
395
396out:
397 return match;
398}
399
400static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
401 struct rt6_info *rr_head,
402 u32 metric, int oif, int strict)
403{
404 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800405 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406
David S. Millerf11e6652007-03-24 20:36:25 -0700407 match = NULL;
408 for (rt = rr_head; rt && rt->rt6i_metric == metric;
409 rt = rt->u.dst.rt6_next)
410 match = find_match(rt, oif, strict, &mpri, match);
411 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
412 rt = rt->u.dst.rt6_next)
413 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800414
David S. Millerf11e6652007-03-24 20:36:25 -0700415 return match;
416}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800417
David S. Millerf11e6652007-03-24 20:36:25 -0700418static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
419{
420 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800421 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
David S. Millerf11e6652007-03-24 20:36:25 -0700423 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800424 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425
David S. Millerf11e6652007-03-24 20:36:25 -0700426 rt0 = fn->rr_ptr;
427 if (!rt0)
428 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429
David S. Millerf11e6652007-03-24 20:36:25 -0700430 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800432 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700433 (strict & RT6_LOOKUP_F_REACHABLE)) {
434 struct rt6_info *next = rt0->u.dst.rt6_next;
435
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800436 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700437 if (!next || next->rt6i_metric != rt0->rt6i_metric)
438 next = fn->leaf;
439
440 if (next != rt0)
441 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 }
443
David S. Millerf11e6652007-03-24 20:36:25 -0700444 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800445 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700446
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900447 net = dev_net(rt0->rt6i_dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800448 return (match ? match : net->ipv6.ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449}
450
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800451#ifdef CONFIG_IPV6_ROUTE_INFO
452int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
453 struct in6_addr *gwaddr)
454{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900455 struct net *net = dev_net(dev);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800456 struct route_info *rinfo = (struct route_info *) opt;
457 struct in6_addr prefix_buf, *prefix;
458 unsigned int pref;
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900459 unsigned long lifetime;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800460 struct rt6_info *rt;
461
462 if (len < sizeof(struct route_info)) {
463 return -EINVAL;
464 }
465
466 /* Sanity check for prefix_len and length */
467 if (rinfo->length > 3) {
468 return -EINVAL;
469 } else if (rinfo->prefix_len > 128) {
470 return -EINVAL;
471 } else if (rinfo->prefix_len > 64) {
472 if (rinfo->length < 2) {
473 return -EINVAL;
474 }
475 } else if (rinfo->prefix_len > 0) {
476 if (rinfo->length < 1) {
477 return -EINVAL;
478 }
479 }
480
481 pref = rinfo->route_pref;
482 if (pref == ICMPV6_ROUTER_PREF_INVALID)
483 pref = ICMPV6_ROUTER_PREF_MEDIUM;
484
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900485 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800486
487 if (rinfo->length == 3)
488 prefix = (struct in6_addr *)rinfo->prefix;
489 else {
490 /* this function is safe */
491 ipv6_addr_prefix(&prefix_buf,
492 (struct in6_addr *)rinfo->prefix,
493 rinfo->prefix_len);
494 prefix = &prefix_buf;
495 }
496
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800497 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
498 dev->ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800499
500 if (rt && !lifetime) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700501 ip6_del_rt(rt);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800502 rt = NULL;
503 }
504
505 if (!rt && lifetime)
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800506 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800507 pref);
508 else if (rt)
509 rt->rt6i_flags = RTF_ROUTEINFO |
510 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
511
512 if (rt) {
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900513 if (!addrconf_finite_timeout(lifetime)) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800514 rt->rt6i_flags &= ~RTF_EXPIRES;
515 } else {
516 rt->rt6i_expires = jiffies + HZ * lifetime;
517 rt->rt6i_flags |= RTF_EXPIRES;
518 }
519 dst_release(&rt->u.dst);
520 }
521 return 0;
522}
523#endif
524
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800525#define BACKTRACK(__net, saddr) \
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700526do { \
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800527 if (rt == __net->ipv6.ip6_null_entry) { \
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700528 struct fib6_node *pn; \
Ville Nuorvalae0eda7b2006-10-16 22:11:11 -0700529 while (1) { \
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700530 if (fn->fn_flags & RTN_TL_ROOT) \
531 goto out; \
532 pn = fn->parent; \
533 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
Kim Nordlund8bce65b2006-12-13 16:38:29 -0800534 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700535 else \
536 fn = pn; \
537 if (fn->fn_flags & RTN_RTINFO) \
538 goto restart; \
Thomas Grafc71099a2006-08-04 23:20:06 -0700539 } \
Thomas Grafc71099a2006-08-04 23:20:06 -0700540 } \
YOSHIFUJI Hideaki982f56f2006-08-23 17:22:39 -0700541} while(0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700542
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800543static struct rt6_info *ip6_pol_route_lookup(struct net *net,
544 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700545 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546{
547 struct fib6_node *fn;
548 struct rt6_info *rt;
549
Thomas Grafc71099a2006-08-04 23:20:06 -0700550 read_lock_bh(&table->tb6_lock);
551 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
552restart:
553 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900554 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800555 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700556out:
Pavel Emelyanov03f49f32007-11-10 21:28:34 -0800557 dst_use(&rt->u.dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700558 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700559 return rt;
560
561}
562
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900563struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
564 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700565{
566 struct flowi fl = {
567 .oif = oif,
568 .nl_u = {
569 .ip6_u = {
570 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700571 },
572 },
573 };
574 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700575 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700576
Thomas Grafadaa70b2006-10-13 15:01:03 -0700577 if (saddr) {
578 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579 flags |= RT6_LOOKUP_F_HAS_SADDR;
580 }
581
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800582 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700583 if (dst->error == 0)
584 return (struct rt6_info *) dst;
585
586 dst_release(dst);
587
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588 return NULL;
589}
590
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900591EXPORT_SYMBOL(rt6_lookup);
592
Thomas Grafc71099a2006-08-04 23:20:06 -0700593/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 It takes new route entry, the addition fails by any reason the
595 route is freed. In any case, if caller does not hold it, it may
596 be destroyed.
597 */
598
Thomas Graf86872cb2006-08-22 00:01:08 -0700599static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600{
601 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700602 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
Thomas Grafc71099a2006-08-04 23:20:06 -0700604 table = rt->rt6i_table;
605 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700606 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700607 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608
609 return err;
610}
611
Thomas Graf40e22e82006-08-22 00:00:45 -0700612int ip6_ins_rt(struct rt6_info *rt)
613{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800614 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900615 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800616 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800617 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700618}
619
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800620static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
621 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623 struct rt6_info *rt;
624
625 /*
626 * Clone the route.
627 */
628
629 rt = ip6_rt_copy(ort);
630
631 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900632 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
633 if (rt->rt6i_dst.plen != 128 &&
634 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
635 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900637 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900639 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640 rt->rt6i_dst.plen = 128;
641 rt->rt6i_flags |= RTF_CACHE;
642 rt->u.dst.flags |= DST_HOST;
643
644#ifdef CONFIG_IPV6_SUBTREES
645 if (rt->rt6i_src.plen && saddr) {
646 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
647 rt->rt6i_src.plen = 128;
648 }
649#endif
650
651 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
652
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800653 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800655 return rt;
656}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800658static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
659{
660 struct rt6_info *rt = ip6_rt_copy(ort);
661 if (rt) {
662 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
663 rt->rt6i_dst.plen = 128;
664 rt->rt6i_flags |= RTF_CACHE;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800665 rt->u.dst.flags |= DST_HOST;
666 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
667 }
668 return rt;
669}
670
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800671static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
672 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673{
674 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800675 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700676 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800678 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700679 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700681 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
683relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700684 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800686restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700687 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688
689restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700690 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800691
692 BACKTRACK(net, &fl->fl6_src);
693 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800694 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800695 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800697 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700698 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800699
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800700 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800701 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800702 else {
703#if CLONE_OFFLINK_ROUTE
704 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
705#else
706 goto out2;
707#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800709
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800710 dst_release(&rt->u.dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800711 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800712
713 dst_hold(&rt->u.dst);
714 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700715 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800716 if (!err)
717 goto out2;
718 }
719
720 if (--attempts <= 0)
721 goto out2;
722
723 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700724 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800725 * released someone could insert this route. Relookup.
726 */
727 dst_release(&rt->u.dst);
728 goto relookup;
729
730out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800731 if (reachable) {
732 reachable = 0;
733 goto restart_2;
734 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800735 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700736 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737out2:
738 rt->u.dst.lastuse = jiffies;
739 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700740
741 return rt;
742}
743
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800744static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700745 struct flowi *fl, int flags)
746{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800747 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700748}
749
Thomas Grafc71099a2006-08-04 23:20:06 -0700750void ip6_route_input(struct sk_buff *skb)
751{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700752 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900753 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700754 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700755 struct flowi fl = {
756 .iif = skb->dev->ifindex,
757 .nl_u = {
758 .ip6_u = {
759 .daddr = iph->daddr,
760 .saddr = iph->saddr,
Al Viro90bcaf72006-11-08 00:25:17 -0800761 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
Thomas Grafc71099a2006-08-04 23:20:06 -0700762 },
763 },
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900764 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700765 .proto = iph->nexthdr,
766 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700767
768 if (rt6_need_strict(&iph->daddr))
769 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700770
Daniel Lezcano55786892008-03-04 13:47:47 -0800771 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
Thomas Grafc71099a2006-08-04 23:20:06 -0700772}
773
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800774static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700775 struct flowi *fl, int flags)
776{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800777 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700778}
779
Daniel Lezcano4591db42008-03-05 10:48:10 -0800780struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
781 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700782{
783 int flags = 0;
784
785 if (rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700786 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700787
Thomas Grafadaa70b2006-10-13 15:01:03 -0700788 if (!ipv6_addr_any(&fl->fl6_src))
789 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +0900790 else if (sk) {
791 unsigned int prefs = inet6_sk(sk)->srcprefs;
792 if (prefs & IPV6_PREFER_SRC_TMP)
793 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
794 if (prefs & IPV6_PREFER_SRC_PUBLIC)
795 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
796 if (prefs & IPV6_PREFER_SRC_COA)
797 flags |= RT6_LOOKUP_F_SRCPREF_COA;
798 }
Thomas Grafadaa70b2006-10-13 15:01:03 -0700799
Daniel Lezcano4591db42008-03-05 10:48:10 -0800800 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801}
802
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900803EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804
David S. Miller14e50e52007-05-24 18:17:54 -0700805int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
806{
807 struct rt6_info *ort = (struct rt6_info *) *dstp;
808 struct rt6_info *rt = (struct rt6_info *)
809 dst_alloc(&ip6_dst_blackhole_ops);
810 struct dst_entry *new = NULL;
811
812 if (rt) {
813 new = &rt->u.dst;
814
815 atomic_set(&new->__refcnt, 1);
816 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800817 new->input = dst_discard;
818 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700819
820 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
821 new->dev = ort->u.dst.dev;
822 if (new->dev)
823 dev_hold(new->dev);
824 rt->rt6i_idev = ort->rt6i_idev;
825 if (rt->rt6i_idev)
826 in6_dev_hold(rt->rt6i_idev);
827 rt->rt6i_expires = 0;
828
829 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
830 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
831 rt->rt6i_metric = 0;
832
833 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
834#ifdef CONFIG_IPV6_SUBTREES
835 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
836#endif
837
838 dst_free(new);
839 }
840
841 dst_release(*dstp);
842 *dstp = new;
843 return (new ? 0 : -ENOMEM);
844}
845EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
846
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847/*
848 * Destination cache support functions
849 */
850
851static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
852{
853 struct rt6_info *rt;
854
855 rt = (struct rt6_info *) dst;
856
857 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
858 return dst;
859
860 return NULL;
861}
862
863static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
864{
865 struct rt6_info *rt = (struct rt6_info *) dst;
866
867 if (rt) {
868 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700869 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870 else
871 dst_release(dst);
872 }
873 return NULL;
874}
875
876static void ip6_link_failure(struct sk_buff *skb)
877{
878 struct rt6_info *rt;
879
880 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
881
882 rt = (struct rt6_info *) skb->dst;
883 if (rt) {
884 if (rt->rt6i_flags&RTF_CACHE) {
885 dst_set_expires(&rt->u.dst, 0);
886 rt->rt6i_flags |= RTF_EXPIRES;
887 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
888 rt->rt6i_node->fn_sernum = -1;
889 }
890}
891
892static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
893{
894 struct rt6_info *rt6 = (struct rt6_info*)dst;
895
896 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
897 rt6->rt6i_flags |= RTF_MODIFIED;
898 if (mtu < IPV6_MIN_MTU) {
899 mtu = IPV6_MIN_MTU;
900 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
901 }
902 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700903 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 }
905}
906
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907static int ipv6_get_mtu(struct net_device *dev);
908
Daniel Lezcano55786892008-03-04 13:47:47 -0800909static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910{
911 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
912
Daniel Lezcano55786892008-03-04 13:47:47 -0800913 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
914 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700915
916 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900917 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
918 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
919 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920 * rely only on pmtu discovery"
921 */
922 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
923 mtu = IPV6_MAXPLEN;
924 return mtu;
925}
926
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800927static struct dst_entry *icmp6_dst_gc_list;
928static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700929
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800930struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900932 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700933{
934 struct rt6_info *rt;
935 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900936 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937
938 if (unlikely(idev == NULL))
939 return NULL;
940
Benjamin Theryf2fc6a52008-03-04 13:49:23 -0800941 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 if (unlikely(rt == NULL)) {
943 in6_dev_put(idev);
944 goto out;
945 }
946
947 dev_hold(dev);
948 if (neigh)
949 neigh_hold(neigh);
950 else
951 neigh = ndisc_get_neigh(dev, addr);
952
953 rt->rt6i_dev = dev;
954 rt->rt6i_idev = idev;
955 rt->rt6i_nexthop = neigh;
956 atomic_set(&rt->u.dst.__refcnt, 1);
957 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
958 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -0800959 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800960 rt->u.dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700961
962#if 0 /* there's no chance to use these for ndisc */
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900963 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
964 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 : 0;
966 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
967 rt->rt6i_dst.plen = 128;
968#endif
969
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800970 spin_lock_bh(&icmp6_dst_lock);
971 rt->u.dst.next = icmp6_dst_gc_list;
972 icmp6_dst_gc_list = &rt->u.dst;
973 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700974
Daniel Lezcano55786892008-03-04 13:47:47 -0800975 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976
977out:
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +0900978 return &rt->u.dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979}
980
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -0700981int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982{
983 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -0700984 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985
986 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700987
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800988 spin_lock_bh(&icmp6_dst_lock);
989 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700990
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991 while ((dst = *pprev) != NULL) {
992 if (!atomic_read(&dst->__refcnt)) {
993 *pprev = dst->next;
994 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 } else {
996 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -0700997 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 }
999 }
1000
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001001 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001002
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001003 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001004}
1005
David S. Miller1e493d12008-09-10 17:27:15 -07001006static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1007 void *arg)
1008{
1009 struct dst_entry *dst, **pprev;
1010
1011 spin_lock_bh(&icmp6_dst_lock);
1012 pprev = &icmp6_dst_gc_list;
1013 while ((dst = *pprev) != NULL) {
1014 struct rt6_info *rt = (struct rt6_info *) dst;
1015 if (func(rt, arg)) {
1016 *pprev = dst->next;
1017 dst_free(dst);
1018 } else {
1019 pprev = &dst->next;
1020 }
1021 }
1022 spin_unlock_bh(&icmp6_dst_lock);
1023}
1024
Daniel Lezcano569d3642008-01-18 03:56:57 -08001025static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027 unsigned long now = jiffies;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001028 struct net *net = ops->dst_net;
1029 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1030 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1031 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1032 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1033 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034
Daniel Lezcano7019b782008-03-04 13:50:14 -08001035 if (time_after(rt_last_gc + rt_min_interval, now) &&
1036 atomic_read(&ops->entries) <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 goto out;
1038
Benjamin Thery6891a342008-03-04 13:49:47 -08001039 net->ipv6.ip6_rt_gc_expire++;
1040 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1041 net->ipv6.ip6_rt_last_gc = now;
Daniel Lezcano7019b782008-03-04 13:50:14 -08001042 if (atomic_read(&ops->entries) < ops->gc_thresh)
1043 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001045 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1046 return (atomic_read(&ops->entries) > rt_max_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047}
1048
1049/* Clean host part of a prefix. Not necessary in radix tree,
1050 but results in cleaner routing tables.
1051
1052 Remove it only when all the things will work!
1053 */
1054
1055static int ipv6_get_mtu(struct net_device *dev)
1056{
1057 int mtu = IPV6_MIN_MTU;
1058 struct inet6_dev *idev;
1059
1060 idev = in6_dev_get(dev);
1061 if (idev) {
1062 mtu = idev->cnf.mtu6;
1063 in6_dev_put(idev);
1064 }
1065 return mtu;
1066}
1067
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001068int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001069{
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001070 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1071 if (hoplimit < 0) {
1072 struct net_device *dev = dst->dev;
1073 struct inet6_dev *idev = in6_dev_get(dev);
1074 if (idev) {
1075 hoplimit = idev->cnf.hop_limit;
1076 in6_dev_put(idev);
1077 } else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001078 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079 }
1080 return hoplimit;
1081}
1082
1083/*
1084 *
1085 */
1086
Thomas Graf86872cb2006-08-22 00:01:08 -07001087int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001088{
1089 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001090 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 struct rt6_info *rt = NULL;
1092 struct net_device *dev = NULL;
1093 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001094 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095 int addr_type;
1096
Thomas Graf86872cb2006-08-22 00:01:08 -07001097 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098 return -EINVAL;
1099#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001100 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101 return -EINVAL;
1102#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001103 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001105 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 if (!dev)
1107 goto out;
1108 idev = in6_dev_get(dev);
1109 if (!idev)
1110 goto out;
1111 }
1112
Thomas Graf86872cb2006-08-22 00:01:08 -07001113 if (cfg->fc_metric == 0)
1114 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115
Daniel Lezcano55786892008-03-04 13:47:47 -08001116 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001117 if (table == NULL) {
1118 err = -ENOBUFS;
1119 goto out;
1120 }
1121
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001122 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123
1124 if (rt == NULL) {
1125 err = -ENOMEM;
1126 goto out;
1127 }
1128
1129 rt->u.dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001130 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1131 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1132 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133
Thomas Graf86872cb2006-08-22 00:01:08 -07001134 if (cfg->fc_protocol == RTPROT_UNSPEC)
1135 cfg->fc_protocol = RTPROT_BOOT;
1136 rt->rt6i_protocol = cfg->fc_protocol;
1137
1138 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139
1140 if (addr_type & IPV6_ADDR_MULTICAST)
1141 rt->u.dst.input = ip6_mc_input;
1142 else
1143 rt->u.dst.input = ip6_forward;
1144
1145 rt->u.dst.output = ip6_output;
1146
Thomas Graf86872cb2006-08-22 00:01:08 -07001147 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1148 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 if (rt->rt6i_dst.plen == 128)
1150 rt->u.dst.flags = DST_HOST;
1151
1152#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001153 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1154 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155#endif
1156
Thomas Graf86872cb2006-08-22 00:01:08 -07001157 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158
1159 /* We cannot add true routes via loopback here,
1160 they would result in kernel looping; promote them to reject routes
1161 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001162 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1164 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001165 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166 if (dev) {
1167 dev_put(dev);
1168 in6_dev_put(idev);
1169 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001170 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171 dev_hold(dev);
1172 idev = in6_dev_get(dev);
1173 if (!idev) {
1174 err = -ENODEV;
1175 goto out;
1176 }
1177 }
1178 rt->u.dst.output = ip6_pkt_discard_out;
1179 rt->u.dst.input = ip6_pkt_discard;
1180 rt->u.dst.error = -ENETUNREACH;
1181 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1182 goto install_route;
1183 }
1184
Thomas Graf86872cb2006-08-22 00:01:08 -07001185 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 struct in6_addr *gw_addr;
1187 int gwa_type;
1188
Thomas Graf86872cb2006-08-22 00:01:08 -07001189 gw_addr = &cfg->fc_gateway;
1190 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 gwa_type = ipv6_addr_type(gw_addr);
1192
1193 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1194 struct rt6_info *grt;
1195
1196 /* IPv6 strictly inhibits using not link-local
1197 addresses as nexthop address.
1198 Otherwise, router will not able to send redirects.
1199 It is very good, but in some (rare!) circumstances
1200 (SIT, PtP, NBMA NOARP links) it is handy to allow
1201 some exceptions. --ANK
1202 */
1203 err = -EINVAL;
1204 if (!(gwa_type&IPV6_ADDR_UNICAST))
1205 goto out;
1206
Daniel Lezcano55786892008-03-04 13:47:47 -08001207 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208
1209 err = -EHOSTUNREACH;
1210 if (grt == NULL)
1211 goto out;
1212 if (dev) {
1213 if (dev != grt->rt6i_dev) {
1214 dst_release(&grt->u.dst);
1215 goto out;
1216 }
1217 } else {
1218 dev = grt->rt6i_dev;
1219 idev = grt->rt6i_idev;
1220 dev_hold(dev);
1221 in6_dev_hold(grt->rt6i_idev);
1222 }
1223 if (!(grt->rt6i_flags&RTF_GATEWAY))
1224 err = 0;
1225 dst_release(&grt->u.dst);
1226
1227 if (err)
1228 goto out;
1229 }
1230 err = -EINVAL;
1231 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1232 goto out;
1233 }
1234
1235 err = -ENODEV;
1236 if (dev == NULL)
1237 goto out;
1238
Thomas Graf86872cb2006-08-22 00:01:08 -07001239 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1241 if (IS_ERR(rt->rt6i_nexthop)) {
1242 err = PTR_ERR(rt->rt6i_nexthop);
1243 rt->rt6i_nexthop = NULL;
1244 goto out;
1245 }
1246 }
1247
Thomas Graf86872cb2006-08-22 00:01:08 -07001248 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249
1250install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001251 if (cfg->fc_mx) {
1252 struct nlattr *nla;
1253 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254
Thomas Graf86872cb2006-08-22 00:01:08 -07001255 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001256 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001257
1258 if (type) {
1259 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 err = -EINVAL;
1261 goto out;
1262 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001263
1264 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266 }
1267 }
1268
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001269 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
Rami Rosen1ca615f2008-08-06 02:34:21 -07001271 if (!dst_mtu(&rt->u.dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
Satoru SATOH5ffc02a2008-05-04 22:14:42 -07001273 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
Daniel Lezcano55786892008-03-04 13:47:47 -08001274 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275 rt->u.dst.dev = dev;
1276 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001277 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001278
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001279 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001280
Thomas Graf86872cb2006-08-22 00:01:08 -07001281 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282
1283out:
1284 if (dev)
1285 dev_put(dev);
1286 if (idev)
1287 in6_dev_put(idev);
1288 if (rt)
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001289 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001290 return err;
1291}
1292
Thomas Graf86872cb2006-08-22 00:01:08 -07001293static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001294{
1295 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001296 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001297 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001299 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001300 return -ENOENT;
1301
Thomas Grafc71099a2006-08-04 23:20:06 -07001302 table = rt->rt6i_table;
1303 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304
Thomas Graf86872cb2006-08-22 00:01:08 -07001305 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 dst_release(&rt->u.dst);
1307
Thomas Grafc71099a2006-08-04 23:20:06 -07001308 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309
1310 return err;
1311}
1312
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001313int ip6_del_rt(struct rt6_info *rt)
1314{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001315 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001316 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001317 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001318 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001319}
1320
Thomas Graf86872cb2006-08-22 00:01:08 -07001321static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322{
Thomas Grafc71099a2006-08-04 23:20:06 -07001323 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324 struct fib6_node *fn;
1325 struct rt6_info *rt;
1326 int err = -ESRCH;
1327
Daniel Lezcano55786892008-03-04 13:47:47 -08001328 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001329 if (table == NULL)
1330 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331
Thomas Grafc71099a2006-08-04 23:20:06 -07001332 read_lock_bh(&table->tb6_lock);
1333
1334 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001335 &cfg->fc_dst, cfg->fc_dst_len,
1336 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001337
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338 if (fn) {
Eric Dumazet7cc48262007-02-09 16:22:57 -08001339 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001340 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001342 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001344 if (cfg->fc_flags & RTF_GATEWAY &&
1345 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001347 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348 continue;
1349 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001350 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001351
Thomas Graf86872cb2006-08-22 00:01:08 -07001352 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001353 }
1354 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001355 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356
1357 return err;
1358}
1359
1360/*
1361 * Handle redirects
1362 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001363struct ip6rd_flowi {
1364 struct flowi fl;
1365 struct in6_addr gateway;
1366};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001368static struct rt6_info *__ip6_route_redirect(struct net *net,
1369 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001370 struct flowi *fl,
1371 int flags)
1372{
1373 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1374 struct rt6_info *rt;
1375 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001376
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001378 * Get the "current" route for this destination and
1379 * check if the redirect has come from approriate router.
1380 *
1381 * RFC 2461 specifies that redirects should only be
1382 * accepted if they come from the nexthop to the target.
1383 * Due to the way the routes are chosen, this notion
1384 * is a bit fuzzy and one might need to check all possible
1385 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387
Thomas Grafc71099a2006-08-04 23:20:06 -07001388 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001389 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001390restart:
Eric Dumazet7cc48262007-02-09 16:22:57 -08001391 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001392 /*
1393 * Current route is on-link; redirect is always invalid.
1394 *
1395 * Seems, previous statement is not true. It could
1396 * be node, which looks for us as on-link (f.e. proxy ndisc)
1397 * But then router serving it might decide, that we should
1398 * know truth 8)8) --ANK (980726).
1399 */
1400 if (rt6_check_expired(rt))
1401 continue;
1402 if (!(rt->rt6i_flags & RTF_GATEWAY))
1403 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001404 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001405 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001406 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001407 continue;
1408 break;
1409 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001410
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001411 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001412 rt = net->ipv6.ip6_null_entry;
1413 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001414out:
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001415 dst_hold(&rt->u.dst);
1416
1417 read_unlock_bh(&table->tb6_lock);
1418
1419 return rt;
1420};
1421
1422static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1423 struct in6_addr *src,
1424 struct in6_addr *gateway,
1425 struct net_device *dev)
1426{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001427 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001428 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001429 struct ip6rd_flowi rdfl = {
1430 .fl = {
1431 .oif = dev->ifindex,
1432 .nl_u = {
1433 .ip6_u = {
1434 .daddr = *dest,
1435 .saddr = *src,
1436 },
1437 },
1438 },
1439 .gateway = *gateway,
1440 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001441
1442 if (rt6_need_strict(dest))
1443 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001444
Daniel Lezcano55786892008-03-04 13:47:47 -08001445 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001446 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001447}
1448
1449void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1450 struct in6_addr *saddr,
1451 struct neighbour *neigh, u8 *lladdr, int on_link)
1452{
1453 struct rt6_info *rt, *nrt = NULL;
1454 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001455 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001456
1457 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1458
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001459 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 if (net_ratelimit())
1461 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1462 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001463 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464 }
1465
Linus Torvalds1da177e2005-04-16 15:20:36 -07001466 /*
1467 * We have finally decided to accept it.
1468 */
1469
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001470 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1472 NEIGH_UPDATE_F_OVERRIDE|
1473 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1474 NEIGH_UPDATE_F_ISROUTER))
1475 );
1476
1477 /*
1478 * Redirect received -> path was valid.
1479 * Look, redirects are sent only in response to data packets,
1480 * so that this nexthop apparently is reachable. --ANK
1481 */
1482 dst_confirm(&rt->u.dst);
1483
1484 /* Duplicate redirect: silently ignore. */
1485 if (neigh == rt->u.dst.neighbour)
1486 goto out;
1487
1488 nrt = ip6_rt_copy(rt);
1489 if (nrt == NULL)
1490 goto out;
1491
1492 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1493 if (on_link)
1494 nrt->rt6i_flags &= ~RTF_GATEWAY;
1495
1496 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1497 nrt->rt6i_dst.plen = 128;
1498 nrt->u.dst.flags |= DST_HOST;
1499
1500 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1501 nrt->rt6i_nexthop = neigh_clone(neigh);
1502 /* Reset pmtu, it may be better */
1503 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001504 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
Daniel Lezcano55786892008-03-04 13:47:47 -08001505 dst_mtu(&nrt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506
Thomas Graf40e22e82006-08-22 00:00:45 -07001507 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 goto out;
1509
Tom Tucker8d717402006-07-30 20:43:36 -07001510 netevent.old = &rt->u.dst;
1511 netevent.new = &nrt->u.dst;
1512 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1513
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001515 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 return;
1517 }
1518
1519out:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001520 dst_release(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 return;
1522}
1523
1524/*
1525 * Handle ICMP "packet too big" messages
1526 * i.e. Path MTU discovery
1527 */
1528
1529void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1530 struct net_device *dev, u32 pmtu)
1531{
1532 struct rt6_info *rt, *nrt;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001533 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 int allfrag = 0;
1535
Daniel Lezcano55786892008-03-04 13:47:47 -08001536 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537 if (rt == NULL)
1538 return;
1539
1540 if (pmtu >= dst_mtu(&rt->u.dst))
1541 goto out;
1542
1543 if (pmtu < IPV6_MIN_MTU) {
1544 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001545 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 * MTU (1280) and a fragment header should always be included
1547 * after a node receiving Too Big message reporting PMTU is
1548 * less than the IPv6 Minimum Link MTU.
1549 */
1550 pmtu = IPV6_MIN_MTU;
1551 allfrag = 1;
1552 }
1553
1554 /* New mtu received -> path was valid.
1555 They are sent only in response to data packets,
1556 so that this nexthop apparently is reachable. --ANK
1557 */
1558 dst_confirm(&rt->u.dst);
1559
1560 /* Host route. If it is static, it would be better
1561 not to override it, but add new one, so that
1562 when cache entry will expire old pmtu
1563 would return automatically.
1564 */
1565 if (rt->rt6i_flags & RTF_CACHE) {
1566 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1567 if (allfrag)
1568 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
Daniel Lezcano55786892008-03-04 13:47:47 -08001569 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1571 goto out;
1572 }
1573
1574 /* Network route.
1575 Two cases are possible:
1576 1. It is connected route. Action: COW
1577 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1578 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001579 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001580 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001581 else
1582 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001583
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001584 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001585 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1586 if (allfrag)
1587 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1588
1589 /* According to RFC 1981, detecting PMTU increase shouldn't be
1590 * happened within 5 mins, the recommended timer is 10 mins.
1591 * Here this route expiration time is set to ip6_rt_mtu_expires
1592 * which is 10 mins. After 10 mins the decreased pmtu is expired
1593 * and detecting PMTU increase will be automatically happened.
1594 */
Daniel Lezcano55786892008-03-04 13:47:47 -08001595 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001596 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1597
Thomas Graf40e22e82006-08-22 00:00:45 -07001598 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600out:
1601 dst_release(&rt->u.dst);
1602}
1603
1604/*
1605 * Misc support functions
1606 */
1607
1608static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1609{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001610 struct net *net = dev_net(ort->rt6i_dev);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001611 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612
1613 if (rt) {
1614 rt->u.dst.input = ort->u.dst.input;
1615 rt->u.dst.output = ort->u.dst.output;
1616
1617 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
Ville Nuorvala22e1e4d2006-10-16 22:14:26 -07001618 rt->u.dst.error = ort->u.dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001619 rt->u.dst.dev = ort->u.dst.dev;
1620 if (rt->u.dst.dev)
1621 dev_hold(rt->u.dst.dev);
1622 rt->rt6i_idev = ort->rt6i_idev;
1623 if (rt->rt6i_idev)
1624 in6_dev_hold(rt->rt6i_idev);
1625 rt->u.dst.lastuse = jiffies;
1626 rt->rt6i_expires = 0;
1627
1628 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1629 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1630 rt->rt6i_metric = 0;
1631
1632 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1633#ifdef CONFIG_IPV6_SUBTREES
1634 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1635#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001636 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001637 }
1638 return rt;
1639}
1640
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001641#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001642static struct rt6_info *rt6_get_route_info(struct net *net,
1643 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001644 struct in6_addr *gwaddr, int ifindex)
1645{
1646 struct fib6_node *fn;
1647 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001648 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001649
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001650 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001651 if (table == NULL)
1652 return NULL;
1653
1654 write_lock_bh(&table->tb6_lock);
1655 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001656 if (!fn)
1657 goto out;
1658
Eric Dumazet7cc48262007-02-09 16:22:57 -08001659 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001660 if (rt->rt6i_dev->ifindex != ifindex)
1661 continue;
1662 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1663 continue;
1664 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1665 continue;
1666 dst_hold(&rt->u.dst);
1667 break;
1668 }
1669out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001670 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001671 return rt;
1672}
1673
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001674static struct rt6_info *rt6_add_route_info(struct net *net,
1675 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001676 struct in6_addr *gwaddr, int ifindex,
1677 unsigned pref)
1678{
Thomas Graf86872cb2006-08-22 00:01:08 -07001679 struct fib6_config cfg = {
1680 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001681 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001682 .fc_ifindex = ifindex,
1683 .fc_dst_len = prefixlen,
1684 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1685 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001686 .fc_nlinfo.pid = 0,
1687 .fc_nlinfo.nlh = NULL,
1688 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001689 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001690
Thomas Graf86872cb2006-08-22 00:01:08 -07001691 ipv6_addr_copy(&cfg.fc_dst, prefix);
1692 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1693
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001694 /* We should treat it as a default route if prefix length is 0. */
1695 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001696 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001697
Thomas Graf86872cb2006-08-22 00:01:08 -07001698 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001699
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001700 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001701}
1702#endif
1703
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001705{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001707 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001709 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001710 if (table == NULL)
1711 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712
Thomas Grafc71099a2006-08-04 23:20:06 -07001713 write_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001714 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001716 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1718 break;
1719 }
1720 if (rt)
1721 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001722 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 return rt;
1724}
1725
1726struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001727 struct net_device *dev,
1728 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729{
Thomas Graf86872cb2006-08-22 00:01:08 -07001730 struct fib6_config cfg = {
1731 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001732 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001733 .fc_ifindex = dev->ifindex,
1734 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1735 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001736 .fc_nlinfo.pid = 0,
1737 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001738 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001739 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740
Thomas Graf86872cb2006-08-22 00:01:08 -07001741 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742
Thomas Graf86872cb2006-08-22 00:01:08 -07001743 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745 return rt6_get_dflt_router(gwaddr, dev);
1746}
1747
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001748void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749{
1750 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001751 struct fib6_table *table;
1752
1753 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001754 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001755 if (table == NULL)
1756 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757
1758restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001759 read_lock_bh(&table->tb6_lock);
Eric Dumazet7cc48262007-02-09 16:22:57 -08001760 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1762 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001763 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001764 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765 goto restart;
1766 }
1767 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001768 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769}
1770
Daniel Lezcano55786892008-03-04 13:47:47 -08001771static void rtmsg_to_fib6_config(struct net *net,
1772 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001773 struct fib6_config *cfg)
1774{
1775 memset(cfg, 0, sizeof(*cfg));
1776
1777 cfg->fc_table = RT6_TABLE_MAIN;
1778 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1779 cfg->fc_metric = rtmsg->rtmsg_metric;
1780 cfg->fc_expires = rtmsg->rtmsg_info;
1781 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1782 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1783 cfg->fc_flags = rtmsg->rtmsg_flags;
1784
Daniel Lezcano55786892008-03-04 13:47:47 -08001785 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001786
Thomas Graf86872cb2006-08-22 00:01:08 -07001787 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1788 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1789 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1790}
1791
Daniel Lezcano55786892008-03-04 13:47:47 -08001792int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793{
Thomas Graf86872cb2006-08-22 00:01:08 -07001794 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795 struct in6_rtmsg rtmsg;
1796 int err;
1797
1798 switch(cmd) {
1799 case SIOCADDRT: /* Add a route */
1800 case SIOCDELRT: /* Delete a route */
1801 if (!capable(CAP_NET_ADMIN))
1802 return -EPERM;
1803 err = copy_from_user(&rtmsg, arg,
1804 sizeof(struct in6_rtmsg));
1805 if (err)
1806 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001807
Daniel Lezcano55786892008-03-04 13:47:47 -08001808 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001809
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810 rtnl_lock();
1811 switch (cmd) {
1812 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001813 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 break;
1815 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001816 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001817 break;
1818 default:
1819 err = -EINVAL;
1820 }
1821 rtnl_unlock();
1822
1823 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001824 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825
1826 return -EINVAL;
1827}
1828
1829/*
1830 * Drop the packet on the floor
1831 */
1832
Ilpo Järvinen50eb4312008-01-12 03:21:00 -08001833static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001835 int type;
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001836 struct dst_entry *dst = skb->dst;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001837 switch (ipstats_mib_noroutes) {
1838 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001839 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001840 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001841 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1842 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001843 break;
1844 }
1845 /* FALLTHROUGH */
1846 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001847 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1848 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001849 break;
1850 }
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001851 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852 kfree_skb(skb);
1853 return 0;
1854}
1855
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001856static int ip6_pkt_discard(struct sk_buff *skb)
1857{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001858 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001859}
1860
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001861static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862{
1863 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001864 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865}
1866
David S. Miller6723ab52006-10-18 21:20:57 -07001867#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1868
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001869static int ip6_pkt_prohibit(struct sk_buff *skb)
1870{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001871 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001872}
1873
1874static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1875{
1876 skb->dev = skb->dst->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001877 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001878}
1879
David S. Miller6723ab52006-10-18 21:20:57 -07001880#endif
1881
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882/*
1883 * Allocate a dst for local (unicast / anycast) address.
1884 */
1885
1886struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1887 const struct in6_addr *addr,
1888 int anycast)
1889{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001890 struct net *net = dev_net(idev->dev);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08001891 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001892
1893 if (rt == NULL)
1894 return ERR_PTR(-ENOMEM);
1895
Daniel Lezcano55786892008-03-04 13:47:47 -08001896 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001897 in6_dev_hold(idev);
1898
1899 rt->u.dst.flags = DST_HOST;
1900 rt->u.dst.input = ip6_input;
1901 rt->u.dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001902 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903 rt->rt6i_idev = idev;
1904 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
Daniel Lezcano55786892008-03-04 13:47:47 -08001905 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001906 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1907 rt->u.dst.obsolete = -1;
1908
1909 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001910 if (anycast)
1911 rt->rt6i_flags |= RTF_ANYCAST;
1912 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 rt->rt6i_flags |= RTF_LOCAL;
1914 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1915 if (rt->rt6i_nexthop == NULL) {
YOSHIFUJI Hideaki40aa7b92006-10-19 13:50:09 +09001916 dst_free(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001917 return ERR_PTR(-ENOMEM);
1918 }
1919
1920 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1921 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001922 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923
1924 atomic_set(&rt->u.dst.__refcnt, 1);
1925
1926 return rt;
1927}
1928
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches every device */
	struct net *net;	/* namespace whose ip6_null_entry is spared */
};
1933
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934static int fib6_ifdown(struct rt6_info *rt, void *arg)
1935{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001936 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1937 struct net *net = ((struct arg_dev_net *)arg)->net;
1938
1939 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1940 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001941 RT6_TRACE("deleted by ifdown %p\n", rt);
1942 return -1;
1943 }
1944 return 0;
1945}
1946
Daniel Lezcanof3db4852008-03-03 23:27:06 -08001947void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001949 struct arg_dev_net adn = {
1950 .dev = dev,
1951 .net = net,
1952 };
1953
1954 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07001955 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956}
1957
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* its new MTU */
};
1963
1964static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1965{
1966 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1967 struct inet6_dev *idev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001968 struct net *net = dev_net(arg->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969
1970 /* In IPv6 pmtu discovery is not optional,
1971 so that RTAX_MTU lock cannot disable it.
1972 We still use this lock to block changes
1973 caused by addrconf/ndisc.
1974 */
1975
1976 idev = __in6_dev_get(arg->dev);
1977 if (idev == NULL)
1978 return 0;
1979
1980 /* For administrative MTU increase, there is no way to discover
1981 IPv6 PMTU increase, so PMTU increase should be updated here.
1982 Since RFC 1981 doesn't include administrative MTU increase
1983 update PMTU increase is a MUST. (i.e. jumbo frame)
1984 */
1985 /*
1986 If new MTU is less than route PMTU, this new MTU will be the
1987 lowest MTU in the path, update the route PMTU to reflect PMTU
1988 decreases; if new MTU is greater than route PMTU, and the
1989 old MTU is the lowest MTU in the path, update the route PMTU
1990 to reflect the increase. In this case if the other nodes' MTU
1991 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1992 PMTU discouvery.
1993 */
1994 if (rt->rt6i_dev == arg->dev &&
1995 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
Jim Paris23717792008-01-31 16:36:25 -08001996 (dst_mtu(&rt->u.dst) >= arg->mtu ||
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001997 (dst_mtu(&rt->u.dst) < arg->mtu &&
Simon Arlott566cfd82007-07-26 00:09:55 -07001998 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
Daniel Lezcano55786892008-03-04 13:47:47 -08002000 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002001 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002 return 0;
2003}
2004
2005void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2006{
Thomas Grafc71099a2006-08-04 23:20:06 -07002007 struct rt6_mtu_change_arg arg = {
2008 .dev = dev,
2009 .mtu = mtu,
2010 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002012 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013}
2014
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002015static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002016 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002017 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002018 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002019 [RTA_PRIORITY] = { .type = NLA_U32 },
2020 [RTA_METRICS] = { .type = NLA_NESTED },
2021};
2022
2023static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2024 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025{
Thomas Graf86872cb2006-08-22 00:01:08 -07002026 struct rtmsg *rtm;
2027 struct nlattr *tb[RTA_MAX+1];
2028 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002029
Thomas Graf86872cb2006-08-22 00:01:08 -07002030 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2031 if (err < 0)
2032 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033
Thomas Graf86872cb2006-08-22 00:01:08 -07002034 err = -EINVAL;
2035 rtm = nlmsg_data(nlh);
2036 memset(cfg, 0, sizeof(*cfg));
2037
2038 cfg->fc_table = rtm->rtm_table;
2039 cfg->fc_dst_len = rtm->rtm_dst_len;
2040 cfg->fc_src_len = rtm->rtm_src_len;
2041 cfg->fc_flags = RTF_UP;
2042 cfg->fc_protocol = rtm->rtm_protocol;
2043
2044 if (rtm->rtm_type == RTN_UNREACHABLE)
2045 cfg->fc_flags |= RTF_REJECT;
2046
2047 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2048 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002049 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002050
2051 if (tb[RTA_GATEWAY]) {
2052 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2053 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002054 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002055
2056 if (tb[RTA_DST]) {
2057 int plen = (rtm->rtm_dst_len + 7) >> 3;
2058
2059 if (nla_len(tb[RTA_DST]) < plen)
2060 goto errout;
2061
2062 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002064
2065 if (tb[RTA_SRC]) {
2066 int plen = (rtm->rtm_src_len + 7) >> 3;
2067
2068 if (nla_len(tb[RTA_SRC]) < plen)
2069 goto errout;
2070
2071 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002072 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002073
2074 if (tb[RTA_OIF])
2075 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2076
2077 if (tb[RTA_PRIORITY])
2078 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2079
2080 if (tb[RTA_METRICS]) {
2081 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2082 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002084
2085 if (tb[RTA_TABLE])
2086 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2087
2088 err = 0;
2089errout:
2090 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091}
2092
Thomas Grafc127ea22007-03-22 11:58:32 -07002093static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094{
Thomas Graf86872cb2006-08-22 00:01:08 -07002095 struct fib6_config cfg;
2096 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097
Thomas Graf86872cb2006-08-22 00:01:08 -07002098 err = rtm_to_fib6_config(skb, nlh, &cfg);
2099 if (err < 0)
2100 return err;
2101
2102 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103}
2104
Thomas Grafc127ea22007-03-22 11:58:32 -07002105static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106{
Thomas Graf86872cb2006-08-22 00:01:08 -07002107 struct fib6_config cfg;
2108 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002109
Thomas Graf86872cb2006-08-22 00:01:08 -07002110 err = rtm_to_fib6_config(skb, nlh, &cfg);
2111 if (err < 0)
2112 return err;
2113
2114 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115}
2116
Thomas Graf339bf982006-11-10 14:10:15 -08002117static inline size_t rt6_nlmsg_size(void)
2118{
2119 return NLMSG_ALIGN(sizeof(struct rtmsg))
2120 + nla_total_size(16) /* RTA_SRC */
2121 + nla_total_size(16) /* RTA_DST */
2122 + nla_total_size(16) /* RTA_GATEWAY */
2123 + nla_total_size(16) /* RTA_PREFSRC */
2124 + nla_total_size(4) /* RTA_TABLE */
2125 + nla_total_size(4) /* RTA_IIF */
2126 + nla_total_size(4) /* RTA_OIF */
2127 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002128 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002129 + nla_total_size(sizeof(struct rta_cacheinfo));
2130}
2131
Brian Haley191cd582008-08-14 15:33:21 -07002132static int rt6_fill_node(struct net *net,
2133 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002134 struct in6_addr *dst, struct in6_addr *src,
2135 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002136 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002137{
2138 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002139 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002140 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002141 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002142
2143 if (prefix) { /* user wants prefix routes only */
2144 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2145 /* success since this is not a prefix route */
2146 return 1;
2147 }
2148 }
2149
Thomas Graf2d7202b2006-08-22 00:01:27 -07002150 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2151 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002152 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002153
2154 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 rtm->rtm_family = AF_INET6;
2156 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2157 rtm->rtm_src_len = rt->rt6i_src.plen;
2158 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002159 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002160 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002161 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002162 table = RT6_TABLE_UNSPEC;
2163 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002164 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165 if (rt->rt6i_flags&RTF_REJECT)
2166 rtm->rtm_type = RTN_UNREACHABLE;
2167 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2168 rtm->rtm_type = RTN_LOCAL;
2169 else
2170 rtm->rtm_type = RTN_UNICAST;
2171 rtm->rtm_flags = 0;
2172 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2173 rtm->rtm_protocol = rt->rt6i_protocol;
2174 if (rt->rt6i_flags&RTF_DYNAMIC)
2175 rtm->rtm_protocol = RTPROT_REDIRECT;
2176 else if (rt->rt6i_flags & RTF_ADDRCONF)
2177 rtm->rtm_protocol = RTPROT_KERNEL;
2178 else if (rt->rt6i_flags&RTF_DEFAULT)
2179 rtm->rtm_protocol = RTPROT_RA;
2180
2181 if (rt->rt6i_flags&RTF_CACHE)
2182 rtm->rtm_flags |= RTM_F_CLONED;
2183
2184 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002185 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002186 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002188 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002189#ifdef CONFIG_IPV6_SUBTREES
2190 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002191 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002192 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002194 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002196 if (iif) {
2197#ifdef CONFIG_IPV6_MROUTE
2198 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2199 int err = ip6mr_get_route(skb, rtm, nowait);
2200 if (err <= 0) {
2201 if (!nowait) {
2202 if (err == 0)
2203 return 0;
2204 goto nla_put_failure;
2205 } else {
2206 if (err == -EMSGSIZE)
2207 goto nla_put_failure;
2208 }
2209 }
2210 } else
2211#endif
2212 NLA_PUT_U32(skb, RTA_IIF, iif);
2213 } else if (dst) {
Brian Haley5e0115e2008-08-13 01:58:57 -07002214 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002216 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002217 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002218 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002220
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002222 goto nla_put_failure;
2223
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224 if (rt->u.dst.neighbour)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002225 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2226
Linus Torvalds1da177e2005-04-16 15:20:36 -07002227 if (rt->u.dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002228 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2229
2230 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002231
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002232 if (!(rt->rt6i_flags & RTF_EXPIRES))
2233 expires = 0;
2234 else if (rt->rt6i_expires - jiffies < INT_MAX)
2235 expires = rt->rt6i_expires - jiffies;
2236 else
2237 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002238
Thomas Grafe3703b32006-11-27 09:27:07 -08002239 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2240 expires, rt->u.dst.error) < 0)
2241 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002242
Thomas Graf2d7202b2006-08-22 00:01:27 -07002243 return nlmsg_end(skb, nlh);
2244
2245nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002246 nlmsg_cancel(skb, nlh);
2247 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248}
2249
Patrick McHardy1b43af52006-08-10 23:11:17 -07002250int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002251{
2252 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2253 int prefix;
2254
Thomas Graf2d7202b2006-08-22 00:01:27 -07002255 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2256 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002257 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2258 } else
2259 prefix = 0;
2260
Brian Haley191cd582008-08-14 15:33:21 -07002261 return rt6_fill_node(arg->net,
2262 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002263 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002264 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002265}
2266
Thomas Grafc127ea22007-03-22 11:58:32 -07002267static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002269 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002270 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002271 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002272 struct sk_buff *skb;
2273 struct rtmsg *rtm;
2274 struct flowi fl;
2275 int err, iif = 0;
2276
2277 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2278 if (err < 0)
2279 goto errout;
2280
2281 err = -EINVAL;
2282 memset(&fl, 0, sizeof(fl));
2283
2284 if (tb[RTA_SRC]) {
2285 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2286 goto errout;
2287
2288 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2289 }
2290
2291 if (tb[RTA_DST]) {
2292 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2293 goto errout;
2294
2295 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2296 }
2297
2298 if (tb[RTA_IIF])
2299 iif = nla_get_u32(tb[RTA_IIF]);
2300
2301 if (tb[RTA_OIF])
2302 fl.oif = nla_get_u32(tb[RTA_OIF]);
2303
2304 if (iif) {
2305 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002306 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002307 if (!dev) {
2308 err = -ENODEV;
2309 goto errout;
2310 }
2311 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312
2313 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002314 if (skb == NULL) {
2315 err = -ENOBUFS;
2316 goto errout;
2317 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318
2319 /* Reserve room for dummy headers, this skb can pass
2320 through good chunk of routing engine.
2321 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002322 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2324
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002325 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326 skb->dst = &rt->u.dst;
2327
Brian Haley191cd582008-08-14 15:33:21 -07002328 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002329 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002330 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002332 kfree_skb(skb);
2333 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002334 }
2335
Daniel Lezcano55786892008-03-04 13:47:47 -08002336 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002337errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002339}
2340
Thomas Graf86872cb2006-08-22 00:01:08 -07002341void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342{
2343 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002344 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002345 u32 seq;
2346 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002348 err = -ENOBUFS;
2349 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002350
Thomas Graf339bf982006-11-10 14:10:15 -08002351 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002352 if (skb == NULL)
2353 goto errout;
2354
Brian Haley191cd582008-08-14 15:33:21 -07002355 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002356 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002357 if (err < 0) {
2358 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2359 WARN_ON(err == -EMSGSIZE);
2360 kfree_skb(skb);
2361 goto errout;
2362 }
Daniel Lezcano55786892008-03-04 13:47:47 -08002363 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2364 info->nlh, gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002365errout:
2366 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002367 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002368}
2369
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002370static int ip6_route_dev_notify(struct notifier_block *this,
2371 unsigned long event, void *data)
2372{
2373 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002374 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002375
2376 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2377 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2378 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2379#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2380 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2381 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2382 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2383 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2384#endif
2385 }
2386
2387 return NOTIFY_OK;
2388}
2389
Linus Torvalds1da177e2005-04-16 15:20:36 -07002390/*
2391 * /proc
2392 */
2393
2394#ifdef CONFIG_PROC_FS
2395
/*
 * NOTE(review): the widths summed here look like the per-line layout
 * printed by rt6_info_route() -- confirm before relying on it.  Neither
 * this constant nor struct rt6_proc_arg is referenced by the /proc code
 * visible in this part of the file.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * NOTE(review): presumably a cursor for a buffer-based /proc reader;
 * the visible handlers use seq_file instead -- confirm whether this is
 * still needed.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2406
2407static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2408{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002409 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002410
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002411 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002412
2413#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002414 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002415#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002416 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417#endif
2418
2419 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002420 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002421 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002422 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002424 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2425 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2426 rt->u.dst.__use, rt->rt6i_flags,
2427 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428 return 0;
2429}
2430
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002431static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002432{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002433 struct net *net = (struct net *)m->private;
2434 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002435 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002436}
2437
/*
 *	open() handler of ipv6_route_proc_fops: hands ipv6_route_show to
 *	single_open_net() and returns its result.
 */
static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}
2442
/*
 *	File operations registered for the "ipv6_route" proc entry in
 *	ip6_route_net_init().
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner = THIS_MODULE,
	.open = ipv6_route_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,	/* pairs with single_open_net() in ipv6_route_open() */
};
2450
Linus Torvalds1da177e2005-04-16 15:20:36 -07002451static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2452{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002453 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002454 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002455 net->ipv6.rt6_stats->fib_nodes,
2456 net->ipv6.rt6_stats->fib_route_nodes,
2457 net->ipv6.rt6_stats->fib_rt_alloc,
2458 net->ipv6.rt6_stats->fib_rt_entries,
2459 net->ipv6.rt6_stats->fib_rt_cache,
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002460 atomic_read(&net->ipv6.ip6_dst_ops->entries),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002461 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462
2463 return 0;
2464}
2465
/*
 *	open() handler of rt6_stats_seq_fops: hands rt6_stats_seq_show to
 *	single_open_net() and returns its result.
 */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
2470
/*
 *	File operations registered for the "rt6_stats" proc entry in
 *	ip6_route_net_init().
 */
static const struct file_operations rt6_stats_seq_fops = {
	.owner = THIS_MODULE,
	.open = rt6_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release_net,	/* pairs with single_open_net() in rt6_stats_seq_open() */
};
2478#endif /* CONFIG_PROC_FS */
2479
2480#ifdef CONFIG_SYSCTL
2481
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482static
2483int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2484 void __user *buffer, size_t *lenp, loff_t *ppos)
2485{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002486 struct net *net = current->nsproxy->net_ns;
2487 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488 if (write) {
2489 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002490 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491 return 0;
2492 } else
2493 return -EINVAL;
2494}
2495
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002496ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002497 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002498 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002499 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002501 .mode = 0200,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002502 .proc_handler = &ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002503 },
2504 {
2505 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2506 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002507 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002508 .maxlen = sizeof(int),
2509 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002510 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511 },
2512 {
2513 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2514 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002515 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002516 .maxlen = sizeof(int),
2517 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002518 .proc_handler = &proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002519 },
2520 {
2521 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2522 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002523 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002524 .maxlen = sizeof(int),
2525 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002526 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002527 .strategy = &sysctl_jiffies,
2528 },
2529 {
2530 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2531 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002532 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002533 .maxlen = sizeof(int),
2534 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002535 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002536 .strategy = &sysctl_jiffies,
2537 },
2538 {
2539 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2540 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002541 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002542 .maxlen = sizeof(int),
2543 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002544 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002545 .strategy = &sysctl_jiffies,
2546 },
2547 {
2548 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2549 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002550 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 .maxlen = sizeof(int),
2552 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002553 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554 .strategy = &sysctl_jiffies,
2555 },
2556 {
2557 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2558 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002559 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560 .maxlen = sizeof(int),
2561 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002562 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 .strategy = &sysctl_jiffies,
2564 },
2565 {
2566 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2567 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002568 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002569 .maxlen = sizeof(int),
2570 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002571 .proc_handler = &proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002572 .strategy = &sysctl_jiffies,
2573 },
2574 {
2575 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2576 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002577 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578 .maxlen = sizeof(int),
2579 .mode = 0644,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002580 .proc_handler = &proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 .strategy = &sysctl_ms_jiffies,
2582 },
2583 { .ctl_name = 0 }
2584};
2585
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002586struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2587{
2588 struct ctl_table *table;
2589
2590 table = kmemdup(ipv6_route_table_template,
2591 sizeof(ipv6_route_table_template),
2592 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002593
2594 if (table) {
2595 table[0].data = &net->ipv6.sysctl.flush_delay;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002596 table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002597 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2598 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2599 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2600 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2601 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2602 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2603 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2604 }
2605
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002606 return table;
2607}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608#endif
2609
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002610static int ip6_route_net_init(struct net *net)
2611{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002612 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002613
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002614 net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2615 sizeof(*net->ipv6.ip6_dst_ops),
2616 GFP_KERNEL);
2617 if (!net->ipv6.ip6_dst_ops)
2618 goto out;
Denis V. Lunev48115be2008-04-16 02:01:34 -07002619 net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002620
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002621 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2622 sizeof(*net->ipv6.ip6_null_entry),
2623 GFP_KERNEL);
2624 if (!net->ipv6.ip6_null_entry)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002625 goto out_ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002626 net->ipv6.ip6_null_entry->u.dst.path =
2627 (struct dst_entry *)net->ipv6.ip6_null_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002628 net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002629
2630#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2631 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2632 sizeof(*net->ipv6.ip6_prohibit_entry),
2633 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002634 if (!net->ipv6.ip6_prohibit_entry)
2635 goto out_ip6_null_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002636 net->ipv6.ip6_prohibit_entry->u.dst.path =
2637 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002638 net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002639
2640 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2641 sizeof(*net->ipv6.ip6_blk_hole_entry),
2642 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002643 if (!net->ipv6.ip6_blk_hole_entry)
2644 goto out_ip6_prohibit_entry;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002645 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2646 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002647 net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002648#endif
2649
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002650 net->ipv6.sysctl.flush_delay = 0;
2651 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2652 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2653 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2654 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2655 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2656 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2657 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2658
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002659#ifdef CONFIG_PROC_FS
2660 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2661 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2662#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002663 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2664
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002665 ret = 0;
2666out:
2667 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002668
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002669#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2670out_ip6_prohibit_entry:
2671 kfree(net->ipv6.ip6_prohibit_entry);
2672out_ip6_null_entry:
2673 kfree(net->ipv6.ip6_null_entry);
2674#endif
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002675out_ip6_dst_ops:
Denis V. Lunev48115be2008-04-16 02:01:34 -07002676 release_net(net->ipv6.ip6_dst_ops->dst_net);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002677 kfree(net->ipv6.ip6_dst_ops);
2678 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002679}
2680
/*
 *	Per-namespace teardown, the counterpart of ip6_route_net_init():
 *	removes the proc entries, frees the special route entries and
 *	releases the per-net dst ops together with the namespace reference
 *	they hold.
 */
static void ip6_route_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	/* Drop the reference taken with hold_net() before freeing the ops. */
	release_net(net->ipv6.ip6_dst_ops->dst_net);
	kfree(net->ipv6.ip6_dst_ops);
}
2695
/*
 *	Per-namespace init/exit hooks; registered in ip6_route_init() and
 *	unregistered in ip6_route_cleanup().
 */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
2700
/*
 *	Netdevice notifier block that routes events to
 *	ip6_route_dev_notify(); registered in ip6_route_init().
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};
2705
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002706int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002707{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002708 int ret;
2709
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002710 ret = -ENOMEM;
2711 ip6_dst_ops_template.kmem_cachep =
2712 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2713 SLAB_HWCACHE_ALIGN, NULL);
2714 if (!ip6_dst_ops_template.kmem_cachep)
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002715 goto out;;
David S. Miller14e50e52007-05-24 18:17:54 -07002716
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002717 ret = register_pernet_subsys(&ip6_route_net_ops);
2718 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002719 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002720
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002721 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2722
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002723 /* Registering of the loopback is done before this portion of code,
2724 * the loopback reference in rt6_info will not be taken, do it
2725 * manually for init_net */
2726 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2727 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2728 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2729 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2730 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2731 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2732 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2733 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002734 ret = fib6_init();
2735 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002736 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002737
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002738 ret = xfrm6_init();
2739 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002740 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002741
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002742 ret = fib6_rules_init();
2743 if (ret)
2744 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002745
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002746 ret = -ENOBUFS;
2747 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2748 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2749 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2750 goto fib6_rules_init;
2751
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002752 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002753 if (ret)
2754 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002755
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002756out:
2757 return ret;
2758
2759fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002760 fib6_rules_cleanup();
2761xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002762 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002763out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002764 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002765out_register_subsys:
2766 unregister_pernet_subsys(&ip6_route_net_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002767out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002768 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002769 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002770}
2771
/*
 *	Global teardown of the IPv6 routing layer.  Runs the same cleanup
 *	calls as the error path of ip6_route_init(), preceded by the
 *	removal of the netdevice notifier.
 */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ip6_route_net_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}