blob: c346ccf66ae108cf5d36ef39f7f25c922ebc3388 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090037#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080042#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090043#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020044#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug verbosity for this file; set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
/* Tracing on: RDBG expects its argument to be a parenthesised printk list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
/* Tracing off: both macros expand to nothing observable. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/* Non-zero selects the rt6_alloc_clone() path in ip6_pol_route(). */
#define CLONE_OFFLINK_ROUTE 0
Linus Torvalds1da177e2005-04-16 15:20:36 -070076
Linus Torvalds1da177e2005-04-16 15:20:36 -070077static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080083static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070084
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080090#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080091static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080093 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080095static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080097 struct in6_addr *gwaddr, int ifindex);
98#endif
99
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800100static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800102 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700111 .local_out = __ip6_local_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112};
113
David S. Miller14e50e52007-05-24 18:17:54 -0700114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800120 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
David S. Miller14e50e52007-05-24 18:17:54 -0700124};
125
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800126static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700127 .dst = {
128 .__refcnt = ATOMIC_INIT(1),
129 .__use = 1,
130 .obsolete = -1,
131 .error = -ENETUNREACH,
132 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
133 .input = ip6_pkt_discard,
134 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 },
136 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700137 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject route whose dst reports
 * -EACCES and hands packets to the ip6_pkt_prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject route whose dst reports
 * -EINVAL and whose input/output are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
180
/* Allocate a dst entry from @ops and return it typed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
186
187static void ip6_dst_destroy(struct dst_entry *dst)
188{
189 struct rt6_info *rt = (struct rt6_info *)dst;
190 struct inet6_dev *idev = rt->rt6i_idev;
191
192 if (idev != NULL) {
193 rt->rt6i_idev = NULL;
194 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900195 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196}
197
198static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
199 int how)
200{
201 struct rt6_info *rt = (struct rt6_info *)dst;
202 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800203 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900204 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800206 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
207 struct inet6_dev *loopback_idev =
208 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209 if (loopback_idev != NULL) {
210 rt->rt6i_idev = loopback_idev;
211 in6_dev_put(idev);
212 }
213 }
214}
215
216static __inline__ int rt6_check_expired(const struct rt6_info *rt)
217{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000218 return (rt->rt6i_flags & RTF_EXPIRES) &&
219 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220}
221
Thomas Grafc71099a2006-08-04 23:20:06 -0700222static inline int rt6_need_strict(struct in6_addr *daddr)
223{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000224 return ipv6_addr_type(daddr) &
225 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700226}
227
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700229 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230 */
231
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800232static inline struct rt6_info *rt6_device_match(struct net *net,
233 struct rt6_info *rt,
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900234 struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700236 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237{
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
240
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900241 if (!oif && ipv6_addr_any(saddr))
242 goto out;
243
Changli Gaod8d1f302010-06-10 23:31:35 -0700244 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900245 struct net_device *dev = sprt->rt6i_dev;
246
247 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 if (dev->ifindex == oif)
249 return sprt;
250 if (dev->flags & IFF_LOOPBACK) {
251 if (sprt->rt6i_idev == NULL ||
252 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700253 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900255 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700256 local->rt6i_idev->dev->ifindex == oif))
257 continue;
258 }
259 local = sprt;
260 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900261 } else {
262 if (ipv6_chk_addr(net, saddr, dev,
263 flags & RT6_LOOKUP_F_IFACE))
264 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900266 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900268 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 if (local)
270 return local;
271
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700272 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800273 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900275out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700276 return rt;
277}
278
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the route's next-hop neighbour entry is not NUD_VALID and was last
 * updated more than the interface's rtr_probe_interval ago, stamp it with
 * the current time and send a neighbour solicitation to the target's
 * solicited-node multicast address.  A NULL @rt or a route without a
 * next hop is ignored.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* Cheap unlocked pre-check; re-evaluated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* The solicitation is sent after the neighbour lock is dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
313
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800315 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700317static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800319 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700320 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800321 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700322 if ((dev->flags & IFF_LOOPBACK) &&
323 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
324 return 1;
325 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700326}
327
Dave Jonesb6f99a22007-03-22 12:27:49 -0700328static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800330 struct neighbour *neigh = rt->rt6i_nexthop;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800331 int m;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700332 if (rt->rt6i_flags & RTF_NONEXTHOP ||
333 !(rt->rt6i_flags & RTF_GATEWAY))
334 m = 1;
335 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800336 read_lock_bh(&neigh->lock);
337 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700338 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800339#ifdef CONFIG_IPV6_ROUTER_PREF
340 else if (neigh->nud_state & NUD_FAILED)
341 m = 0;
342#endif
343 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800344 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800345 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800346 } else
347 m = 0;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800348 return m;
349}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800351static int rt6_score_route(struct rt6_info *rt, int oif,
352 int strict)
353{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700354 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900355
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700356 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700357 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800358 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800359#ifdef CONFIG_IPV6_ROUTER_PREF
360 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700362 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800363 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800364 return -1;
365 return m;
366}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367
David S. Millerf11e6652007-03-24 20:36:25 -0700368static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800370{
David S. Millerf11e6652007-03-24 20:36:25 -0700371 int m;
372
373 if (rt6_check_expired(rt))
374 goto out;
375
376 m = rt6_score_route(rt, oif, strict);
377 if (m < 0)
378 goto out;
379
380 if (m > *mpri) {
381 if (strict & RT6_LOOKUP_F_REACHABLE)
382 rt6_probe(match);
383 *mpri = m;
384 match = rt;
385 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
386 rt6_probe(rt);
387 }
388
389out:
390 return match;
391}
392
393static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394 struct rt6_info *rr_head,
395 u32 metric, int oif, int strict)
396{
397 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800398 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399
David S. Millerf11e6652007-03-24 20:36:25 -0700400 match = NULL;
401 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700402 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700403 match = find_match(rt, oif, strict, &mpri, match);
404 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700405 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700406 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800407
David S. Millerf11e6652007-03-24 20:36:25 -0700408 return match;
409}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800410
David S. Millerf11e6652007-03-24 20:36:25 -0700411static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412{
413 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800414 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415
David S. Millerf11e6652007-03-24 20:36:25 -0700416 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800417 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418
David S. Millerf11e6652007-03-24 20:36:25 -0700419 rt0 = fn->rr_ptr;
420 if (!rt0)
421 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422
David S. Millerf11e6652007-03-24 20:36:25 -0700423 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800425 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700426 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700427 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700428
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800429 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700430 if (!next || next->rt6i_metric != rt0->rt6i_metric)
431 next = fn->leaf;
432
433 if (next != rt0)
434 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 }
436
David S. Millerf11e6652007-03-24 20:36:25 -0700437 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800438 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900440 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000441 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442}
443
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option.
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as a struct route_info
 * @len:    length of the option in bytes
 * @gwaddr: address of the advertising router
 *
 * After validating the option, looks up the matching route-info route and
 * deletes it (zero lifetime), creates it (non-zero lifetime, no existing
 * route) or refreshes its preference bits.  The resulting route then has
 * its expiry set or cleared according to the lifetime.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Full 128-bit prefix present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt) {
		if (!lifetime) {
			/* Zero lifetime withdraws an existing route. */
			ip6_del_rt(rt);
			rt = NULL;
		} else {
			rt->rt6i_flags = RTF_ROUTEINFO |
					 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
		}
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
517
/*
 * BACKTRACK(__net, saddr) - retry a failed lookup higher up the fib6 tree.
 *
 * This macro expands in place and relies on the enclosing function
 * providing:
 *   - `struct rt6_info *rt`   the current lookup result,
 *   - `struct fib6_node *fn`  the node that produced it,
 *   - a label `restart`       that re-runs the match on the new `fn`,
 *   - a label `out`           taken when the top-level root is reached.
 *
 * It only acts when `rt` is the namespace's ip6_null_entry.  It then walks
 * towards the root: if the parent has a source subtree other than the one
 * we came from, that subtree is searched with @saddr; otherwise the parent
 * itself becomes `fn`.  The walk stops at the first node carrying
 * RTN_RTINFO (goto restart) or at an RTN_TL_ROOT node (goto out).
 *
 * Macro parameters are parenthesised so that any pointer expression can be
 * passed safely.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700535
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800536static struct rt6_info *ip6_pol_route_lookup(struct net *net,
537 struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700538 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539{
540 struct fib6_node *fn;
541 struct rt6_info *rt;
542
Thomas Grafc71099a2006-08-04 23:20:06 -0700543 read_lock_bh(&table->tb6_lock);
544 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
545restart:
546 rt = fn->leaf;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900547 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800548 BACKTRACK(net, &fl->fl6_src);
Thomas Grafc71099a2006-08-04 23:20:06 -0700549out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700550 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700551 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700552 return rt;
553
554}
555
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900556struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
557 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700558{
559 struct flowi fl = {
560 .oif = oif,
Changli Gao58116622010-11-12 18:43:55 +0000561 .fl6_dst = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700562 };
563 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700564 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700565
Thomas Grafadaa70b2006-10-13 15:01:03 -0700566 if (saddr) {
567 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
568 flags |= RT6_LOOKUP_F_HAS_SADDR;
569 }
570
Daniel Lezcano606a2b42008-03-04 13:45:59 -0800571 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700572 if (dst->error == 0)
573 return (struct rt6_info *) dst;
574
575 dst_release(dst);
576
Linus Torvalds1da177e2005-04-16 15:20:36 -0700577 return NULL;
578}
579
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900580EXPORT_SYMBOL(rt6_lookup);
581
Thomas Grafc71099a2006-08-04 23:20:06 -0700582/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 It takes new route entry, the addition fails by any reason the
584 route is freed. In any case, if caller does not hold it, it may
585 be destroyed.
586 */
587
Thomas Graf86872cb2006-08-22 00:01:08 -0700588static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700589{
590 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700591 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592
Thomas Grafc71099a2006-08-04 23:20:06 -0700593 table = rt->rt6i_table;
594 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700595 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700596 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597
598 return err;
599}
600
Thomas Graf40e22e82006-08-22 00:00:45 -0700601int ip6_ins_rt(struct rt6_info *rt)
602{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800603 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900604 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800605 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800606 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700607}
608
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800609static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
610 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 struct rt6_info *rt;
613
614 /*
615 * Clone the route.
616 */
617
618 rt = ip6_rt_copy(ort);
619
620 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800621 struct neighbour *neigh;
622 int attempts = !in_softirq();
623
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900624 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
625 if (rt->rt6i_dst.plen != 128 &&
626 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
627 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700628 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900629 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900631 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 rt->rt6i_dst.plen = 128;
633 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700634 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635
636#ifdef CONFIG_IPV6_SUBTREES
637 if (rt->rt6i_src.plen && saddr) {
638 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
639 rt->rt6i_src.plen = 128;
640 }
641#endif
642
David S. Miller14deae42009-01-04 16:04:39 -0800643 retry:
644 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
645 if (IS_ERR(neigh)) {
646 struct net *net = dev_net(rt->rt6i_dev);
647 int saved_rt_min_interval =
648 net->ipv6.sysctl.ip6_rt_gc_min_interval;
649 int saved_rt_elasticity =
650 net->ipv6.sysctl.ip6_rt_gc_elasticity;
651
652 if (attempts-- > 0) {
653 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
654 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
655
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000656 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800657
658 net->ipv6.sysctl.ip6_rt_gc_elasticity =
659 saved_rt_elasticity;
660 net->ipv6.sysctl.ip6_rt_gc_min_interval =
661 saved_rt_min_interval;
662 goto retry;
663 }
664
665 if (net_ratelimit())
666 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700667 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700668 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800669 return NULL;
670 }
671 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800673 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800675 return rt;
676}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800678static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
679{
680 struct rt6_info *rt = ip6_rt_copy(ort);
681 if (rt) {
682 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
683 rt->rt6i_dst.plen = 128;
684 rt->rt6i_flags |= RTF_CACHE;
Changli Gaod8d1f302010-06-10 23:31:35 -0700685 rt->dst.flags |= DST_HOST;
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800686 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
687 }
688 return rt;
689}
690
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800691static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
692 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693{
694 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800695 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700696 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800698 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700699 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700701 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702
703relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700704 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700705
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800706restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700707 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708
709restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700710 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800711
712 BACKTRACK(net, &fl->fl6_src);
713 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800714 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800715 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716
Changli Gaod8d1f302010-06-10 23:31:35 -0700717 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700718 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800719
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800720 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800721 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800722 else {
723#if CLONE_OFFLINK_ROUTE
724 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
725#else
726 goto out2;
727#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700728 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800729
Changli Gaod8d1f302010-06-10 23:31:35 -0700730 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800731 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800732
Changli Gaod8d1f302010-06-10 23:31:35 -0700733 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800734 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700735 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800736 if (!err)
737 goto out2;
738 }
739
740 if (--attempts <= 0)
741 goto out2;
742
743 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700744 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800745 * released someone could insert this route. Relookup.
746 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700747 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800748 goto relookup;
749
750out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800751 if (reachable) {
752 reachable = 0;
753 goto restart_2;
754 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700755 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700756 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700758 rt->dst.lastuse = jiffies;
759 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700760
761 return rt;
762}
763
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800764static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700765 struct flowi *fl, int flags)
766{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800767 return ip6_pol_route(net, table, fl->iif, fl, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700768}
769
Thomas Grafc71099a2006-08-04 23:20:06 -0700770void ip6_route_input(struct sk_buff *skb)
771{
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -0700772 struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900773 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700774 int flags = RT6_LOOKUP_F_HAS_SADDR;
Thomas Grafc71099a2006-08-04 23:20:06 -0700775 struct flowi fl = {
776 .iif = skb->dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +0000777 .fl6_dst = iph->daddr,
778 .fl6_src = iph->saddr,
779 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900780 .mark = skb->mark,
Thomas Grafc71099a2006-08-04 23:20:06 -0700781 .proto = iph->nexthdr,
782 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700783
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800784 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700785 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700786
Eric Dumazetadf30902009-06-02 05:19:30 +0000787 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700788}
789
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800790static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
Thomas Grafc71099a2006-08-04 23:20:06 -0700791 struct flowi *fl, int flags)
792{
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800793 return ip6_pol_route(net, table, fl->oif, fl, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700794}
795
Daniel Lezcano4591db42008-03-05 10:48:10 -0800796struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
797 struct flowi *fl)
Thomas Grafc71099a2006-08-04 23:20:06 -0700798{
799 int flags = 0;
800
Brian Haley6057fd72010-05-28 23:02:35 -0700801 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700802 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700803
Thomas Grafadaa70b2006-10-13 15:01:03 -0700804 if (!ipv6_addr_any(&fl->fl6_src))
805 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000806 else if (sk)
807 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700808
Daniel Lezcano4591db42008-03-05 10:48:10 -0800809 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810}
811
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900812EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813
David S. Miller14e50e52007-05-24 18:17:54 -0700814int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
815{
816 struct rt6_info *ort = (struct rt6_info *) *dstp;
817 struct rt6_info *rt = (struct rt6_info *)
818 dst_alloc(&ip6_dst_blackhole_ops);
819 struct dst_entry *new = NULL;
820
821 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700822 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700823
824 atomic_set(&new->__refcnt, 1);
825 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800826 new->input = dst_discard;
827 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700828
Changli Gaod8d1f302010-06-10 23:31:35 -0700829 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
830 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -0700831 if (new->dev)
832 dev_hold(new->dev);
833 rt->rt6i_idev = ort->rt6i_idev;
834 if (rt->rt6i_idev)
835 in6_dev_hold(rt->rt6i_idev);
836 rt->rt6i_expires = 0;
837
838 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
839 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
840 rt->rt6i_metric = 0;
841
842 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
843#ifdef CONFIG_IPV6_SUBTREES
844 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
845#endif
846
847 dst_free(new);
848 }
849
850 dst_release(*dstp);
851 *dstp = new;
Eric Dumazeta02cec22010-09-22 20:43:57 +0000852 return new ? 0 : -ENOMEM;
David S. Miller14e50e52007-05-24 18:17:54 -0700853}
854EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
855
Linus Torvalds1da177e2005-04-16 15:20:36 -0700856/*
857 * Destination cache support functions
858 */
859
860static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
861{
862 struct rt6_info *rt;
863
864 rt = (struct rt6_info *) dst;
865
Herbert Xu10414442010-03-18 23:00:22 +0000866 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700867 return dst;
868
869 return NULL;
870}
871
872static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
873{
874 struct rt6_info *rt = (struct rt6_info *) dst;
875
876 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000877 if (rt->rt6i_flags & RTF_CACHE) {
878 if (rt6_check_expired(rt)) {
879 ip6_del_rt(rt);
880 dst = NULL;
881 }
882 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000884 dst = NULL;
885 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000887 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888}
889
890static void ip6_link_failure(struct sk_buff *skb)
891{
892 struct rt6_info *rt;
893
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000894 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895
Eric Dumazetadf30902009-06-02 05:19:30 +0000896 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897 if (rt) {
898 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700899 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700900 rt->rt6i_flags |= RTF_EXPIRES;
901 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
902 rt->rt6i_node->fn_sernum = -1;
903 }
904}
905
906static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
907{
908 struct rt6_info *rt6 = (struct rt6_info*)dst;
909
910 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
911 rt6->rt6i_flags |= RTF_MODIFIED;
912 if (mtu < IPV6_MIN_MTU) {
913 mtu = IPV6_MIN_MTU;
914 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
915 }
916 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700917 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 }
919}
920
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921static int ipv6_get_mtu(struct net_device *dev);
922
Daniel Lezcano55786892008-03-04 13:47:47 -0800923static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924{
925 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
926
Daniel Lezcano55786892008-03-04 13:47:47 -0800927 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
928 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929
930 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900931 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
932 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
933 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 * rely only on pmtu discovery"
935 */
936 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
937 mtu = IPV6_MAXPLEN;
938 return mtu;
939}
940
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800941static struct dst_entry *icmp6_dst_gc_list;
942static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700943
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800944struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900946 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947{
948 struct rt6_info *rt;
949 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900950 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951
952 if (unlikely(idev == NULL))
953 return NULL;
954
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000955 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 if (unlikely(rt == NULL)) {
957 in6_dev_put(idev);
958 goto out;
959 }
960
961 dev_hold(dev);
962 if (neigh)
963 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -0800964 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -0800966 if (IS_ERR(neigh))
967 neigh = NULL;
968 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969
970 rt->rt6i_dev = dev;
971 rt->rt6i_idev = idev;
972 rt->rt6i_nexthop = neigh;
Changli Gaod8d1f302010-06-10 23:31:35 -0700973 atomic_set(&rt->dst.__refcnt, 1);
974 rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
975 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
976 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
977 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978
979#if 0 /* there's no chance to use these for ndisc */
Changli Gaod8d1f302010-06-10 23:31:35 -0700980 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900981 ? DST_HOST
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 : 0;
983 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
984 rt->rt6i_dst.plen = 128;
985#endif
986
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800987 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -0700988 rt->dst.next = icmp6_dst_gc_list;
989 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -0800990 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991
Daniel Lezcano55786892008-03-04 13:47:47 -0800992 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993
994out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700995 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996}
997
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -0700998int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999{
1000 struct dst_entry *dst, *next, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001001 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002
1003 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001004
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001005 spin_lock_bh(&icmp6_dst_lock);
1006 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001007
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008 while ((dst = *pprev) != NULL) {
1009 if (!atomic_read(&dst->__refcnt)) {
1010 *pprev = dst->next;
1011 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012 } else {
1013 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001014 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001015 }
1016 }
1017
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001018 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001019
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001020 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021}
1022
David S. Miller1e493d12008-09-10 17:27:15 -07001023static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1024 void *arg)
1025{
1026 struct dst_entry *dst, **pprev;
1027
1028 spin_lock_bh(&icmp6_dst_lock);
1029 pprev = &icmp6_dst_gc_list;
1030 while ((dst = *pprev) != NULL) {
1031 struct rt6_info *rt = (struct rt6_info *) dst;
1032 if (func(rt, arg)) {
1033 *pprev = dst->next;
1034 dst_free(dst);
1035 } else {
1036 pprev = &dst->next;
1037 }
1038 }
1039 spin_unlock_bh(&icmp6_dst_lock);
1040}
1041
Daniel Lezcano569d3642008-01-18 03:56:57 -08001042static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001045 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001046 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1047 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1048 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1049 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1050 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001051 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052
Eric Dumazetfc66f952010-10-08 06:37:34 +00001053 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001054 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001055 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056 goto out;
1057
Benjamin Thery6891a342008-03-04 13:49:47 -08001058 net->ipv6.ip6_rt_gc_expire++;
1059 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1060 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001061 entries = dst_entries_get_slow(ops);
1062 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001063 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001065 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001066 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067}
1068
1069/* Clean host part of a prefix. Not necessary in radix tree,
1070 but results in cleaner routing tables.
1071
1072 Remove it only when all the things will work!
1073 */
1074
1075static int ipv6_get_mtu(struct net_device *dev)
1076{
1077 int mtu = IPV6_MIN_MTU;
1078 struct inet6_dev *idev;
1079
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001080 rcu_read_lock();
1081 idev = __in6_dev_get(dev);
1082 if (idev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083 mtu = idev->cnf.mtu6;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001084 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085 return mtu;
1086}
1087
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001088int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089{
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001090 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1091 if (hoplimit < 0) {
1092 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001093 struct inet6_dev *idev;
1094
1095 rcu_read_lock();
1096 idev = __in6_dev_get(dev);
1097 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001098 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001099 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001100 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001101 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102 }
1103 return hoplimit;
1104}
1105
1106/*
1107 *
1108 */
1109
Thomas Graf86872cb2006-08-22 00:01:08 -07001110int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111{
1112 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001113 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 struct rt6_info *rt = NULL;
1115 struct net_device *dev = NULL;
1116 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001117 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 int addr_type;
1119
Thomas Graf86872cb2006-08-22 00:01:08 -07001120 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121 return -EINVAL;
1122#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001123 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 return -EINVAL;
1125#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001126 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001128 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 if (!dev)
1130 goto out;
1131 idev = in6_dev_get(dev);
1132 if (!idev)
1133 goto out;
1134 }
1135
Thomas Graf86872cb2006-08-22 00:01:08 -07001136 if (cfg->fc_metric == 0)
1137 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138
Daniel Lezcano55786892008-03-04 13:47:47 -08001139 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001140 if (table == NULL) {
1141 err = -ENOBUFS;
1142 goto out;
1143 }
1144
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001145 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146
1147 if (rt == NULL) {
1148 err = -ENOMEM;
1149 goto out;
1150 }
1151
Changli Gaod8d1f302010-06-10 23:31:35 -07001152 rt->dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001153 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1154 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1155 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156
Thomas Graf86872cb2006-08-22 00:01:08 -07001157 if (cfg->fc_protocol == RTPROT_UNSPEC)
1158 cfg->fc_protocol = RTPROT_BOOT;
1159 rt->rt6i_protocol = cfg->fc_protocol;
1160
1161 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162
1163 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001164 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001165 else if (cfg->fc_flags & RTF_LOCAL)
1166 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001168 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001169
Changli Gaod8d1f302010-06-10 23:31:35 -07001170 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171
Thomas Graf86872cb2006-08-22 00:01:08 -07001172 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1173 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174 if (rt->rt6i_dst.plen == 128)
Changli Gaod8d1f302010-06-10 23:31:35 -07001175 rt->dst.flags = DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001176
1177#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001178 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1179 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180#endif
1181
Thomas Graf86872cb2006-08-22 00:01:08 -07001182 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183
1184 /* We cannot add true routes via loopback here,
1185 they would result in kernel looping; promote them to reject routes
1186 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001187 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001188 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1189 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001190 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001191 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 if (dev) {
1193 dev_put(dev);
1194 in6_dev_put(idev);
1195 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001196 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197 dev_hold(dev);
1198 idev = in6_dev_get(dev);
1199 if (!idev) {
1200 err = -ENODEV;
1201 goto out;
1202 }
1203 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001204 rt->dst.output = ip6_pkt_discard_out;
1205 rt->dst.input = ip6_pkt_discard;
1206 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1208 goto install_route;
1209 }
1210
Thomas Graf86872cb2006-08-22 00:01:08 -07001211 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 struct in6_addr *gw_addr;
1213 int gwa_type;
1214
Thomas Graf86872cb2006-08-22 00:01:08 -07001215 gw_addr = &cfg->fc_gateway;
1216 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217 gwa_type = ipv6_addr_type(gw_addr);
1218
1219 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1220 struct rt6_info *grt;
1221
1222 /* IPv6 strictly inhibits using not link-local
1223 addresses as nexthop address.
1224 Otherwise, router will not able to send redirects.
1225 It is very good, but in some (rare!) circumstances
1226 (SIT, PtP, NBMA NOARP links) it is handy to allow
1227 some exceptions. --ANK
1228 */
1229 err = -EINVAL;
1230 if (!(gwa_type&IPV6_ADDR_UNICAST))
1231 goto out;
1232
Daniel Lezcano55786892008-03-04 13:47:47 -08001233 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234
1235 err = -EHOSTUNREACH;
1236 if (grt == NULL)
1237 goto out;
1238 if (dev) {
1239 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001240 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241 goto out;
1242 }
1243 } else {
1244 dev = grt->rt6i_dev;
1245 idev = grt->rt6i_idev;
1246 dev_hold(dev);
1247 in6_dev_hold(grt->rt6i_idev);
1248 }
1249 if (!(grt->rt6i_flags&RTF_GATEWAY))
1250 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001251 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252
1253 if (err)
1254 goto out;
1255 }
1256 err = -EINVAL;
1257 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1258 goto out;
1259 }
1260
1261 err = -ENODEV;
1262 if (dev == NULL)
1263 goto out;
1264
Thomas Graf86872cb2006-08-22 00:01:08 -07001265 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1267 if (IS_ERR(rt->rt6i_nexthop)) {
1268 err = PTR_ERR(rt->rt6i_nexthop);
1269 rt->rt6i_nexthop = NULL;
1270 goto out;
1271 }
1272 }
1273
Thomas Graf86872cb2006-08-22 00:01:08 -07001274 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275
1276install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001277 if (cfg->fc_mx) {
1278 struct nlattr *nla;
1279 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280
Thomas Graf86872cb2006-08-22 00:01:08 -07001281 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001282 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001283
1284 if (type) {
1285 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286 err = -EINVAL;
1287 goto out;
1288 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001289
Changli Gaod8d1f302010-06-10 23:31:35 -07001290 rt->dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001292 }
1293 }
1294
Changli Gaod8d1f302010-06-10 23:31:35 -07001295 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1296 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1297 if (!dst_mtu(&rt->dst))
1298 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1299 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1300 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1301 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001303 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001304
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001305 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001306
Thomas Graf86872cb2006-08-22 00:01:08 -07001307 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308
1309out:
1310 if (dev)
1311 dev_put(dev);
1312 if (idev)
1313 in6_dev_put(idev);
1314 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001315 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316 return err;
1317}
1318
Thomas Graf86872cb2006-08-22 00:01:08 -07001319static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320{
1321 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001322 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001323 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001325 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001326 return -ENOENT;
1327
Thomas Grafc71099a2006-08-04 23:20:06 -07001328 table = rt->rt6i_table;
1329 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
Thomas Graf86872cb2006-08-22 00:01:08 -07001331 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001332 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001333
Thomas Grafc71099a2006-08-04 23:20:06 -07001334 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001335
1336 return err;
1337}
1338
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001339int ip6_del_rt(struct rt6_info *rt)
1340{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001341 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001342 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001343 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001344 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001345}
1346
Thomas Graf86872cb2006-08-22 00:01:08 -07001347static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001348{
Thomas Grafc71099a2006-08-04 23:20:06 -07001349 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350 struct fib6_node *fn;
1351 struct rt6_info *rt;
1352 int err = -ESRCH;
1353
Daniel Lezcano55786892008-03-04 13:47:47 -08001354 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001355 if (table == NULL)
1356 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001357
Thomas Grafc71099a2006-08-04 23:20:06 -07001358 read_lock_bh(&table->tb6_lock);
1359
1360 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001361 &cfg->fc_dst, cfg->fc_dst_len,
1362 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001363
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001365 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001366 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001368 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001370 if (cfg->fc_flags & RTF_GATEWAY &&
1371 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001373 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001375 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001376 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377
Thomas Graf86872cb2006-08-22 00:01:08 -07001378 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 }
1380 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001381 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382
1383 return err;
1384}
1385
1386/*
1387 * Handle redirects
1388 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001389struct ip6rd_flowi {
1390 struct flowi fl;
1391 struct in6_addr gateway;
1392};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001394static struct rt6_info *__ip6_route_redirect(struct net *net,
1395 struct fib6_table *table,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001396 struct flowi *fl,
1397 int flags)
1398{
1399 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1400 struct rt6_info *rt;
1401 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001402
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001404 * Get the "current" route for this destination and
1405 * check if the redirect has come from approriate router.
1406 *
1407 * RFC 2461 specifies that redirects should only be
1408 * accepted if they come from the nexthop to the target.
1409 * Due to the way the routes are chosen, this notion
1410 * is a bit fuzzy and one might need to check all possible
1411 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413
Thomas Grafc71099a2006-08-04 23:20:06 -07001414 read_lock_bh(&table->tb6_lock);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001415 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001416restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001417 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001418 /*
1419 * Current route is on-link; redirect is always invalid.
1420 *
1421 * Seems, previous statement is not true. It could
1422 * be node, which looks for us as on-link (f.e. proxy ndisc)
1423 * But then router serving it might decide, that we should
1424 * know truth 8)8) --ANK (980726).
1425 */
1426 if (rt6_check_expired(rt))
1427 continue;
1428 if (!(rt->rt6i_flags & RTF_GATEWAY))
1429 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001430 if (fl->oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001431 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001432 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001433 continue;
1434 break;
1435 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001436
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001437 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001438 rt = net->ipv6.ip6_null_entry;
1439 BACKTRACK(net, &fl->fl6_src);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001440out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001441 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001442
1443 read_unlock_bh(&table->tb6_lock);
1444
1445 return rt;
1446};
1447
1448static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1449 struct in6_addr *src,
1450 struct in6_addr *gateway,
1451 struct net_device *dev)
1452{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001453 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001454 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001455 struct ip6rd_flowi rdfl = {
1456 .fl = {
1457 .oif = dev->ifindex,
Changli Gao58116622010-11-12 18:43:55 +00001458 .fl6_dst = *dest,
1459 .fl6_src = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001460 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001461 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001462
Brian Haley86c36ce2009-10-07 13:58:01 -07001463 ipv6_addr_copy(&rdfl.gateway, gateway);
1464
Thomas Grafadaa70b2006-10-13 15:01:03 -07001465 if (rt6_need_strict(dest))
1466 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001467
Daniel Lezcano55786892008-03-04 13:47:47 -08001468 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001469 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001470}
1471
1472void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1473 struct in6_addr *saddr,
1474 struct neighbour *neigh, u8 *lladdr, int on_link)
1475{
1476 struct rt6_info *rt, *nrt = NULL;
1477 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001478 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001479
1480 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1481
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001482 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483 if (net_ratelimit())
1484 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1485 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001486 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487 }
1488
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489 /*
1490 * We have finally decided to accept it.
1491 */
1492
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001493 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1495 NEIGH_UPDATE_F_OVERRIDE|
1496 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1497 NEIGH_UPDATE_F_ISROUTER))
1498 );
1499
1500 /*
1501 * Redirect received -> path was valid.
1502 * Look, redirects are sent only in response to data packets,
1503 * so that this nexthop apparently is reachable. --ANK
1504 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001505 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506
1507 /* Duplicate redirect: silently ignore. */
Changli Gaod8d1f302010-06-10 23:31:35 -07001508 if (neigh == rt->dst.neighbour)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001509 goto out;
1510
1511 nrt = ip6_rt_copy(rt);
1512 if (nrt == NULL)
1513 goto out;
1514
1515 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1516 if (on_link)
1517 nrt->rt6i_flags &= ~RTF_GATEWAY;
1518
1519 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1520 nrt->rt6i_dst.plen = 128;
Changli Gaod8d1f302010-06-10 23:31:35 -07001521 nrt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522
1523 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1524 nrt->rt6i_nexthop = neigh_clone(neigh);
1525 /* Reset pmtu, it may be better */
Changli Gaod8d1f302010-06-10 23:31:35 -07001526 nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1527 nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1528 dst_mtu(&nrt->dst));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529
Thomas Graf40e22e82006-08-22 00:00:45 -07001530 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531 goto out;
1532
Changli Gaod8d1f302010-06-10 23:31:35 -07001533 netevent.old = &rt->dst;
1534 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001535 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1536
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001538 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539 return;
1540 }
1541
1542out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001543 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001544}
1545
1546/*
1547 * Handle ICMP "packet too big" messages
1548 * i.e. Path MTU discovery
1549 */
1550
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001551static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1552 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553{
1554 struct rt6_info *rt, *nrt;
1555 int allfrag = 0;
1556
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001557 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 if (rt == NULL)
1559 return;
1560
Changli Gaod8d1f302010-06-10 23:31:35 -07001561 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562 goto out;
1563
1564 if (pmtu < IPV6_MIN_MTU) {
1565 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001566 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567 * MTU (1280) and a fragment header should always be included
1568 * after a node receiving Too Big message reporting PMTU is
1569 * less than the IPv6 Minimum Link MTU.
1570 */
1571 pmtu = IPV6_MIN_MTU;
1572 allfrag = 1;
1573 }
1574
1575 /* New mtu received -> path was valid.
1576 They are sent only in response to data packets,
1577 so that this nexthop apparently is reachable. --ANK
1578 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001579 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580
1581 /* Host route. If it is static, it would be better
1582 not to override it, but add new one, so that
1583 when cache entry will expire old pmtu
1584 would return automatically.
1585 */
1586 if (rt->rt6i_flags & RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001587 rt->dst.metrics[RTAX_MTU-1] = pmtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588 if (allfrag)
Changli Gaod8d1f302010-06-10 23:31:35 -07001589 rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1590 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1592 goto out;
1593 }
1594
1595 /* Network route.
1596 Two cases are possible:
1597 1. It is connected route. Action: COW
1598 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1599 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001600 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001601 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001602 else
1603 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001604
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001605 if (nrt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001606 nrt->dst.metrics[RTAX_MTU-1] = pmtu;
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001607 if (allfrag)
Changli Gaod8d1f302010-06-10 23:31:35 -07001608 nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001609
1610 /* According to RFC 1981, detecting PMTU increase shouldn't be
1611 * happened within 5 mins, the recommended timer is 10 mins.
1612 * Here this route expiration time is set to ip6_rt_mtu_expires
1613 * which is 10 mins. After 10 mins the decreased pmtu is expired
1614 * and detecting PMTU increase will be automatically happened.
1615 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001616 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001617 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1618
Thomas Graf40e22e82006-08-22 00:00:45 -07001619 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001622 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623}
1624
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001625void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1626 struct net_device *dev, u32 pmtu)
1627{
1628 struct net *net = dev_net(dev);
1629
1630 /*
1631 * RFC 1981 states that a node "MUST reduce the size of the packets it
1632 * is sending along the path" that caused the Packet Too Big message.
1633 * Since it's not possible in the general case to determine which
1634 * interface was used to send the original packet, we update the MTU
1635 * on the interface that will be used to send future packets. We also
1636 * update the MTU on the interface that received the Packet Too Big in
1637 * case the original packet was forced out that interface with
1638 * SO_BINDTODEVICE or similar. This is the next best thing to the
1639 * correct behaviour, which would be to update the MTU on all
1640 * interfaces.
1641 */
1642 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1643 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1644}
1645
Linus Torvalds1da177e2005-04-16 15:20:36 -07001646/*
1647 * Misc support functions
1648 */
1649
1650static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1651{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001652 struct net *net = dev_net(ort->rt6i_dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001653 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001654
1655 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001656 rt->dst.input = ort->dst.input;
1657 rt->dst.output = ort->dst.output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658
Changli Gaod8d1f302010-06-10 23:31:35 -07001659 memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
1660 rt->dst.error = ort->dst.error;
1661 rt->dst.dev = ort->dst.dev;
1662 if (rt->dst.dev)
1663 dev_hold(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 rt->rt6i_idev = ort->rt6i_idev;
1665 if (rt->rt6i_idev)
1666 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001667 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 rt->rt6i_expires = 0;
1669
1670 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1671 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1672 rt->rt6i_metric = 0;
1673
1674 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1675#ifdef CONFIG_IPV6_SUBTREES
1676 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1677#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001678 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 }
1680 return rt;
1681}
1682
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001683#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001684static struct rt6_info *rt6_get_route_info(struct net *net,
1685 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001686 struct in6_addr *gwaddr, int ifindex)
1687{
1688 struct fib6_node *fn;
1689 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001690 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001691
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001692 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001693 if (table == NULL)
1694 return NULL;
1695
1696 write_lock_bh(&table->tb6_lock);
1697 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001698 if (!fn)
1699 goto out;
1700
Changli Gaod8d1f302010-06-10 23:31:35 -07001701 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001702 if (rt->rt6i_dev->ifindex != ifindex)
1703 continue;
1704 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1705 continue;
1706 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1707 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001708 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001709 break;
1710 }
1711out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001712 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001713 return rt;
1714}
1715
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001716static struct rt6_info *rt6_add_route_info(struct net *net,
1717 struct in6_addr *prefix, int prefixlen,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001718 struct in6_addr *gwaddr, int ifindex,
1719 unsigned pref)
1720{
Thomas Graf86872cb2006-08-22 00:01:08 -07001721 struct fib6_config cfg = {
1722 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001723 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001724 .fc_ifindex = ifindex,
1725 .fc_dst_len = prefixlen,
1726 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1727 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001728 .fc_nlinfo.pid = 0,
1729 .fc_nlinfo.nlh = NULL,
1730 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001731 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001732
Thomas Graf86872cb2006-08-22 00:01:08 -07001733 ipv6_addr_copy(&cfg.fc_dst, prefix);
1734 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1735
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001736 /* We should treat it as a default route if prefix length is 0. */
1737 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001738 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001739
Thomas Graf86872cb2006-08-22 00:01:08 -07001740 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001741
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001742 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001743}
1744#endif
1745
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001747{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001749 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001751 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001752 if (table == NULL)
1753 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001754
Thomas Grafc71099a2006-08-04 23:20:06 -07001755 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001756 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001758 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1760 break;
1761 }
1762 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001763 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001764 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765 return rt;
1766}
1767
1768struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001769 struct net_device *dev,
1770 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771{
Thomas Graf86872cb2006-08-22 00:01:08 -07001772 struct fib6_config cfg = {
1773 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001774 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001775 .fc_ifindex = dev->ifindex,
1776 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1777 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001778 .fc_nlinfo.pid = 0,
1779 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001780 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001781 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782
Thomas Graf86872cb2006-08-22 00:01:08 -07001783 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784
Thomas Graf86872cb2006-08-22 00:01:08 -07001785 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787 return rt6_get_dflt_router(gwaddr, dev);
1788}
1789
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001790void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791{
1792 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001793 struct fib6_table *table;
1794
1795 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001796 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001797 if (table == NULL)
1798 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001799
1800restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001801 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001802 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001804 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001805 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001806 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807 goto restart;
1808 }
1809 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001810 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811}
1812
Daniel Lezcano55786892008-03-04 13:47:47 -08001813static void rtmsg_to_fib6_config(struct net *net,
1814 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001815 struct fib6_config *cfg)
1816{
1817 memset(cfg, 0, sizeof(*cfg));
1818
1819 cfg->fc_table = RT6_TABLE_MAIN;
1820 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1821 cfg->fc_metric = rtmsg->rtmsg_metric;
1822 cfg->fc_expires = rtmsg->rtmsg_info;
1823 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1824 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1825 cfg->fc_flags = rtmsg->rtmsg_flags;
1826
Daniel Lezcano55786892008-03-04 13:47:47 -08001827 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001828
Thomas Graf86872cb2006-08-22 00:01:08 -07001829 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1830 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1831 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1832}
1833
Daniel Lezcano55786892008-03-04 13:47:47 -08001834int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835{
Thomas Graf86872cb2006-08-22 00:01:08 -07001836 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001837 struct in6_rtmsg rtmsg;
1838 int err;
1839
1840 switch(cmd) {
1841 case SIOCADDRT: /* Add a route */
1842 case SIOCDELRT: /* Delete a route */
1843 if (!capable(CAP_NET_ADMIN))
1844 return -EPERM;
1845 err = copy_from_user(&rtmsg, arg,
1846 sizeof(struct in6_rtmsg));
1847 if (err)
1848 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001849
Daniel Lezcano55786892008-03-04 13:47:47 -08001850 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001851
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852 rtnl_lock();
1853 switch (cmd) {
1854 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001855 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001856 break;
1857 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001858 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859 break;
1860 default:
1861 err = -EINVAL;
1862 }
1863 rtnl_unlock();
1864
1865 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001866 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867
1868 return -EINVAL;
1869}
1870
1871/*
1872 * Drop the packet on the floor
1873 */
1874
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001875static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001877 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001878 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001879 switch (ipstats_mib_noroutes) {
1880 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001881 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001882 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001883 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1884 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001885 break;
1886 }
1887 /* FALLTHROUGH */
1888 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001889 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1890 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001891 break;
1892 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001893 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 kfree_skb(skb);
1895 return 0;
1896}
1897
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001898static int ip6_pkt_discard(struct sk_buff *skb)
1899{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001900 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001901}
1902
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001903static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904{
Eric Dumazetadf30902009-06-02 05:19:30 +00001905 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001906 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907}
1908
David S. Miller6723ab52006-10-18 21:20:57 -07001909#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1910
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001911static int ip6_pkt_prohibit(struct sk_buff *skb)
1912{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001913 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001914}
1915
1916static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1917{
Eric Dumazetadf30902009-06-02 05:19:30 +00001918 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001919 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07001920}
1921
David S. Miller6723ab52006-10-18 21:20:57 -07001922#endif
1923
Linus Torvalds1da177e2005-04-16 15:20:36 -07001924/*
1925 * Allocate a dst for local (unicast / anycast) address.
1926 */
1927
1928struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1929 const struct in6_addr *addr,
1930 int anycast)
1931{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001932 struct net *net = dev_net(idev->dev);
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001933 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -08001934 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935
Ben Greear40385652010-11-08 12:33:48 +00001936 if (rt == NULL) {
1937 if (net_ratelimit())
1938 pr_warning("IPv6: Maximum number of routes reached,"
1939 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00001941 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942
Daniel Lezcano55786892008-03-04 13:47:47 -08001943 dev_hold(net->loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001944 in6_dev_hold(idev);
1945
Changli Gaod8d1f302010-06-10 23:31:35 -07001946 rt->dst.flags = DST_HOST;
1947 rt->dst.input = ip6_input;
1948 rt->dst.output = ip6_output;
Daniel Lezcano55786892008-03-04 13:47:47 -08001949 rt->rt6i_dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07001951 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1952 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1953 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1954 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955
1956 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001957 if (anycast)
1958 rt->rt6i_flags |= RTF_ANYCAST;
1959 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08001961 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1962 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001963 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08001964
1965 /* We are casting this because that is the return
1966 * value type. But an errno encoded pointer is the
1967 * same regardless of the underlying pointer type,
1968 * and that's what we are returning. So this is OK.
1969 */
1970 return (struct rt6_info *) neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971 }
David S. Miller14deae42009-01-04 16:04:39 -08001972 rt->rt6i_nexthop = neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973
1974 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1975 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08001976 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001977
Changli Gaod8d1f302010-06-10 23:31:35 -07001978 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979
1980 return rt;
1981}
1982
/* Argument bundle handed to fib6_ifdown() through its 'void *arg'. */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every device */
	struct net *net;	/* namespace whose null entry must be kept */
};
1987
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988static int fib6_ifdown(struct rt6_info *rt, void *arg)
1989{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001990 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1991 struct net *net = ((struct arg_dev_net *)arg)->net;
1992
1993 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1994 rt != net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995 RT6_TRACE("deleted by ifdown %p\n", rt);
1996 return -1;
1997 }
1998 return 0;
1999}
2000
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002001void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002002{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002003 struct arg_dev_net adn = {
2004 .dev = dev,
2005 .net = net,
2006 };
2007
2008 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002009 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010}
2011
/* Argument bundle handed to rt6_mtu_change_route() through its 'void *'. */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
2017
2018static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2019{
2020 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2021 struct inet6_dev *idev;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002022 struct net *net = dev_net(arg->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002023
2024 /* In IPv6 pmtu discovery is not optional,
2025 so that RTAX_MTU lock cannot disable it.
2026 We still use this lock to block changes
2027 caused by addrconf/ndisc.
2028 */
2029
2030 idev = __in6_dev_get(arg->dev);
2031 if (idev == NULL)
2032 return 0;
2033
2034 /* For administrative MTU increase, there is no way to discover
2035 IPv6 PMTU increase, so PMTU increase should be updated here.
2036 Since RFC 1981 doesn't include administrative MTU increase
2037 update PMTU increase is a MUST. (i.e. jumbo frame)
2038 */
2039 /*
2040 If new MTU is less than route PMTU, this new MTU will be the
2041 lowest MTU in the path, update the route PMTU to reflect PMTU
2042 decreases; if new MTU is greater than route PMTU, and the
2043 old MTU is the lowest MTU in the path, update the route PMTU
2044 to reflect the increase. In this case if the other nodes' MTU
2045 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2046 PMTU discouvery.
2047 */
2048 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002049 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2050 (dst_mtu(&rt->dst) >= arg->mtu ||
2051 (dst_mtu(&rt->dst) < arg->mtu &&
2052 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2053 rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
2054 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002055 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056 return 0;
2057}
2058
2059void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2060{
Thomas Grafc71099a2006-08-04 23:20:06 -07002061 struct rt6_mtu_change_arg arg = {
2062 .dev = dev,
2063 .mtu = mtu,
2064 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002065
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002066 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067}
2068
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002069static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002070 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002071 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002072 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002073 [RTA_PRIORITY] = { .type = NLA_U32 },
2074 [RTA_METRICS] = { .type = NLA_NESTED },
2075};
2076
2077static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2078 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002079{
Thomas Graf86872cb2006-08-22 00:01:08 -07002080 struct rtmsg *rtm;
2081 struct nlattr *tb[RTA_MAX+1];
2082 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083
Thomas Graf86872cb2006-08-22 00:01:08 -07002084 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2085 if (err < 0)
2086 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087
Thomas Graf86872cb2006-08-22 00:01:08 -07002088 err = -EINVAL;
2089 rtm = nlmsg_data(nlh);
2090 memset(cfg, 0, sizeof(*cfg));
2091
2092 cfg->fc_table = rtm->rtm_table;
2093 cfg->fc_dst_len = rtm->rtm_dst_len;
2094 cfg->fc_src_len = rtm->rtm_src_len;
2095 cfg->fc_flags = RTF_UP;
2096 cfg->fc_protocol = rtm->rtm_protocol;
2097
2098 if (rtm->rtm_type == RTN_UNREACHABLE)
2099 cfg->fc_flags |= RTF_REJECT;
2100
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002101 if (rtm->rtm_type == RTN_LOCAL)
2102 cfg->fc_flags |= RTF_LOCAL;
2103
Thomas Graf86872cb2006-08-22 00:01:08 -07002104 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2105 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002106 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002107
2108 if (tb[RTA_GATEWAY]) {
2109 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2110 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002112
2113 if (tb[RTA_DST]) {
2114 int plen = (rtm->rtm_dst_len + 7) >> 3;
2115
2116 if (nla_len(tb[RTA_DST]) < plen)
2117 goto errout;
2118
2119 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002121
2122 if (tb[RTA_SRC]) {
2123 int plen = (rtm->rtm_src_len + 7) >> 3;
2124
2125 if (nla_len(tb[RTA_SRC]) < plen)
2126 goto errout;
2127
2128 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002130
2131 if (tb[RTA_OIF])
2132 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2133
2134 if (tb[RTA_PRIORITY])
2135 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2136
2137 if (tb[RTA_METRICS]) {
2138 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2139 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002140 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002141
2142 if (tb[RTA_TABLE])
2143 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2144
2145 err = 0;
2146errout:
2147 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148}
2149
Thomas Grafc127ea22007-03-22 11:58:32 -07002150static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151{
Thomas Graf86872cb2006-08-22 00:01:08 -07002152 struct fib6_config cfg;
2153 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154
Thomas Graf86872cb2006-08-22 00:01:08 -07002155 err = rtm_to_fib6_config(skb, nlh, &cfg);
2156 if (err < 0)
2157 return err;
2158
2159 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002160}
2161
Thomas Grafc127ea22007-03-22 11:58:32 -07002162static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002163{
Thomas Graf86872cb2006-08-22 00:01:08 -07002164 struct fib6_config cfg;
2165 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166
Thomas Graf86872cb2006-08-22 00:01:08 -07002167 err = rtm_to_fib6_config(skb, nlh, &cfg);
2168 if (err < 0)
2169 return err;
2170
2171 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002172}
2173
Thomas Graf339bf982006-11-10 14:10:15 -08002174static inline size_t rt6_nlmsg_size(void)
2175{
2176 return NLMSG_ALIGN(sizeof(struct rtmsg))
2177 + nla_total_size(16) /* RTA_SRC */
2178 + nla_total_size(16) /* RTA_DST */
2179 + nla_total_size(16) /* RTA_GATEWAY */
2180 + nla_total_size(16) /* RTA_PREFSRC */
2181 + nla_total_size(4) /* RTA_TABLE */
2182 + nla_total_size(4) /* RTA_IIF */
2183 + nla_total_size(4) /* RTA_OIF */
2184 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002185 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002186 + nla_total_size(sizeof(struct rta_cacheinfo));
2187}
2188
Brian Haley191cd582008-08-14 15:33:21 -07002189static int rt6_fill_node(struct net *net,
2190 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002191 struct in6_addr *dst, struct in6_addr *src,
2192 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002193 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194{
2195 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002196 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002197 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002198 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199
2200 if (prefix) { /* user wants prefix routes only */
2201 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2202 /* success since this is not a prefix route */
2203 return 1;
2204 }
2205 }
2206
Thomas Graf2d7202b2006-08-22 00:01:27 -07002207 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2208 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002209 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002210
2211 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212 rtm->rtm_family = AF_INET6;
2213 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2214 rtm->rtm_src_len = rt->rt6i_src.plen;
2215 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002216 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002217 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002218 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002219 table = RT6_TABLE_UNSPEC;
2220 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002221 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222 if (rt->rt6i_flags&RTF_REJECT)
2223 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002224 else if (rt->rt6i_flags&RTF_LOCAL)
2225 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2227 rtm->rtm_type = RTN_LOCAL;
2228 else
2229 rtm->rtm_type = RTN_UNICAST;
2230 rtm->rtm_flags = 0;
2231 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2232 rtm->rtm_protocol = rt->rt6i_protocol;
2233 if (rt->rt6i_flags&RTF_DYNAMIC)
2234 rtm->rtm_protocol = RTPROT_REDIRECT;
2235 else if (rt->rt6i_flags & RTF_ADDRCONF)
2236 rtm->rtm_protocol = RTPROT_KERNEL;
2237 else if (rt->rt6i_flags&RTF_DEFAULT)
2238 rtm->rtm_protocol = RTPROT_RA;
2239
2240 if (rt->rt6i_flags&RTF_CACHE)
2241 rtm->rtm_flags |= RTM_F_CLONED;
2242
2243 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002244 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002245 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002247 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248#ifdef CONFIG_IPV6_SUBTREES
2249 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002250 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002251 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002252 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002253 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002255 if (iif) {
2256#ifdef CONFIG_IPV6_MROUTE
2257 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002258 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002259 if (err <= 0) {
2260 if (!nowait) {
2261 if (err == 0)
2262 return 0;
2263 goto nla_put_failure;
2264 } else {
2265 if (err == -EMSGSIZE)
2266 goto nla_put_failure;
2267 }
2268 }
2269 } else
2270#endif
2271 NLA_PUT_U32(skb, RTA_IIF, iif);
2272 } else if (dst) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002273 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 struct in6_addr saddr_buf;
Brian Haley191cd582008-08-14 15:33:21 -07002275 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
YOSHIFUJI Hideaki7cbca672008-03-25 09:37:42 +09002276 dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002277 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002279
Changli Gaod8d1f302010-06-10 23:31:35 -07002280 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002281 goto nla_put_failure;
2282
Changli Gaod8d1f302010-06-10 23:31:35 -07002283 if (rt->dst.neighbour)
2284 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
Thomas Graf2d7202b2006-08-22 00:01:27 -07002285
Changli Gaod8d1f302010-06-10 23:31:35 -07002286 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002287 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2288
2289 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002290
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002291 if (!(rt->rt6i_flags & RTF_EXPIRES))
2292 expires = 0;
2293 else if (rt->rt6i_expires - jiffies < INT_MAX)
2294 expires = rt->rt6i_expires - jiffies;
2295 else
2296 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002297
Changli Gaod8d1f302010-06-10 23:31:35 -07002298 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2299 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002300 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301
Thomas Graf2d7202b2006-08-22 00:01:27 -07002302 return nlmsg_end(skb, nlh);
2303
2304nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002305 nlmsg_cancel(skb, nlh);
2306 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002307}
2308
Patrick McHardy1b43af52006-08-10 23:11:17 -07002309int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002310{
2311 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2312 int prefix;
2313
Thomas Graf2d7202b2006-08-22 00:01:27 -07002314 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2315 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2317 } else
2318 prefix = 0;
2319
Brian Haley191cd582008-08-14 15:33:21 -07002320 return rt6_fill_node(arg->net,
2321 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002322 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002323 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324}
2325
Thomas Grafc127ea22007-03-22 11:58:32 -07002326static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002327{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002328 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002329 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002330 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002331 struct sk_buff *skb;
2332 struct rtmsg *rtm;
2333 struct flowi fl;
2334 int err, iif = 0;
2335
2336 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2337 if (err < 0)
2338 goto errout;
2339
2340 err = -EINVAL;
2341 memset(&fl, 0, sizeof(fl));
2342
2343 if (tb[RTA_SRC]) {
2344 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2345 goto errout;
2346
2347 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2348 }
2349
2350 if (tb[RTA_DST]) {
2351 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2352 goto errout;
2353
2354 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2355 }
2356
2357 if (tb[RTA_IIF])
2358 iif = nla_get_u32(tb[RTA_IIF]);
2359
2360 if (tb[RTA_OIF])
2361 fl.oif = nla_get_u32(tb[RTA_OIF]);
2362
2363 if (iif) {
2364 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002365 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002366 if (!dev) {
2367 err = -ENODEV;
2368 goto errout;
2369 }
2370 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002371
2372 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002373 if (skb == NULL) {
2374 err = -ENOBUFS;
2375 goto errout;
2376 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002377
2378 /* Reserve room for dummy headers, this skb can pass
2379 through good chunk of routing engine.
2380 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002381 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2383
Daniel Lezcano8a3edd82008-03-07 11:14:16 -08002384 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
Changli Gaod8d1f302010-06-10 23:31:35 -07002385 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002386
Brian Haley191cd582008-08-14 15:33:21 -07002387 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002388 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002389 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002390 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002391 kfree_skb(skb);
2392 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393 }
2394
Daniel Lezcano55786892008-03-04 13:47:47 -08002395 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002396errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002397 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398}
2399
Thomas Graf86872cb2006-08-22 00:01:08 -07002400void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401{
2402 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002403 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002404 u32 seq;
2405 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002407 err = -ENOBUFS;
2408 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002409
Thomas Graf339bf982006-11-10 14:10:15 -08002410 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002411 if (skb == NULL)
2412 goto errout;
2413
Brian Haley191cd582008-08-14 15:33:21 -07002414 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002415 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002416 if (err < 0) {
2417 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2418 WARN_ON(err == -EMSGSIZE);
2419 kfree_skb(skb);
2420 goto errout;
2421 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002422 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2423 info->nlh, gfp_any());
2424 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002425errout:
2426 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002427 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002428}
2429
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002430static int ip6_route_dev_notify(struct notifier_block *this,
2431 unsigned long event, void *data)
2432{
2433 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002434 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002435
2436 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002437 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002438 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2439#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002440 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002441 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002442 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002443 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2444#endif
2445 }
2446
2447 return NOTIFY_OK;
2448}
2449
Linus Torvalds1da177e2005-04-16 15:20:36 -07002450/*
2451 * /proc
2452 */
2453
2454#ifdef CONFIG_PROC_FS
2455
/*
 * NOTE(review): the terms look like the per-field widths of one
 * /proc/net/ipv6_route line -- confirm before relying on it; no user
 * of this macro is visible in this part of the file.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * NOTE(review): presumably bookkeeping for a buffer-based /proc read;
 * the seq_file code below does not reference it -- verify whether it
 * still has users elsewhere.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2466
2467static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2468{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002469 struct seq_file *m = p_arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002471 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002472
2473#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002474 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002475#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002476 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477#endif
2478
2479 if (rt->rt6i_nexthop) {
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002480 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002481 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002482 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483 }
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002484 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002485 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2486 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002487 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488 return 0;
2489}
2490
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002491static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002492{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002493 struct net *net = (struct net *)m->private;
2494 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002495 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002496}
2497
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002498static int ipv6_route_open(struct inode *inode, struct file *file)
2499{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002500 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002501}
2502
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002503static const struct file_operations ipv6_route_proc_fops = {
2504 .owner = THIS_MODULE,
2505 .open = ipv6_route_open,
2506 .read = seq_read,
2507 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002508 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002509};
2510
Linus Torvalds1da177e2005-04-16 15:20:36 -07002511static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2512{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002513 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002514 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002515 net->ipv6.rt6_stats->fib_nodes,
2516 net->ipv6.rt6_stats->fib_route_nodes,
2517 net->ipv6.rt6_stats->fib_rt_alloc,
2518 net->ipv6.rt6_stats->fib_rt_entries,
2519 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002520 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002521 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522
2523 return 0;
2524}
2525
2526static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2527{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002528 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002529}
2530
Arjan van de Ven9a321442007-02-12 00:55:35 -08002531static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532 .owner = THIS_MODULE,
2533 .open = rt6_stats_seq_open,
2534 .read = seq_read,
2535 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002536 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537};
2538#endif /* CONFIG_PROC_FS */
2539
2540#ifdef CONFIG_SYSCTL
2541
Linus Torvalds1da177e2005-04-16 15:20:36 -07002542static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002543int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002544 void __user *buffer, size_t *lenp, loff_t *ppos)
2545{
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002546 struct net *net = current->nsproxy->net_ns;
2547 int delay = net->ipv6.sysctl.flush_delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548 if (write) {
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002549 proc_dointvec(ctl, write, buffer, lenp, ppos);
Daniel Lezcano5b7c9312008-03-03 23:28:58 -08002550 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 return 0;
2552 } else
2553 return -EINVAL;
2554}
2555
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002556ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002557 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002558 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002559 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002561 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002562 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 },
2564 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002565 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002566 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567 .maxlen = sizeof(int),
2568 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002569 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570 },
2571 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002572 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002573 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002574 .maxlen = sizeof(int),
2575 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002576 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002577 },
2578 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002580 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002581 .maxlen = sizeof(int),
2582 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002583 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584 },
2585 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002586 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002587 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588 .maxlen = sizeof(int),
2589 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002590 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002591 },
2592 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002593 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002594 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595 .maxlen = sizeof(int),
2596 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002597 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002598 },
2599 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002600 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002601 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602 .maxlen = sizeof(int),
2603 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002604 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002605 },
2606 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002607 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002608 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002609 .maxlen = sizeof(int),
2610 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002611 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002612 },
2613 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002615 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616 .maxlen = sizeof(int),
2617 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002618 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002619 },
2620 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002622 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002623 .maxlen = sizeof(int),
2624 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002625 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002626 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002627 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002628};
2629
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002630struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002631{
2632 struct ctl_table *table;
2633
2634 table = kmemdup(ipv6_route_table_template,
2635 sizeof(ipv6_route_table_template),
2636 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002637
2638 if (table) {
2639 table[0].data = &net->ipv6.sysctl.flush_delay;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002640 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002641 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2642 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2643 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2644 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2645 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2646 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2647 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002648 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002649 }
2650
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002651 return table;
2652}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002653#endif
2654
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002655static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002656{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002657 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002658
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002659 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2660 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002661
Eric Dumazetfc66f952010-10-08 06:37:34 +00002662 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2663 goto out_ip6_dst_ops;
2664
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002665 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2666 sizeof(*net->ipv6.ip6_null_entry),
2667 GFP_KERNEL);
2668 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002669 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002670 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002671 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002672 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002673
2674#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2675 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2676 sizeof(*net->ipv6.ip6_prohibit_entry),
2677 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002678 if (!net->ipv6.ip6_prohibit_entry)
2679 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002680 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002681 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002682 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002683
2684 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2685 sizeof(*net->ipv6.ip6_blk_hole_entry),
2686 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002687 if (!net->ipv6.ip6_blk_hole_entry)
2688 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002689 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002690 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002691 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002692#endif
2693
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002694 net->ipv6.sysctl.flush_delay = 0;
2695 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2696 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2697 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2698 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2699 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2700 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2701 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2702
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002703#ifdef CONFIG_PROC_FS
2704 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2705 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2706#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002707 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2708
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002709 ret = 0;
2710out:
2711 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002712
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002713#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2714out_ip6_prohibit_entry:
2715 kfree(net->ipv6.ip6_prohibit_entry);
2716out_ip6_null_entry:
2717 kfree(net->ipv6.ip6_null_entry);
2718#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002719out_ip6_dst_entries:
2720 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002721out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002722 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002723}
2724
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002725static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002726{
2727#ifdef CONFIG_PROC_FS
2728 proc_net_remove(net, "ipv6_route");
2729 proc_net_remove(net, "rt6_stats");
2730#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002731 kfree(net->ipv6.ip6_null_entry);
2732#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2733 kfree(net->ipv6.ip6_prohibit_entry);
2734 kfree(net->ipv6.ip6_blk_hole_entry);
2735#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002736 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002737}
2738
2739static struct pernet_operations ip6_route_net_ops = {
2740 .init = ip6_route_net_init,
2741 .exit = ip6_route_net_exit,
2742};
2743
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002744static struct notifier_block ip6_route_dev_notifier = {
2745 .notifier_call = ip6_route_dev_notify,
2746 .priority = 0,
2747};
2748
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002749int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002750{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002751 int ret;
2752
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002753 ret = -ENOMEM;
2754 ip6_dst_ops_template.kmem_cachep =
2755 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2756 SLAB_HWCACHE_ALIGN, NULL);
2757 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002758 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002759
Eric Dumazetfc66f952010-10-08 06:37:34 +00002760 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002761 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002762 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002763
Eric Dumazetfc66f952010-10-08 06:37:34 +00002764 ret = register_pernet_subsys(&ip6_route_net_ops);
2765 if (ret)
2766 goto out_dst_entries;
2767
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002768 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2769
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002770 /* Registering of the loopback is done before this portion of code,
2771 * the loopback reference in rt6_info will not be taken, do it
2772 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002773 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002774 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2775 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002776 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002777 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002778 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002779 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2780 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002781 ret = fib6_init();
2782 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002783 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002784
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002785 ret = xfrm6_init();
2786 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002787 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002788
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002789 ret = fib6_rules_init();
2790 if (ret)
2791 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002792
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002793 ret = -ENOBUFS;
2794 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2795 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2796 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2797 goto fib6_rules_init;
2798
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002799 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002800 if (ret)
2801 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002802
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002803out:
2804 return ret;
2805
2806fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002807 fib6_rules_cleanup();
2808xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002809 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002810out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002811 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002812out_register_subsys:
2813 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002814out_dst_entries:
2815 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002816out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002817 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002818 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002819}
2820
2821void ip6_route_cleanup(void)
2822{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002823 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002824 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002825 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002826 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002827 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002828 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002829 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002830}