blob: 999a982ad3fd7d7abac40211b50320fc4c038109 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Joe Perchesf3213832012-05-15 14:11:53 +000027#define pr_fmt(fmt) "IPv6: " fmt
28
Randy Dunlap4fc268d2006-01-11 12:17:47 -080029#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/errno.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040031#include <linux/export.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090040#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070043#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080045#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090046#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020047#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070058#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070059#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070060
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
Gao feng1716a962012-04-06 00:13:10 +000067static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
Eric Dumazet21efcfa2011-07-19 20:18:36 +000068 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070069static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080070static unsigned int ip6_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +000071static unsigned int ip6_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070072static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080076static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070077
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080083#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080084static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000085 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
Eric Dumazet95c96172012-04-15 05:58:06 +000087 unsigned int pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080088static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000089 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080091#endif
92
David S. Miller06582542011-01-27 14:58:42 -080093static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94{
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
Yan, Zheng8e2ec632011-09-05 21:34:30 +000099 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
David S. Miller06582542011-01-27 14:58:42 -0800102 if (!rt->rt6i_peer)
103 rt6_bind_peer(rt, 1);
104
105 peer = rt->rt6i_peer;
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124}
125
David S. Miller39232972012-01-26 15:22:32 -0500126static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
127{
128 struct in6_addr *p = &rt->rt6i_gateway;
129
David S. Millera7563f32012-01-26 16:29:16 -0500130 if (!ipv6_addr_any(p))
David S. Miller39232972012-01-26 15:22:32 -0500131 return (const void *) p;
132 return daddr;
133}
134
David S. Millerd3aaeb32011-07-18 00:40:17 -0700135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
David S. Miller39232972012-01-26 15:22:32 -0500137 struct rt6_info *rt = (struct rt6_info *) dst;
138 struct neighbour *n;
139
140 daddr = choose_neigh_daddr(rt, daddr);
141 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
David S. Millerf83c7792011-12-28 15:41:23 -0500142 if (n)
143 return n;
144 return neigh_create(&nd_tbl, daddr, dst->dev);
145}
146
David S. Miller8ade06c2011-12-29 18:51:57 -0500147static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
David S. Millerf83c7792011-12-28 15:41:23 -0500148{
David S. Miller8ade06c2011-12-29 18:51:57 -0500149 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
150 if (!n) {
151 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
152 if (IS_ERR(n))
153 return PTR_ERR(n);
154 }
David S. Millerf83c7792011-12-28 15:41:23 -0500155 dst_set_neighbour(&rt->dst, n);
156
157 return 0;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700158}
159
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800160static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800162 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 .gc = ip6_dst_gc,
164 .gc_thresh = 1024,
165 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800166 .default_advmss = ip6_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000167 .mtu = ip6_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800168 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 .destroy = ip6_dst_destroy,
170 .ifdown = ip6_dst_ifdown,
171 .negative_advice = ip6_negative_advice,
172 .link_failure = ip6_link_failure,
173 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700174 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700175 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176};
177
Steffen Klassertebb762f2011-11-23 02:12:51 +0000178static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -0800179{
Steffen Klassert618f9bc2011-11-23 02:13:31 +0000180 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
181
182 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -0800183}
184
David S. Miller14e50e52007-05-24 18:17:54 -0700185static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
186{
187}
188
Held Bernhard0972ddb2011-04-24 22:07:32 +0000189static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
190 unsigned long old)
191{
192 return NULL;
193}
194
David S. Miller14e50e52007-05-24 18:17:54 -0700195static struct dst_ops ip6_dst_blackhole_ops = {
196 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800197 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700198 .destroy = ip6_dst_destroy,
199 .check = ip6_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000200 .mtu = ip6_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800201 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700202 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000203 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700204 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700205};
206
David S. Miller62fa8a82011-01-26 20:51:05 -0800207static const u32 ip6_template_metrics[RTAX_MAX] = {
208 [RTAX_HOPLIMIT - 1] = 255,
209};
210
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800211static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700212 .dst = {
213 .__refcnt = ATOMIC_INIT(1),
214 .__use = 1,
215 .obsolete = -1,
216 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700217 .input = ip6_pkt_discard,
218 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700219 },
220 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700221 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222 .rt6i_metric = ~(u32) 0,
223 .rt6i_ref = ATOMIC_INIT(1),
224};
225
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route: a reject entry whose dst reports
 * -EACCES and hands packets to the prohibit handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* largest possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: a reject entry whose dst reports
 * -EINVAL and passes packets to dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* largest possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
262
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700264static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700265 struct net_device *dev,
266 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267{
David S. Miller957c6652011-06-24 15:25:00 -0700268 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700269
David S. Miller38308472011-12-03 18:02:47 -0500270 if (rt)
Madalin Bucurfbe58182011-09-26 07:04:56 +0000271 memset(&rt->rt6i_table, 0,
David S. Miller38308472011-12-03 18:02:47 -0500272 sizeof(*rt) - sizeof(struct dst_entry));
David S. Millercf911662011-04-28 14:31:47 -0700273
274 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275}
276
277static void ip6_dst_destroy(struct dst_entry *dst)
278{
279 struct rt6_info *rt = (struct rt6_info *)dst;
280 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800281 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000283 if (!(rt->dst.flags & DST_HOST))
284 dst_destroy_metrics_generic(dst);
285
David S. Miller38308472011-12-03 18:02:47 -0500286 if (idev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287 rt->rt6i_idev = NULL;
288 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900289 }
Gao feng1716a962012-04-06 00:13:10 +0000290
291 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
292 dst_release(dst->from);
293
David S. Millerb3419362010-11-30 12:27:11 -0800294 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800295 rt->rt6i_peer = NULL;
296 inet_putpeer(peer);
297 }
298}
299
David S. Miller6431cbc2011-02-07 20:38:06 -0800300static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
301
302static u32 rt6_peer_genid(void)
303{
304 return atomic_read(&__rt6_peer_genid);
305}
306
David S. Millerb3419362010-11-30 12:27:11 -0800307void rt6_bind_peer(struct rt6_info *rt, int create)
308{
309 struct inet_peer *peer;
310
David S. Millerb3419362010-11-30 12:27:11 -0800311 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
312 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
313 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800314 else
315 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316}
317
318static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
319 int how)
320{
321 struct rt6_info *rt = (struct rt6_info *)dst;
322 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800323 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900324 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325
David S. Miller38308472011-12-03 18:02:47 -0500326 if (dev != loopback_dev && idev && idev->dev == dev) {
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800327 struct inet6_dev *loopback_idev =
328 in6_dev_get(loopback_dev);
David S. Miller38308472011-12-03 18:02:47 -0500329 if (loopback_idev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700330 rt->rt6i_idev = loopback_idev;
331 in6_dev_put(idev);
332 }
333 }
334}
335
Eric Dumazeta50feda2012-05-18 18:57:34 +0000336static bool rt6_check_expired(const struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337{
Gao feng1716a962012-04-06 00:13:10 +0000338 struct rt6_info *ort = NULL;
339
340 if (rt->rt6i_flags & RTF_EXPIRES) {
341 if (time_after(jiffies, rt->dst.expires))
Eric Dumazeta50feda2012-05-18 18:57:34 +0000342 return true;
Gao feng1716a962012-04-06 00:13:10 +0000343 } else if (rt->dst.from) {
344 ort = (struct rt6_info *) rt->dst.from;
345 return (ort->rt6i_flags & RTF_EXPIRES) &&
346 time_after(jiffies, ort->dst.expires);
347 }
Eric Dumazeta50feda2012-05-18 18:57:34 +0000348 return false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349}
350
Eric Dumazeta50feda2012-05-18 18:57:34 +0000351static bool rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700352{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000353 return ipv6_addr_type(daddr) &
354 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700355}
356
Linus Torvalds1da177e2005-04-16 15:20:36 -0700357/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700358 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359 */
360
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800361static inline struct rt6_info *rt6_device_match(struct net *net,
362 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000363 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700365 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366{
367 struct rt6_info *local = NULL;
368 struct rt6_info *sprt;
369
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900370 if (!oif && ipv6_addr_any(saddr))
371 goto out;
372
Changli Gaod8d1f302010-06-10 23:31:35 -0700373 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -0500374 struct net_device *dev = sprt->dst.dev;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900375
376 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 if (dev->ifindex == oif)
378 return sprt;
379 if (dev->flags & IFF_LOOPBACK) {
David S. Miller38308472011-12-03 18:02:47 -0500380 if (!sprt->rt6i_idev ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700382 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900384 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 local->rt6i_idev->dev->ifindex == oif))
386 continue;
387 }
388 local = sprt;
389 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900390 } else {
391 if (ipv6_chk_addr(net, saddr, dev,
392 flags & RT6_LOOKUP_F_IFACE))
393 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900395 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900397 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 if (local)
399 return local;
400
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700401 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800402 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900404out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 return rt;
406}
407
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Send a Neighbour Solicitation towards the route's neighbour when its
 * state is not NUD_VALID and the last update is older than the interface's
 * rtr_probe_interval.  A NULL @rt, or a route without a neighbour, is a
 * no-op.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	bool send_probe = false;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	/*
	 * neigh->updated is modified below, so take the writer side of the
	 * lock: with only the reader side held, two CPUs could both pass the
	 * interval test and both send a probe, defeating the rate limit.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		send_probe = true;
	}
	write_unlock_bh(&neigh->lock);

	/* Transmit outside the neighbour lock, as before. */
	if (send_probe) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
447
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800449 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700451static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452{
David S. Millerd1918542011-12-28 20:19:20 -0500453 struct net_device *dev = rt->dst.dev;
David S. Miller161980f2007-04-06 11:42:27 -0700454 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800455 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700456 if ((dev->flags & IFF_LOOPBACK) &&
457 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
458 return 1;
459 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460}
461
Dave Jonesb6f99a22007-03-22 12:27:49 -0700462static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000464 struct neighbour *neigh;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800465 int m;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000466
467 rcu_read_lock();
David Miller27217452011-12-02 16:52:08 +0000468 neigh = dst_get_neighbour_noref(&rt->dst);
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700469 if (rt->rt6i_flags & RTF_NONEXTHOP ||
470 !(rt->rt6i_flags & RTF_GATEWAY))
471 m = 1;
472 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800473 read_lock_bh(&neigh->lock);
474 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700475 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800476#ifdef CONFIG_IPV6_ROUTER_PREF
477 else if (neigh->nud_state & NUD_FAILED)
478 m = 0;
479#endif
480 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800481 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800482 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800483 } else
484 m = 0;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000485 rcu_read_unlock();
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800486 return m;
487}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700488
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800489static int rt6_score_route(struct rt6_info *rt, int oif,
490 int strict)
491{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700492 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900493
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700494 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700495 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800496 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800497#ifdef CONFIG_IPV6_ROUTER_PREF
498 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
499#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700500 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800501 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800502 return -1;
503 return m;
504}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700505
David S. Millerf11e6652007-03-24 20:36:25 -0700506static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
507 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800508{
David S. Millerf11e6652007-03-24 20:36:25 -0700509 int m;
510
511 if (rt6_check_expired(rt))
512 goto out;
513
514 m = rt6_score_route(rt, oif, strict);
515 if (m < 0)
516 goto out;
517
518 if (m > *mpri) {
519 if (strict & RT6_LOOKUP_F_REACHABLE)
520 rt6_probe(match);
521 *mpri = m;
522 match = rt;
523 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
524 rt6_probe(rt);
525 }
526
527out:
528 return match;
529}
530
531static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
532 struct rt6_info *rr_head,
533 u32 metric, int oif, int strict)
534{
535 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800536 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537
David S. Millerf11e6652007-03-24 20:36:25 -0700538 match = NULL;
539 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700540 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700541 match = find_match(rt, oif, strict, &mpri, match);
542 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700543 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700544 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800545
David S. Millerf11e6652007-03-24 20:36:25 -0700546 return match;
547}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800548
David S. Millerf11e6652007-03-24 20:36:25 -0700549static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
550{
551 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800552 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553
David S. Millerf11e6652007-03-24 20:36:25 -0700554 rt0 = fn->rr_ptr;
555 if (!rt0)
556 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557
David S. Millerf11e6652007-03-24 20:36:25 -0700558 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800560 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700561 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700562 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700563
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800564 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700565 if (!next || next->rt6i_metric != rt0->rt6i_metric)
566 next = fn->leaf;
567
568 if (next != rt0)
569 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 }
571
David S. Millerd1918542011-12-28 20:19:20 -0500572 net = dev_net(rt0->dst.dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000573 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574}
575
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option (RFC 4191).
 *
 * @dev:    device the option arrived on; its ifindex keys the route
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address used to find or add the route
 *
 * A zero lifetime deletes an existing route; a non-zero lifetime adds the
 * route if missing, otherwise refreshes its preference flags.  The expiry
 * is then set, or cleared for an infinite lifetime.
 *
 * Returns 0 on success, -EINVAL if the option is malformed.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	/*
	 * Compare as signed: a plain "len < sizeof(...)" would convert a
	 * negative len to a huge unsigned value and let it through.
	 */
	if (len < (int) sizeof(struct route_info))
		return -EINVAL;

	/* The option length (units of 8 octets) must fit in the buffer. */
	if (rinfo->length > 3 || (rinfo->length << 3) > len)
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length (RFC 4191, 2.3):
	 * prefix_len > 64 requires Length 3, prefix_len > 0 requires
	 * Length 2 or 3.  Anything shorter does not carry enough prefix
	 * bytes for ipv6_addr_prefix() below.
	 */
	if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 3)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 2)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* Copy only prefix_len bits; the option holds no more. */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Lifetime 0 withdraws a previously learned route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		/*
		 * NOTE(review): presumably drops the reference returned by
		 * rt6_get_route_info()/rt6_add_route_info() - confirm.
		 */
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
648
/*
 * BACKTRACK - climb the fib6 tree after a lookup yielded the null entry.
 *
 * Expands in the caller's scope and relies on it: reads `rt`, reads and
 * updates `fn`, and jumps to the caller's `out` label (top-level root
 * reached) or `restart` label (a node carrying route info was found).
 * When the parent has a subtree other than the current node, the climb
 * continues with a source-address lookup in that subtree.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700666
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800667static struct rt6_info *ip6_pol_route_lookup(struct net *net,
668 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500669 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670{
671 struct fib6_node *fn;
672 struct rt6_info *rt;
673
Thomas Grafc71099a2006-08-04 23:20:06 -0700674 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500675 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700676restart:
677 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500678 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
679 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700680out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700681 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700682 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700683 return rt;
684
685}
686
Florian Westphalea6e5742011-09-05 16:05:44 +0200687struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
688 int flags)
689{
690 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
691}
692EXPORT_SYMBOL_GPL(ip6_route_lookup);
693
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900694struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
695 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700696{
David S. Miller4c9483b2011-03-12 16:22:43 -0500697 struct flowi6 fl6 = {
698 .flowi6_oif = oif,
699 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700700 };
701 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700702 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700703
Thomas Grafadaa70b2006-10-13 15:01:03 -0700704 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500705 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700706 flags |= RT6_LOOKUP_F_HAS_SADDR;
707 }
708
David S. Miller4c9483b2011-03-12 16:22:43 -0500709 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700710 if (dst->error == 0)
711 return (struct rt6_info *) dst;
712
713 dst_release(dst);
714
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 return NULL;
716}
717
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900718EXPORT_SYMBOL(rt6_lookup);
719
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold the route, it
   may be destroyed.
 */
725
Thomas Graf86872cb2006-08-22 00:01:08 -0700726static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727{
728 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700729 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730
Thomas Grafc71099a2006-08-04 23:20:06 -0700731 table = rt->rt6i_table;
732 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700733 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700734 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735
736 return err;
737}
738
Thomas Graf40e22e82006-08-22 00:00:45 -0700739int ip6_ins_rt(struct rt6_info *rt)
740{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800741 struct nl_info info = {
David S. Millerd1918542011-12-28 20:19:20 -0500742 .nl_net = dev_net(rt->dst.dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800743 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800744 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700745}
746
Gao feng1716a962012-04-06 00:13:10 +0000747static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000748 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000749 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 struct rt6_info *rt;
752
753 /*
754 * Clone the route.
755 */
756
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000757 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758
759 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800760 int attempts = !in_softirq();
761
David S. Miller38308472011-12-03 18:02:47 -0500762 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
David S. Millerbb3c3682011-12-13 17:35:06 -0500763 if (ort->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000764 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900765 rt->rt6i_flags |= RTF_ANYCAST;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000766 rt->rt6i_gateway = *daddr;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900767 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768
Linus Torvalds1da177e2005-04-16 15:20:36 -0700769 rt->rt6i_flags |= RTF_CACHE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770
771#ifdef CONFIG_IPV6_SUBTREES
772 if (rt->rt6i_src.plen && saddr) {
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000773 rt->rt6i_src.addr = *saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 rt->rt6i_src.plen = 128;
775 }
776#endif
777
David S. Miller14deae42009-01-04 16:04:39 -0800778 retry:
David S. Miller8ade06c2011-12-29 18:51:57 -0500779 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
David S. Millerd1918542011-12-28 20:19:20 -0500780 struct net *net = dev_net(rt->dst.dev);
David S. Miller14deae42009-01-04 16:04:39 -0800781 int saved_rt_min_interval =
782 net->ipv6.sysctl.ip6_rt_gc_min_interval;
783 int saved_rt_elasticity =
784 net->ipv6.sysctl.ip6_rt_gc_elasticity;
785
786 if (attempts-- > 0) {
787 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
788 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
789
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000790 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800791
792 net->ipv6.sysctl.ip6_rt_gc_elasticity =
793 saved_rt_elasticity;
794 net->ipv6.sysctl.ip6_rt_gc_min_interval =
795 saved_rt_min_interval;
796 goto retry;
797 }
798
Joe Perchesf3213832012-05-15 14:11:53 +0000799 net_warn_ratelimited("Neighbour table overflow\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700800 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800801 return NULL;
802 }
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800803 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800805 return rt;
806}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000808static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
809 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800810{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000811 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
812
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800813 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800814 rt->rt6i_flags |= RTF_CACHE;
David Miller27217452011-12-02 16:52:08 +0000815 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800816 }
817 return rt;
818}
819
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800820static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
David S. Miller4c9483b2011-03-12 16:22:43 -0500821 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822{
823 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800824 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700825 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800827 int err;
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700828 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700829
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700830 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700831
832relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700833 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800835restart_2:
David S. Miller4c9483b2011-03-12 16:22:43 -0500836 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837
838restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700839 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800840
David S. Miller4c9483b2011-03-12 16:22:43 -0500841 BACKTRACK(net, &fl6->saddr);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800842 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800843 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800844 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845
Changli Gaod8d1f302010-06-10 23:31:35 -0700846 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700847 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800848
David Miller27217452011-12-02 16:52:08 +0000849 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
David S. Miller4c9483b2011-03-12 16:22:43 -0500850 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800851 else if (!(rt->dst.flags & DST_HOST))
David S. Miller4c9483b2011-03-12 16:22:43 -0500852 nrt = rt6_alloc_clone(rt, &fl6->daddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800853 else
854 goto out2;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800855
Changli Gaod8d1f302010-06-10 23:31:35 -0700856 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800857 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800858
Changli Gaod8d1f302010-06-10 23:31:35 -0700859 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800860 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700861 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800862 if (!err)
863 goto out2;
864 }
865
866 if (--attempts <= 0)
867 goto out2;
868
869 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700870 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800871 * released someone could insert this route. Relookup.
872 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700873 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800874 goto relookup;
875
876out:
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800877 if (reachable) {
878 reachable = 0;
879 goto restart_2;
880 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700881 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700882 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883out2:
Changli Gaod8d1f302010-06-10 23:31:35 -0700884 rt->dst.lastuse = jiffies;
885 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700886
887 return rt;
888}
889
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800890static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500891 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700892{
David S. Miller4c9483b2011-03-12 16:22:43 -0500893 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700894}
895
Shmulik Ladkani72331bc2012-04-01 04:03:45 +0000896static struct dst_entry *ip6_route_input_lookup(struct net *net,
897 struct net_device *dev,
898 struct flowi6 *fl6, int flags)
899{
900 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
901 flags |= RT6_LOOKUP_F_IFACE;
902
903 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
904}
905
Thomas Grafc71099a2006-08-04 23:20:06 -0700906void ip6_route_input(struct sk_buff *skb)
907{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000908 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900909 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700910 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500911 struct flowi6 fl6 = {
912 .flowi6_iif = skb->dev->ifindex,
913 .daddr = iph->daddr,
914 .saddr = iph->saddr,
David S. Miller38308472011-12-03 18:02:47 -0500915 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
David S. Miller4c9483b2011-03-12 16:22:43 -0500916 .flowi6_mark = skb->mark,
917 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700918 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700919
Shmulik Ladkani72331bc2012-04-01 04:03:45 +0000920 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
Thomas Grafc71099a2006-08-04 23:20:06 -0700921}
922
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800923static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500924 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700925{
David S. Miller4c9483b2011-03-12 16:22:43 -0500926 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700927}
928
Florian Westphal9c7a4f9c2011-03-22 19:17:36 -0700929struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500930 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700931{
932 int flags = 0;
933
David S. Miller4c9483b2011-03-12 16:22:43 -0500934 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700935 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700936
David S. Miller4c9483b2011-03-12 16:22:43 -0500937 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700938 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000939 else if (sk)
940 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700941
David S. Miller4c9483b2011-03-12 16:22:43 -0500942 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943}
944
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900945EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946
David S. Miller2774c132011-03-01 14:59:04 -0800947struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700948{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700949 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700950 struct dst_entry *new = NULL;
951
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700952 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700953 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700954 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
955
Changli Gaod8d1f302010-06-10 23:31:35 -0700956 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700957
David S. Miller14e50e52007-05-24 18:17:54 -0700958 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800959 new->input = dst_discard;
960 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700961
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000962 if (dst_metrics_read_only(&ort->dst))
963 new->_metrics = ort->dst._metrics;
964 else
965 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700966 rt->rt6i_idev = ort->rt6i_idev;
967 if (rt->rt6i_idev)
968 in6_dev_hold(rt->rt6i_idev);
David S. Miller14e50e52007-05-24 18:17:54 -0700969
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000970 rt->rt6i_gateway = ort->rt6i_gateway;
Gao feng1716a962012-04-06 00:13:10 +0000971 rt->rt6i_flags = ort->rt6i_flags;
972 rt6_clean_expires(rt);
David S. Miller14e50e52007-05-24 18:17:54 -0700973 rt->rt6i_metric = 0;
974
975 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
976#ifdef CONFIG_IPV6_SUBTREES
977 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
978#endif
979
980 dst_free(new);
981 }
982
David S. Miller69ead7a2011-03-01 14:45:33 -0800983 dst_release(dst_orig);
984 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700985}
David S. Miller14e50e52007-05-24 18:17:54 -0700986
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987/*
988 * Destination cache support functions
989 */
990
991static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
992{
993 struct rt6_info *rt;
994
995 rt = (struct rt6_info *) dst;
996
David S. Miller6431cbc2011-02-07 20:38:06 -0800997 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
998 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
999 if (!rt->rt6i_peer)
1000 rt6_bind_peer(rt, 0);
1001 rt->rt6i_peer_genid = rt6_peer_genid();
1002 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -08001004 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005 return NULL;
1006}
1007
1008static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1009{
1010 struct rt6_info *rt = (struct rt6_info *) dst;
1011
1012 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001013 if (rt->rt6i_flags & RTF_CACHE) {
1014 if (rt6_check_expired(rt)) {
1015 ip6_del_rt(rt);
1016 dst = NULL;
1017 }
1018 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001020 dst = NULL;
1021 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001023 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024}
1025
1026static void ip6_link_failure(struct sk_buff *skb)
1027{
1028 struct rt6_info *rt;
1029
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001030 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031
Eric Dumazetadf30902009-06-02 05:19:30 +00001032 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 if (rt) {
Gao feng1716a962012-04-06 00:13:10 +00001034 if (rt->rt6i_flags & RTF_CACHE)
1035 rt6_update_expires(rt, 0);
1036 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 rt->rt6i_node->fn_sernum = -1;
1038 }
1039}
1040
1041static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1042{
1043 struct rt6_info *rt6 = (struct rt6_info*)dst;
1044
1045 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1046 rt6->rt6i_flags |= RTF_MODIFIED;
1047 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -08001048 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001050 features |= RTAX_FEATURE_ALLFRAG;
1051 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 }
David S. Millerdefb3512010-12-08 21:16:57 -08001053 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001054 }
1055}
1056
David S. Miller0dbaee32010-12-13 12:52:14 -08001057static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058{
David S. Miller0dbaee32010-12-13 12:52:14 -08001059 struct net_device *dev = dst->dev;
1060 unsigned int mtu = dst_mtu(dst);
1061 struct net *net = dev_net(dev);
1062
Linus Torvalds1da177e2005-04-16 15:20:36 -07001063 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1064
Daniel Lezcano55786892008-03-04 13:47:47 -08001065 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1066 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067
1068 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001069 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1070 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1071 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 * rely only on pmtu discovery"
1073 */
1074 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1075 mtu = IPV6_MAXPLEN;
1076 return mtu;
1077}
1078
Steffen Klassertebb762f2011-11-23 02:12:51 +00001079static unsigned int ip6_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001080{
David S. Millerd33e4552010-12-14 13:01:14 -08001081 struct inet6_dev *idev;
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001082 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1083
1084 if (mtu)
1085 return mtu;
1086
1087 mtu = IPV6_MIN_MTU;
David S. Millerd33e4552010-12-14 13:01:14 -08001088
1089 rcu_read_lock();
1090 idev = __in6_dev_get(dst->dev);
1091 if (idev)
1092 mtu = idev->cnf.mtu6;
1093 rcu_read_unlock();
1094
1095 return mtu;
1096}
1097
/*
 * Head of the list of entries created by icmp6_dst_alloc(), chained through
 * dst->next and walked by icmp6_dst_gc() and icmp6_clean_all().  Those
 * functions take icmp6_dst_lock around every access to the list.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001100
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001101struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102 struct neighbour *neigh,
David S. Miller87a11572011-12-06 17:04:13 -05001103 struct flowi6 *fl6)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104{
David S. Miller87a11572011-12-06 17:04:13 -05001105 struct dst_entry *dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 struct rt6_info *rt;
1107 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001108 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109
David S. Miller38308472011-12-03 18:02:47 -05001110 if (unlikely(!idev))
Eric Dumazet122bdf62012-03-14 21:13:11 +00001111 return ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112
David S. Miller957c6652011-06-24 15:25:00 -07001113 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
David S. Miller38308472011-12-03 18:02:47 -05001114 if (unlikely(!rt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115 in6_dev_put(idev);
David S. Miller87a11572011-12-06 17:04:13 -05001116 dst = ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 goto out;
1118 }
1119
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120 if (neigh)
1121 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001122 else {
David S. Millerf83c7792011-12-28 15:41:23 -05001123 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
David S. Millerb43faac2011-12-13 16:48:21 -05001124 if (IS_ERR(neigh)) {
RongQing.Li252c3d82012-01-12 22:33:46 +00001125 in6_dev_put(idev);
David S. Millerb43faac2011-12-13 16:48:21 -05001126 dst_free(&rt->dst);
1127 return ERR_CAST(neigh);
1128 }
David S. Miller14deae42009-01-04 16:04:39 -08001129 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001131 rt->dst.flags |= DST_HOST;
1132 rt->dst.output = ip6_output;
David S. Miller69cce1d2011-07-17 23:09:49 -07001133 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001134 atomic_set(&rt->dst.__refcnt, 1);
David S. Miller87a11572011-12-06 17:04:13 -05001135 rt->rt6i_dst.addr = fl6->daddr;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001136 rt->rt6i_dst.plen = 128;
1137 rt->rt6i_idev = idev;
Gao feng70116872011-10-28 02:46:57 +00001138 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001140 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001141 rt->dst.next = icmp6_dst_gc_list;
1142 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001143 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001144
Daniel Lezcano55786892008-03-04 13:47:47 -08001145 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146
David S. Miller87a11572011-12-06 17:04:13 -05001147 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1148
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149out:
David S. Miller87a11572011-12-06 17:04:13 -05001150 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151}
1152
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001153int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154{
Hagen Paul Pfeifere9476e952011-02-25 05:45:19 +00001155 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001156 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001158 spin_lock_bh(&icmp6_dst_lock);
1159 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001160
Linus Torvalds1da177e2005-04-16 15:20:36 -07001161 while ((dst = *pprev) != NULL) {
1162 if (!atomic_read(&dst->__refcnt)) {
1163 *pprev = dst->next;
1164 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165 } else {
1166 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001167 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168 }
1169 }
1170
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001171 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001172
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001173 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174}
1175
David S. Miller1e493d12008-09-10 17:27:15 -07001176static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1177 void *arg)
1178{
1179 struct dst_entry *dst, **pprev;
1180
1181 spin_lock_bh(&icmp6_dst_lock);
1182 pprev = &icmp6_dst_gc_list;
1183 while ((dst = *pprev) != NULL) {
1184 struct rt6_info *rt = (struct rt6_info *) dst;
1185 if (func(rt, arg)) {
1186 *pprev = dst->next;
1187 dst_free(dst);
1188 } else {
1189 pprev = &dst->next;
1190 }
1191 }
1192 spin_unlock_bh(&icmp6_dst_lock);
1193}
1194
Daniel Lezcano569d3642008-01-18 03:56:57 -08001195static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001198 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001199 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1200 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1201 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1202 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1203 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001204 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205
Eric Dumazetfc66f952010-10-08 06:37:34 +00001206 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001207 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001208 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 goto out;
1210
Benjamin Thery6891a342008-03-04 13:49:47 -08001211 net->ipv6.ip6_rt_gc_expire++;
1212 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1213 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001214 entries = dst_entries_get_slow(ops);
1215 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001216 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001218 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001219 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220}
1221
1222/* Clean host part of a prefix. Not necessary in radix tree,
1223 but results in cleaner routing tables.
1224
1225 Remove it only when all the things will work!
1226 */
1227
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001228int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001229{
David S. Miller5170ae82010-12-12 21:35:57 -08001230 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001231 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001232 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001233 struct inet6_dev *idev;
1234
1235 rcu_read_lock();
1236 idev = __in6_dev_get(dev);
1237 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001238 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001239 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001240 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001241 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 }
1243 return hoplimit;
1244}
David S. Millerabbf46a2010-12-12 21:14:46 -08001245EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246
1247/*
1248 *
1249 */
1250
Thomas Graf86872cb2006-08-22 00:01:08 -07001251int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252{
1253 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001254 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255 struct rt6_info *rt = NULL;
1256 struct net_device *dev = NULL;
1257 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001258 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259 int addr_type;
1260
Thomas Graf86872cb2006-08-22 00:01:08 -07001261 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 return -EINVAL;
1263#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001264 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 return -EINVAL;
1266#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001267 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001269 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270 if (!dev)
1271 goto out;
1272 idev = in6_dev_get(dev);
1273 if (!idev)
1274 goto out;
1275 }
1276
Thomas Graf86872cb2006-08-22 00:01:08 -07001277 if (cfg->fc_metric == 0)
1278 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001279
Matti Vaittinend71314b2011-11-14 00:14:49 +00001280 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05001281 if (cfg->fc_nlinfo.nlh &&
1282 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
Matti Vaittinend71314b2011-11-14 00:14:49 +00001283 table = fib6_get_table(net, cfg->fc_table);
David S. Miller38308472011-12-03 18:02:47 -05001284 if (!table) {
Joe Perchesf3213832012-05-15 14:11:53 +00001285 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
Matti Vaittinend71314b2011-11-14 00:14:49 +00001286 table = fib6_new_table(net, cfg->fc_table);
1287 }
1288 } else {
1289 table = fib6_new_table(net, cfg->fc_table);
1290 }
David S. Miller38308472011-12-03 18:02:47 -05001291
1292 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001293 goto out;
Thomas Grafc71099a2006-08-04 23:20:06 -07001294
David S. Miller957c6652011-06-24 15:25:00 -07001295 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001296
David S. Miller38308472011-12-03 18:02:47 -05001297 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298 err = -ENOMEM;
1299 goto out;
1300 }
1301
Changli Gaod8d1f302010-06-10 23:31:35 -07001302 rt->dst.obsolete = -1;
Gao feng1716a962012-04-06 00:13:10 +00001303
1304 if (cfg->fc_flags & RTF_EXPIRES)
1305 rt6_set_expires(rt, jiffies +
1306 clock_t_to_jiffies(cfg->fc_expires));
1307 else
1308 rt6_clean_expires(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309
Thomas Graf86872cb2006-08-22 00:01:08 -07001310 if (cfg->fc_protocol == RTPROT_UNSPEC)
1311 cfg->fc_protocol = RTPROT_BOOT;
1312 rt->rt6i_protocol = cfg->fc_protocol;
1313
1314 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315
1316 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001317 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001318 else if (cfg->fc_flags & RTF_LOCAL)
1319 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001321 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322
Changli Gaod8d1f302010-06-10 23:31:35 -07001323 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324
Thomas Graf86872cb2006-08-22 00:01:08 -07001325 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1326 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001328 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001330 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1331 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1332 if (!metrics) {
1333 err = -ENOMEM;
1334 goto out;
1335 }
1336 dst_init_metrics(&rt->dst, metrics, 0);
1337 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001339 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1340 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341#endif
1342
Thomas Graf86872cb2006-08-22 00:01:08 -07001343 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344
1345 /* We cannot add true routes via loopback here,
1346 they would result in kernel looping; promote them to reject routes
1347 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001348 if ((cfg->fc_flags & RTF_REJECT) ||
David S. Miller38308472011-12-03 18:02:47 -05001349 (dev && (dev->flags & IFF_LOOPBACK) &&
1350 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1351 !(cfg->fc_flags & RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001353 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354 if (dev) {
1355 dev_put(dev);
1356 in6_dev_put(idev);
1357 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001358 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 dev_hold(dev);
1360 idev = in6_dev_get(dev);
1361 if (!idev) {
1362 err = -ENODEV;
1363 goto out;
1364 }
1365 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001366 rt->dst.output = ip6_pkt_discard_out;
1367 rt->dst.input = ip6_pkt_discard;
1368 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1370 goto install_route;
1371 }
1372
Thomas Graf86872cb2006-08-22 00:01:08 -07001373 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001374 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 int gwa_type;
1376
Thomas Graf86872cb2006-08-22 00:01:08 -07001377 gw_addr = &cfg->fc_gateway;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001378 rt->rt6i_gateway = *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 gwa_type = ipv6_addr_type(gw_addr);
1380
1381 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1382 struct rt6_info *grt;
1383
1384 /* IPv6 strictly inhibits using not link-local
1385 addresses as nexthop address.
1386 Otherwise, router will not able to send redirects.
1387 It is very good, but in some (rare!) circumstances
1388 (SIT, PtP, NBMA NOARP links) it is handy to allow
1389 some exceptions. --ANK
1390 */
1391 err = -EINVAL;
David S. Miller38308472011-12-03 18:02:47 -05001392 if (!(gwa_type & IPV6_ADDR_UNICAST))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393 goto out;
1394
Daniel Lezcano55786892008-03-04 13:47:47 -08001395 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396
1397 err = -EHOSTUNREACH;
David S. Miller38308472011-12-03 18:02:47 -05001398 if (!grt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399 goto out;
1400 if (dev) {
David S. Millerd1918542011-12-28 20:19:20 -05001401 if (dev != grt->dst.dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001402 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 goto out;
1404 }
1405 } else {
David S. Millerd1918542011-12-28 20:19:20 -05001406 dev = grt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001407 idev = grt->rt6i_idev;
1408 dev_hold(dev);
1409 in6_dev_hold(grt->rt6i_idev);
1410 }
David S. Miller38308472011-12-03 18:02:47 -05001411 if (!(grt->rt6i_flags & RTF_GATEWAY))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001413 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
1415 if (err)
1416 goto out;
1417 }
1418 err = -EINVAL;
David S. Miller38308472011-12-03 18:02:47 -05001419 if (!dev || (dev->flags & IFF_LOOPBACK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001420 goto out;
1421 }
1422
1423 err = -ENODEV;
David S. Miller38308472011-12-03 18:02:47 -05001424 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001425 goto out;
1426
Daniel Walterc3968a82011-04-13 21:10:57 +00001427 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1428 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1429 err = -EINVAL;
1430 goto out;
1431 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001432 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
Daniel Walterc3968a82011-04-13 21:10:57 +00001433 rt->rt6i_prefsrc.plen = 128;
1434 } else
1435 rt->rt6i_prefsrc.plen = 0;
1436
Thomas Graf86872cb2006-08-22 00:01:08 -07001437 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller8ade06c2011-12-29 18:51:57 -05001438 err = rt6_bind_neighbour(rt, dev);
David S. Millerf83c7792011-12-28 15:41:23 -05001439 if (err)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441 }
1442
Thomas Graf86872cb2006-08-22 00:01:08 -07001443 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444
1445install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001446 if (cfg->fc_mx) {
1447 struct nlattr *nla;
1448 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449
Thomas Graf86872cb2006-08-22 00:01:08 -07001450 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001451 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001452
1453 if (type) {
1454 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 err = -EINVAL;
1456 goto out;
1457 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001458
David S. Millerdefb3512010-12-08 21:16:57 -08001459 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001461 }
1462 }
1463
Changli Gaod8d1f302010-06-10 23:31:35 -07001464 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001465 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001466 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001467
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001468 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001469
Thomas Graf86872cb2006-08-22 00:01:08 -07001470 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471
1472out:
1473 if (dev)
1474 dev_put(dev);
1475 if (idev)
1476 in6_dev_put(idev);
1477 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001478 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 return err;
1480}
1481
Thomas Graf86872cb2006-08-22 00:01:08 -07001482static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483{
1484 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001485 struct fib6_table *table;
David S. Millerd1918542011-12-28 20:19:20 -05001486 struct net *net = dev_net(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001488 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001489 return -ENOENT;
1490
Thomas Grafc71099a2006-08-04 23:20:06 -07001491 table = rt->rt6i_table;
1492 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493
Thomas Graf86872cb2006-08-22 00:01:08 -07001494 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001495 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496
Thomas Grafc71099a2006-08-04 23:20:06 -07001497 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498
1499 return err;
1500}
1501
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001502int ip6_del_rt(struct rt6_info *rt)
1503{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001504 struct nl_info info = {
David S. Millerd1918542011-12-28 20:19:20 -05001505 .nl_net = dev_net(rt->dst.dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001506 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001507 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001508}
1509
Thomas Graf86872cb2006-08-22 00:01:08 -07001510static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511{
Thomas Grafc71099a2006-08-04 23:20:06 -07001512 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513 struct fib6_node *fn;
1514 struct rt6_info *rt;
1515 int err = -ESRCH;
1516
Daniel Lezcano55786892008-03-04 13:47:47 -08001517 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
David S. Miller38308472011-12-03 18:02:47 -05001518 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001519 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520
Thomas Grafc71099a2006-08-04 23:20:06 -07001521 read_lock_bh(&table->tb6_lock);
1522
1523 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001524 &cfg->fc_dst, cfg->fc_dst_len,
1525 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001526
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001528 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001529 if (cfg->fc_ifindex &&
David S. Millerd1918542011-12-28 20:19:20 -05001530 (!rt->dst.dev ||
1531 rt->dst.dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001533 if (cfg->fc_flags & RTF_GATEWAY &&
1534 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001536 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001537 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001538 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001539 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540
Thomas Graf86872cb2006-08-22 00:01:08 -07001541 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542 }
1543 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001544 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545
1546 return err;
1547}
1548
1549/*
1550 * Handle redirects
1551 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001552struct ip6rd_flowi {
David S. Miller4c9483b2011-03-12 16:22:43 -05001553 struct flowi6 fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001554 struct in6_addr gateway;
1555};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001556
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001557static struct rt6_info *__ip6_route_redirect(struct net *net,
1558 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001559 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001560 int flags)
1561{
David S. Miller4c9483b2011-03-12 16:22:43 -05001562 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001563 struct rt6_info *rt;
1564 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001565
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001567 * Get the "current" route for this destination and
1568 * check if the redirect has come from approriate router.
1569 *
1570 * RFC 2461 specifies that redirects should only be
1571 * accepted if they come from the nexthop to the target.
1572 * Due to the way the routes are chosen, this notion
1573 * is a bit fuzzy and one might need to check all possible
1574 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576
Thomas Grafc71099a2006-08-04 23:20:06 -07001577 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001578 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001579restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001580 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001581 /*
1582 * Current route is on-link; redirect is always invalid.
1583 *
1584 * Seems, previous statement is not true. It could
1585 * be node, which looks for us as on-link (f.e. proxy ndisc)
1586 * But then router serving it might decide, that we should
1587 * know truth 8)8) --ANK (980726).
1588 */
1589 if (rt6_check_expired(rt))
1590 continue;
1591 if (!(rt->rt6i_flags & RTF_GATEWAY))
1592 continue;
David S. Millerd1918542011-12-28 20:19:20 -05001593 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001594 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001595 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001596 continue;
1597 break;
1598 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001599
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001600 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001601 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001602 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001603out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001604 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001605
1606 read_unlock_bh(&table->tb6_lock);
1607
1608 return rt;
1609};
1610
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001611static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1612 const struct in6_addr *src,
1613 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001614 struct net_device *dev)
1615{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001616 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001617 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001618 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001619 .fl6 = {
1620 .flowi6_oif = dev->ifindex,
1621 .daddr = *dest,
1622 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001623 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001624 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001625
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001626 rdfl.gateway = *gateway;
Brian Haley86c36ce2009-10-07 13:58:01 -07001627
Thomas Grafadaa70b2006-10-13 15:01:03 -07001628 if (rt6_need_strict(dest))
1629 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001630
David S. Miller4c9483b2011-03-12 16:22:43 -05001631 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001632 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001633}
1634
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001635void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1636 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001637 struct neighbour *neigh, u8 *lladdr, int on_link)
1638{
1639 struct rt6_info *rt, *nrt = NULL;
1640 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001641 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001642
1643 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1644
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001645 if (rt == net->ipv6.ip6_null_entry) {
Joe Perchese87cc472012-05-13 21:56:26 +00001646 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001647 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648 }
1649
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650 /*
1651 * We have finally decided to accept it.
1652 */
1653
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001654 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1656 NEIGH_UPDATE_F_OVERRIDE|
1657 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1658 NEIGH_UPDATE_F_ISROUTER))
1659 );
1660
1661 /*
1662 * Redirect received -> path was valid.
1663 * Look, redirects are sent only in response to data packets,
1664 * so that this nexthop apparently is reachable. --ANK
1665 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001666 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667
1668 /* Duplicate redirect: silently ignore. */
David Miller27217452011-12-02 16:52:08 +00001669 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 goto out;
1671
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001672 nrt = ip6_rt_copy(rt, dest);
David S. Miller38308472011-12-03 18:02:47 -05001673 if (!nrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 goto out;
1675
1676 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1677 if (on_link)
1678 nrt->rt6i_flags &= ~RTF_GATEWAY;
1679
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001680 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
David S. Miller69cce1d2011-07-17 23:09:49 -07001681 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682
Thomas Graf40e22e82006-08-22 00:00:45 -07001683 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 goto out;
1685
Changli Gaod8d1f302010-06-10 23:31:35 -07001686 netevent.old = &rt->dst;
1687 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001688 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1689
David S. Miller38308472011-12-03 18:02:47 -05001690 if (rt->rt6i_flags & RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001691 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 return;
1693 }
1694
1695out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001696 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697}
1698
1699/*
1700 * Handle ICMP "packet too big" messages
1701 * i.e. Path MTU discovery
1702 */
1703
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001704static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001705 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706{
1707 struct rt6_info *rt, *nrt;
1708 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001709again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001710 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
David S. Miller38308472011-12-03 18:02:47 -05001711 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712 return;
1713
Andrey Vagind3052b52010-12-11 15:20:11 +00001714 if (rt6_check_expired(rt)) {
1715 ip6_del_rt(rt);
1716 goto again;
1717 }
1718
Changli Gaod8d1f302010-06-10 23:31:35 -07001719 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001720 goto out;
1721
1722 if (pmtu < IPV6_MIN_MTU) {
1723 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001724 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 * MTU (1280) and a fragment header should always be included
1726 * after a node receiving Too Big message reporting PMTU is
1727 * less than the IPv6 Minimum Link MTU.
1728 */
1729 pmtu = IPV6_MIN_MTU;
1730 allfrag = 1;
1731 }
1732
1733 /* New mtu received -> path was valid.
1734 They are sent only in response to data packets,
1735 so that this nexthop apparently is reachable. --ANK
1736 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001737 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738
1739 /* Host route. If it is static, it would be better
1740 not to override it, but add new one, so that
1741 when cache entry will expire old pmtu
1742 would return automatically.
1743 */
1744 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001745 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1746 if (allfrag) {
1747 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1748 features |= RTAX_FEATURE_ALLFRAG;
1749 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1750 }
Gao feng1716a962012-04-06 00:13:10 +00001751 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1752 rt->rt6i_flags |= RTF_MODIFIED;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 goto out;
1754 }
1755
1756 /* Network route.
1757 Two cases are possible:
1758 1. It is connected route. Action: COW
1759 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1760 */
David Miller27217452011-12-02 16:52:08 +00001761 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001762 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001763 else
1764 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001765
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001766 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001767 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1768 if (allfrag) {
1769 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1770 features |= RTAX_FEATURE_ALLFRAG;
1771 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1772 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001773
1774 /* According to RFC 1981, detecting PMTU increase shouldn't be
1775 * happened within 5 mins, the recommended timer is 10 mins.
1776 * Here this route expiration time is set to ip6_rt_mtu_expires
1777 * which is 10 mins. After 10 mins the decreased pmtu is expired
1778 * and detecting PMTU increase will be automatically happened.
1779 */
Gao feng1716a962012-04-06 00:13:10 +00001780 rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1781 nrt->rt6i_flags |= RTF_DYNAMIC;
Thomas Graf40e22e82006-08-22 00:00:45 -07001782 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001785 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786}
1787
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001788void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001789 struct net_device *dev, u32 pmtu)
1790{
1791 struct net *net = dev_net(dev);
1792
1793 /*
1794 * RFC 1981 states that a node "MUST reduce the size of the packets it
1795 * is sending along the path" that caused the Packet Too Big message.
1796 * Since it's not possible in the general case to determine which
1797 * interface was used to send the original packet, we update the MTU
1798 * on the interface that will be used to send future packets. We also
1799 * update the MTU on the interface that received the Packet Too Big in
1800 * case the original packet was forced out that interface with
1801 * SO_BINDTODEVICE or similar. This is the next best thing to the
1802 * correct behaviour, which would be to update the MTU on all
1803 * interfaces.
1804 */
1805 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1806 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1807}
1808
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809/*
1810 * Misc support functions
1811 */
1812
Gao feng1716a962012-04-06 00:13:10 +00001813static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001814 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815{
David S. Millerd1918542011-12-28 20:19:20 -05001816 struct net *net = dev_net(ort->dst.dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001817 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001818 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819
1820 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001821 rt->dst.input = ort->dst.input;
1822 rt->dst.output = ort->dst.output;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001823 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001825 rt->rt6i_dst.addr = *dest;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001826 rt->rt6i_dst.plen = 128;
David S. Millerdefb3512010-12-08 21:16:57 -08001827 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001828 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829 rt->rt6i_idev = ort->rt6i_idev;
1830 if (rt->rt6i_idev)
1831 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001832 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001834 rt->rt6i_gateway = ort->rt6i_gateway;
Gao feng1716a962012-04-06 00:13:10 +00001835 rt->rt6i_flags = ort->rt6i_flags;
1836 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1837 (RTF_DEFAULT | RTF_ADDRCONF))
1838 rt6_set_from(rt, ort);
1839 else
1840 rt6_clean_expires(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001841 rt->rt6i_metric = 0;
1842
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843#ifdef CONFIG_IPV6_SUBTREES
1844 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1845#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001846 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001847 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848 }
1849 return rt;
1850}
1851
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001852#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001853static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001854 const struct in6_addr *prefix, int prefixlen,
1855 const struct in6_addr *gwaddr, int ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001856{
1857 struct fib6_node *fn;
1858 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001859 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001860
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001861 table = fib6_get_table(net, RT6_TABLE_INFO);
David S. Miller38308472011-12-03 18:02:47 -05001862 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001863 return NULL;
1864
1865 write_lock_bh(&table->tb6_lock);
1866 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001867 if (!fn)
1868 goto out;
1869
Changli Gaod8d1f302010-06-10 23:31:35 -07001870 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -05001871 if (rt->dst.dev->ifindex != ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001872 continue;
1873 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1874 continue;
1875 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1876 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001877 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001878 break;
1879 }
1880out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001881 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001882 return rt;
1883}
1884
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001885static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001886 const struct in6_addr *prefix, int prefixlen,
1887 const struct in6_addr *gwaddr, int ifindex,
Eric Dumazet95c96172012-04-15 05:58:06 +00001888 unsigned int pref)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001889{
Thomas Graf86872cb2006-08-22 00:01:08 -07001890 struct fib6_config cfg = {
1891 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001892 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001893 .fc_ifindex = ifindex,
1894 .fc_dst_len = prefixlen,
1895 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1896 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001897 .fc_nlinfo.pid = 0,
1898 .fc_nlinfo.nlh = NULL,
1899 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001900 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001901
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001902 cfg.fc_dst = *prefix;
1903 cfg.fc_gateway = *gwaddr;
Thomas Graf86872cb2006-08-22 00:01:08 -07001904
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001905 /* We should treat it as a default route if prefix length is 0. */
1906 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001907 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001908
Thomas Graf86872cb2006-08-22 00:01:08 -07001909 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001910
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001911 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001912}
1913#endif
1914
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001915struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001916{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001917 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001918 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001920 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
David S. Miller38308472011-12-03 18:02:47 -05001921 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001922 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923
Thomas Grafc71099a2006-08-04 23:20:06 -07001924 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001925 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -05001926 if (dev == rt->dst.dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001927 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001928 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1929 break;
1930 }
1931 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001932 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001933 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934 return rt;
1935}
1936
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001937struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001938 struct net_device *dev,
1939 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940{
Thomas Graf86872cb2006-08-22 00:01:08 -07001941 struct fib6_config cfg = {
1942 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001943 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001944 .fc_ifindex = dev->ifindex,
1945 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1946 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001947 .fc_nlinfo.pid = 0,
1948 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001949 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001950 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001951
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001952 cfg.fc_gateway = *gwaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953
Thomas Graf86872cb2006-08-22 00:01:08 -07001954 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956 return rt6_get_dflt_router(gwaddr, dev);
1957}
1958
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001959void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960{
1961 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001962 struct fib6_table *table;
1963
1964 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001965 table = fib6_get_table(net, RT6_TABLE_DFLT);
David S. Miller38308472011-12-03 18:02:47 -05001966 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001967 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968
1969restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001970 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001971 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001973 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001974 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001975 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001976 goto restart;
1977 }
1978 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001979 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001980}
1981
Daniel Lezcano55786892008-03-04 13:47:47 -08001982static void rtmsg_to_fib6_config(struct net *net,
1983 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001984 struct fib6_config *cfg)
1985{
1986 memset(cfg, 0, sizeof(*cfg));
1987
1988 cfg->fc_table = RT6_TABLE_MAIN;
1989 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1990 cfg->fc_metric = rtmsg->rtmsg_metric;
1991 cfg->fc_expires = rtmsg->rtmsg_info;
1992 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1993 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1994 cfg->fc_flags = rtmsg->rtmsg_flags;
1995
Daniel Lezcano55786892008-03-04 13:47:47 -08001996 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001997
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001998 cfg->fc_dst = rtmsg->rtmsg_dst;
1999 cfg->fc_src = rtmsg->rtmsg_src;
2000 cfg->fc_gateway = rtmsg->rtmsg_gateway;
Thomas Graf86872cb2006-08-22 00:01:08 -07002001}
2002
Daniel Lezcano55786892008-03-04 13:47:47 -08002003int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004{
Thomas Graf86872cb2006-08-22 00:01:08 -07002005 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 struct in6_rtmsg rtmsg;
2007 int err;
2008
2009 switch(cmd) {
2010 case SIOCADDRT: /* Add a route */
2011 case SIOCDELRT: /* Delete a route */
2012 if (!capable(CAP_NET_ADMIN))
2013 return -EPERM;
2014 err = copy_from_user(&rtmsg, arg,
2015 sizeof(struct in6_rtmsg));
2016 if (err)
2017 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07002018
Daniel Lezcano55786892008-03-04 13:47:47 -08002019 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07002020
Linus Torvalds1da177e2005-04-16 15:20:36 -07002021 rtnl_lock();
2022 switch (cmd) {
2023 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07002024 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025 break;
2026 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07002027 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002028 break;
2029 default:
2030 err = -EINVAL;
2031 }
2032 rtnl_unlock();
2033
2034 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07002035 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036
2037 return -EINVAL;
2038}
2039
2040/*
2041 * Drop the packet on the floor
2042 */
2043
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07002044static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002046 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00002047 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002048 switch (ipstats_mib_noroutes) {
2049 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07002050 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00002051 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002052 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2053 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002054 break;
2055 }
2056 /* FALLTHROUGH */
2057 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002058 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2059 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002060 break;
2061 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002062 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063 kfree_skb(skb);
2064 return 0;
2065}
2066
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002067static int ip6_pkt_discard(struct sk_buff *skb)
2068{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002069 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002070}
2071
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03002072static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073{
Eric Dumazetadf30902009-06-02 05:19:30 +00002074 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002075 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076}
2077
David S. Miller6723ab52006-10-18 21:20:57 -07002078#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2079
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002080static int ip6_pkt_prohibit(struct sk_buff *skb)
2081{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002082 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002083}
2084
2085static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2086{
Eric Dumazetadf30902009-06-02 05:19:30 +00002087 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002088 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002089}
2090
David S. Miller6723ab52006-10-18 21:20:57 -07002091#endif
2092
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093/*
2094 * Allocate a dst for local (unicast / anycast) address.
2095 */
2096
2097struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2098 const struct in6_addr *addr,
David S. Miller8f031512011-12-06 16:48:14 -05002099 bool anycast)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002101 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002102 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002103 net->loopback_dev, 0);
David S. Millerf83c7792011-12-28 15:41:23 -05002104 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002105
David S. Miller38308472011-12-03 18:02:47 -05002106 if (!rt) {
Joe Perchesf3213832012-05-15 14:11:53 +00002107 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002108 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002109 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111 in6_dev_hold(idev);
2112
David S. Miller11d53b42011-06-24 15:23:34 -07002113 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002114 rt->dst.input = ip6_input;
2115 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002116 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002117 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118
2119 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002120 if (anycast)
2121 rt->rt6i_flags |= RTF_ANYCAST;
2122 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller8ade06c2011-12-29 18:51:57 -05002124 err = rt6_bind_neighbour(rt, rt->dst.dev);
David S. Millerf83c7792011-12-28 15:41:23 -05002125 if (err) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002126 dst_free(&rt->dst);
David S. Millerf83c7792011-12-28 15:41:23 -05002127 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002128 }
2129
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002130 rt->rt6i_dst.addr = *addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002132 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133
Changli Gaod8d1f302010-06-10 23:31:35 -07002134 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135
2136 return rt;
2137}
2138
Daniel Walterc3968a82011-04-13 21:10:57 +00002139int ip6_route_get_saddr(struct net *net,
2140 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002141 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002142 unsigned int prefs,
2143 struct in6_addr *saddr)
2144{
2145 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2146 int err = 0;
2147 if (rt->rt6i_prefsrc.plen)
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002148 *saddr = rt->rt6i_prefsrc.addr;
Daniel Walterc3968a82011-04-13 21:10:57 +00002149 else
2150 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2151 daddr, prefs, saddr);
2152 return err;
2153}
2154
/*
 * Remove a deleted IP from prefsrc entries: walker argument handed to
 * fib6_remove_prefsrc() by rt6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2161
2162static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2163{
2164 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2165 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2166 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2167
David S. Millerd1918542011-12-28 20:19:20 -05002168 if (((void *)rt->dst.dev == dev || !dev) &&
Daniel Walterc3968a82011-04-13 21:10:57 +00002169 rt != net->ipv6.ip6_null_entry &&
2170 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2171 /* remove prefsrc entry */
2172 rt->rt6i_prefsrc.plen = 0;
2173 }
2174 return 0;
2175}
2176
2177void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2178{
2179 struct net *net = dev_net(ifp->idev->dev);
2180 struct arg_dev_net_ip adni = {
2181 .dev = ifp->idev->dev,
2182 .net = net,
2183 .addr = &ifp->addr,
2184 };
2185 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2186}
2187
/* Walker argument handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
};
2192
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193static int fib6_ifdown(struct rt6_info *rt, void *arg)
2194{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002195 const struct arg_dev_net *adn = arg;
2196 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002197
David S. Millerd1918542011-12-28 20:19:20 -05002198 if ((rt->dst.dev == dev || !dev) &&
David S. Millerc159d302011-12-26 15:24:36 -05002199 rt != adn->net->ipv6.ip6_null_entry)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002200 return -1;
David S. Millerc159d302011-12-26 15:24:36 -05002201
Linus Torvalds1da177e2005-04-16 15:20:36 -07002202 return 0;
2203}
2204
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002205void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002207 struct arg_dev_net adn = {
2208 .dev = dev,
2209 .net = net,
2210 };
2211
2212 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002213 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214}
2215
/* Walker argument handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2220
2221static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2222{
2223 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2224 struct inet6_dev *idev;
2225
2226 /* In IPv6 pmtu discovery is not optional,
2227 so that RTAX_MTU lock cannot disable it.
2228 We still use this lock to block changes
2229 caused by addrconf/ndisc.
2230 */
2231
2232 idev = __in6_dev_get(arg->dev);
David S. Miller38308472011-12-03 18:02:47 -05002233 if (!idev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234 return 0;
2235
2236 /* For administrative MTU increase, there is no way to discover
2237 IPv6 PMTU increase, so PMTU increase should be updated here.
2238 Since RFC 1981 doesn't include administrative MTU increase
2239 update PMTU increase is a MUST. (i.e. jumbo frame)
2240 */
2241 /*
2242 If new MTU is less than route PMTU, this new MTU will be the
2243 lowest MTU in the path, update the route PMTU to reflect PMTU
2244 decreases; if new MTU is greater than route PMTU, and the
2245 old MTU is the lowest MTU in the path, update the route PMTU
2246 to reflect the increase. In this case if the other nodes' MTU
2247 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2248 PMTU discouvery.
2249 */
David S. Millerd1918542011-12-28 20:19:20 -05002250 if (rt->dst.dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002251 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2252 (dst_mtu(&rt->dst) >= arg->mtu ||
2253 (dst_mtu(&rt->dst) < arg->mtu &&
2254 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002255 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002256 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002257 return 0;
2258}
2259
Eric Dumazet95c96172012-04-15 05:58:06 +00002260void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261{
Thomas Grafc71099a2006-08-04 23:20:06 -07002262 struct rt6_mtu_change_arg arg = {
2263 .dev = dev,
2264 .mtu = mtu,
2265 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002267 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268}
2269
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002270static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002271 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002272 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002273 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002274 [RTA_PRIORITY] = { .type = NLA_U32 },
2275 [RTA_METRICS] = { .type = NLA_NESTED },
2276};
2277
2278static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2279 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002280{
Thomas Graf86872cb2006-08-22 00:01:08 -07002281 struct rtmsg *rtm;
2282 struct nlattr *tb[RTA_MAX+1];
2283 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002284
Thomas Graf86872cb2006-08-22 00:01:08 -07002285 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2286 if (err < 0)
2287 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002288
Thomas Graf86872cb2006-08-22 00:01:08 -07002289 err = -EINVAL;
2290 rtm = nlmsg_data(nlh);
2291 memset(cfg, 0, sizeof(*cfg));
2292
2293 cfg->fc_table = rtm->rtm_table;
2294 cfg->fc_dst_len = rtm->rtm_dst_len;
2295 cfg->fc_src_len = rtm->rtm_src_len;
2296 cfg->fc_flags = RTF_UP;
2297 cfg->fc_protocol = rtm->rtm_protocol;
2298
2299 if (rtm->rtm_type == RTN_UNREACHABLE)
2300 cfg->fc_flags |= RTF_REJECT;
2301
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002302 if (rtm->rtm_type == RTN_LOCAL)
2303 cfg->fc_flags |= RTF_LOCAL;
2304
Thomas Graf86872cb2006-08-22 00:01:08 -07002305 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2306 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002307 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002308
2309 if (tb[RTA_GATEWAY]) {
2310 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2311 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002312 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002313
2314 if (tb[RTA_DST]) {
2315 int plen = (rtm->rtm_dst_len + 7) >> 3;
2316
2317 if (nla_len(tb[RTA_DST]) < plen)
2318 goto errout;
2319
2320 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002321 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002322
2323 if (tb[RTA_SRC]) {
2324 int plen = (rtm->rtm_src_len + 7) >> 3;
2325
2326 if (nla_len(tb[RTA_SRC]) < plen)
2327 goto errout;
2328
2329 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002330 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002331
Daniel Walterc3968a82011-04-13 21:10:57 +00002332 if (tb[RTA_PREFSRC])
2333 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2334
Thomas Graf86872cb2006-08-22 00:01:08 -07002335 if (tb[RTA_OIF])
2336 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2337
2338 if (tb[RTA_PRIORITY])
2339 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2340
2341 if (tb[RTA_METRICS]) {
2342 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2343 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002344 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002345
2346 if (tb[RTA_TABLE])
2347 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2348
2349 err = 0;
2350errout:
2351 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352}
2353
Thomas Grafc127ea22007-03-22 11:58:32 -07002354static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355{
Thomas Graf86872cb2006-08-22 00:01:08 -07002356 struct fib6_config cfg;
2357 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358
Thomas Graf86872cb2006-08-22 00:01:08 -07002359 err = rtm_to_fib6_config(skb, nlh, &cfg);
2360 if (err < 0)
2361 return err;
2362
2363 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002364}
2365
Thomas Grafc127ea22007-03-22 11:58:32 -07002366static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367{
Thomas Graf86872cb2006-08-22 00:01:08 -07002368 struct fib6_config cfg;
2369 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370
Thomas Graf86872cb2006-08-22 00:01:08 -07002371 err = rtm_to_fib6_config(skb, nlh, &cfg);
2372 if (err < 0)
2373 return err;
2374
2375 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376}
2377
Thomas Graf339bf982006-11-10 14:10:15 -08002378static inline size_t rt6_nlmsg_size(void)
2379{
2380 return NLMSG_ALIGN(sizeof(struct rtmsg))
2381 + nla_total_size(16) /* RTA_SRC */
2382 + nla_total_size(16) /* RTA_DST */
2383 + nla_total_size(16) /* RTA_GATEWAY */
2384 + nla_total_size(16) /* RTA_PREFSRC */
2385 + nla_total_size(4) /* RTA_TABLE */
2386 + nla_total_size(4) /* RTA_IIF */
2387 + nla_total_size(4) /* RTA_OIF */
2388 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002389 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002390 + nla_total_size(sizeof(struct rta_cacheinfo));
2391}
2392
Brian Haley191cd582008-08-14 15:33:21 -07002393static int rt6_fill_node(struct net *net,
2394 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002395 struct in6_addr *dst, struct in6_addr *src,
2396 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002397 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398{
David S. Miller346f8702011-12-29 15:22:33 -05002399 const struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002401 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002402 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002403 u32 table;
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002404 struct neighbour *n;
David S. Miller346f8702011-12-29 15:22:33 -05002405 u32 ts, tsage;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406
2407 if (prefix) { /* user wants prefix routes only */
2408 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2409 /* success since this is not a prefix route */
2410 return 1;
2411 }
2412 }
2413
Thomas Graf2d7202b2006-08-22 00:01:27 -07002414 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
David S. Miller38308472011-12-03 18:02:47 -05002415 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08002416 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002417
2418 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002419 rtm->rtm_family = AF_INET6;
2420 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2421 rtm->rtm_src_len = rt->rt6i_src.plen;
2422 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002423 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002424 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002425 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002426 table = RT6_TABLE_UNSPEC;
2427 rtm->rtm_table = table;
David S. Millerc78679e2012-04-01 20:27:33 -04002428 if (nla_put_u32(skb, RTA_TABLE, table))
2429 goto nla_put_failure;
David S. Miller38308472011-12-03 18:02:47 -05002430 if (rt->rt6i_flags & RTF_REJECT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002431 rtm->rtm_type = RTN_UNREACHABLE;
David S. Miller38308472011-12-03 18:02:47 -05002432 else if (rt->rt6i_flags & RTF_LOCAL)
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002433 rtm->rtm_type = RTN_LOCAL;
David S. Millerd1918542011-12-28 20:19:20 -05002434 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435 rtm->rtm_type = RTN_LOCAL;
2436 else
2437 rtm->rtm_type = RTN_UNICAST;
2438 rtm->rtm_flags = 0;
2439 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2440 rtm->rtm_protocol = rt->rt6i_protocol;
David S. Miller38308472011-12-03 18:02:47 -05002441 if (rt->rt6i_flags & RTF_DYNAMIC)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442 rtm->rtm_protocol = RTPROT_REDIRECT;
2443 else if (rt->rt6i_flags & RTF_ADDRCONF)
2444 rtm->rtm_protocol = RTPROT_KERNEL;
David S. Miller38308472011-12-03 18:02:47 -05002445 else if (rt->rt6i_flags & RTF_DEFAULT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002446 rtm->rtm_protocol = RTPROT_RA;
2447
David S. Miller38308472011-12-03 18:02:47 -05002448 if (rt->rt6i_flags & RTF_CACHE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449 rtm->rtm_flags |= RTM_F_CLONED;
2450
2451 if (dst) {
David S. Millerc78679e2012-04-01 20:27:33 -04002452 if (nla_put(skb, RTA_DST, 16, dst))
2453 goto nla_put_failure;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002454 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455 } else if (rtm->rtm_dst_len)
David S. Millerc78679e2012-04-01 20:27:33 -04002456 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2457 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002458#ifdef CONFIG_IPV6_SUBTREES
2459 if (src) {
David S. Millerc78679e2012-04-01 20:27:33 -04002460 if (nla_put(skb, RTA_SRC, 16, src))
2461 goto nla_put_failure;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002462 rtm->rtm_src_len = 128;
David S. Millerc78679e2012-04-01 20:27:33 -04002463 } else if (rtm->rtm_src_len &&
2464 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2465 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002467 if (iif) {
2468#ifdef CONFIG_IPV6_MROUTE
2469 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002470 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002471 if (err <= 0) {
2472 if (!nowait) {
2473 if (err == 0)
2474 return 0;
2475 goto nla_put_failure;
2476 } else {
2477 if (err == -EMSGSIZE)
2478 goto nla_put_failure;
2479 }
2480 }
2481 } else
2482#endif
David S. Millerc78679e2012-04-01 20:27:33 -04002483 if (nla_put_u32(skb, RTA_IIF, iif))
2484 goto nla_put_failure;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002485 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486 struct in6_addr saddr_buf;
David S. Millerc78679e2012-04-01 20:27:33 -04002487 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2488 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2489 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002491
Daniel Walterc3968a82011-04-13 21:10:57 +00002492 if (rt->rt6i_prefsrc.plen) {
2493 struct in6_addr saddr_buf;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002494 saddr_buf = rt->rt6i_prefsrc.addr;
David S. Millerc78679e2012-04-01 20:27:33 -04002495 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2496 goto nla_put_failure;
Daniel Walterc3968a82011-04-13 21:10:57 +00002497 }
2498
David S. Millerdefb3512010-12-08 21:16:57 -08002499 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002500 goto nla_put_failure;
2501
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002502 rcu_read_lock();
David Miller27217452011-12-02 16:52:08 +00002503 n = dst_get_neighbour_noref(&rt->dst);
Eric Dumazet94f826b2012-03-27 09:53:52 +00002504 if (n) {
2505 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2506 rcu_read_unlock();
2507 goto nla_put_failure;
2508 }
2509 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002510 rcu_read_unlock();
Thomas Graf2d7202b2006-08-22 00:01:27 -07002511
David S. Millerc78679e2012-04-01 20:27:33 -04002512 if (rt->dst.dev &&
2513 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2514 goto nla_put_failure;
2515 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2516 goto nla_put_failure;
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002517 if (!(rt->rt6i_flags & RTF_EXPIRES))
2518 expires = 0;
David S. Millerd1918542011-12-28 20:19:20 -05002519 else if (rt->dst.expires - jiffies < INT_MAX)
2520 expires = rt->dst.expires - jiffies;
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002521 else
2522 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002523
David S. Miller346f8702011-12-29 15:22:33 -05002524 peer = rt->rt6i_peer;
2525 ts = tsage = 0;
2526 if (peer && peer->tcp_ts_stamp) {
2527 ts = peer->tcp_ts;
2528 tsage = get_seconds() - peer->tcp_ts_stamp;
2529 }
2530
2531 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
Changli Gaod8d1f302010-06-10 23:31:35 -07002532 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002533 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002534
Thomas Graf2d7202b2006-08-22 00:01:27 -07002535 return nlmsg_end(skb, nlh);
2536
2537nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002538 nlmsg_cancel(skb, nlh);
2539 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540}
2541
Patrick McHardy1b43af52006-08-10 23:11:17 -07002542int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543{
2544 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2545 int prefix;
2546
Thomas Graf2d7202b2006-08-22 00:01:27 -07002547 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2548 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2550 } else
2551 prefix = 0;
2552
Brian Haley191cd582008-08-14 15:33:21 -07002553 return rt6_fill_node(arg->net,
2554 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002555 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002556 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557}
2558
Thomas Grafc127ea22007-03-22 11:58:32 -07002559static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002560{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002561 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002562 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002563 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002564 struct sk_buff *skb;
2565 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002566 struct flowi6 fl6;
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002567 int err, iif = 0, oif = 0;
Thomas Grafab364a62006-08-22 00:01:47 -07002568
2569 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2570 if (err < 0)
2571 goto errout;
2572
2573 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002574 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002575
2576 if (tb[RTA_SRC]) {
2577 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2578 goto errout;
2579
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002580 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
Thomas Grafab364a62006-08-22 00:01:47 -07002581 }
2582
2583 if (tb[RTA_DST]) {
2584 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2585 goto errout;
2586
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002587 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
Thomas Grafab364a62006-08-22 00:01:47 -07002588 }
2589
2590 if (tb[RTA_IIF])
2591 iif = nla_get_u32(tb[RTA_IIF]);
2592
2593 if (tb[RTA_OIF])
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002594 oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002595
2596 if (iif) {
2597 struct net_device *dev;
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002598 int flags = 0;
2599
Daniel Lezcano55786892008-03-04 13:47:47 -08002600 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002601 if (!dev) {
2602 err = -ENODEV;
2603 goto errout;
2604 }
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002605
2606 fl6.flowi6_iif = iif;
2607
2608 if (!ipv6_addr_any(&fl6.saddr))
2609 flags |= RT6_LOOKUP_F_HAS_SADDR;
2610
2611 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2612 flags);
2613 } else {
2614 fl6.flowi6_oif = oif;
2615
2616 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
Thomas Grafab364a62006-08-22 00:01:47 -07002617 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618
2619 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
David S. Miller38308472011-12-03 18:02:47 -05002620 if (!skb) {
Shmulik Ladkani2173bff2012-04-03 23:13:00 +00002621 dst_release(&rt->dst);
Thomas Grafab364a62006-08-22 00:01:47 -07002622 err = -ENOBUFS;
2623 goto errout;
2624 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002625
2626 /* Reserve room for dummy headers, this skb can pass
2627 through good chunk of routing engine.
2628 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002629 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2631
Changli Gaod8d1f302010-06-10 23:31:35 -07002632 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002633
David S. Miller4c9483b2011-03-12 16:22:43 -05002634 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002635 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002636 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002637 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002638 kfree_skb(skb);
2639 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002640 }
2641
Daniel Lezcano55786892008-03-04 13:47:47 -08002642 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002643errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002644 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002645}
2646
Thomas Graf86872cb2006-08-22 00:01:08 -07002647void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002648{
2649 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002650 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002651 u32 seq;
2652 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002653
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002654 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05002655 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002656
Thomas Graf339bf982006-11-10 14:10:15 -08002657 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
David S. Miller38308472011-12-03 18:02:47 -05002658 if (!skb)
Thomas Graf21713eb2006-08-15 00:35:24 -07002659 goto errout;
2660
Brian Haley191cd582008-08-14 15:33:21 -07002661 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002662 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002663 if (err < 0) {
2664 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2665 WARN_ON(err == -EMSGSIZE);
2666 kfree_skb(skb);
2667 goto errout;
2668 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002669 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2670 info->nlh, gfp_any());
2671 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002672errout:
2673 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002674 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002675}
2676
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002677static int ip6_route_dev_notify(struct notifier_block *this,
2678 unsigned long event, void *data)
2679{
2680 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002681 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002682
2683 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002684 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002685 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2686#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002687 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002688 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002689 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002690 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2691#endif
2692 }
2693
2694 return NOTIFY_OK;
2695}
2696
Linus Torvalds1da177e2005-04-16 15:20:36 -07002697/*
2698 * /proc
2699 */
2700
2701#ifdef CONFIG_PROC_FS
2702
/*
 * Bookkeeping for a buffer-based /proc read.
 * NOTE(review): the seq_file based handlers visible in this part of the
 * file do not reference this structure; confirm whether it is still used
 * anywhere before relying on it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2711
2712static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2713{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002714 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002715 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002716
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002717 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002718
2719#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002720 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002721#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002722 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002723#endif
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002724 rcu_read_lock();
David Miller27217452011-12-02 16:52:08 +00002725 n = dst_get_neighbour_noref(&rt->dst);
David S. Miller69cce1d2011-07-17 23:09:49 -07002726 if (n) {
2727 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002728 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002729 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002730 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002731 rcu_read_unlock();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002732 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002733 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2734 rt->dst.__use, rt->rt6i_flags,
David S. Millerd1918542011-12-28 20:19:20 -05002735 rt->dst.dev ? rt->dst.dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002736 return 0;
2737}
2738
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002739static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002740{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002741 struct net *net = (struct net *)m->private;
Josh Hunt32b293a2011-12-28 13:23:07 +00002742 fib6_clean_all_ro(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002743 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002744}
2745
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002746static int ipv6_route_open(struct inode *inode, struct file *file)
2747{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002748 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002749}
2750
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002751static const struct file_operations ipv6_route_proc_fops = {
2752 .owner = THIS_MODULE,
2753 .open = ipv6_route_open,
2754 .read = seq_read,
2755 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002756 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002757};
2758
Linus Torvalds1da177e2005-04-16 15:20:36 -07002759static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2760{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002761 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002762 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002763 net->ipv6.rt6_stats->fib_nodes,
2764 net->ipv6.rt6_stats->fib_route_nodes,
2765 net->ipv6.rt6_stats->fib_rt_alloc,
2766 net->ipv6.rt6_stats->fib_rt_entries,
2767 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002768 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002769 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002770
2771 return 0;
2772}
2773
2774static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2775{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002776 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002777}
2778
Arjan van de Ven9a321442007-02-12 00:55:35 -08002779static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002780 .owner = THIS_MODULE,
2781 .open = rt6_stats_seq_open,
2782 .read = seq_read,
2783 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002784 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002785};
2786#endif /* CONFIG_PROC_FS */
2787
2788#ifdef CONFIG_SYSCTL
2789
Linus Torvalds1da177e2005-04-16 15:20:36 -07002790static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002791int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002792 void __user *buffer, size_t *lenp, loff_t *ppos)
2793{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002794 struct net *net;
2795 int delay;
2796 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002797 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002798
2799 net = (struct net *)ctl->extra1;
2800 delay = net->ipv6.sysctl.flush_delay;
2801 proc_dointvec(ctl, write, buffer, lenp, ppos);
2802 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2803 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002804}
2805
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002806ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002807 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002808 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002809 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002810 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002811 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002812 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002813 },
2814 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002815 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002816 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002817 .maxlen = sizeof(int),
2818 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002819 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002820 },
2821 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002822 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002823 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002824 .maxlen = sizeof(int),
2825 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002826 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002827 },
2828 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002829 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002830 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002831 .maxlen = sizeof(int),
2832 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002833 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002834 },
2835 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002836 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002837 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002838 .maxlen = sizeof(int),
2839 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002840 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002841 },
2842 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002843 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002844 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002845 .maxlen = sizeof(int),
2846 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002847 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002848 },
2849 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002850 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002851 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002852 .maxlen = sizeof(int),
2853 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002854 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002855 },
2856 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002857 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002858 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002859 .maxlen = sizeof(int),
2860 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002861 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002862 },
2863 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002864 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002865 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002866 .maxlen = sizeof(int),
2867 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002868 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002869 },
2870 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002871 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002872 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002873 .maxlen = sizeof(int),
2874 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002875 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002876 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002877 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002878};
2879
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002880struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002881{
2882 struct ctl_table *table;
2883
2884 table = kmemdup(ipv6_route_table_template,
2885 sizeof(ipv6_route_table_template),
2886 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002887
2888 if (table) {
2889 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002890 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002891 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002892 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2893 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2894 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2895 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2896 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2897 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2898 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002899 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002900 }
2901
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002902 return table;
2903}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002904#endif
2905
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002906static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002907{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002908 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002909
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002910 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2911 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002912
Eric Dumazetfc66f952010-10-08 06:37:34 +00002913 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2914 goto out_ip6_dst_ops;
2915
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002916 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2917 sizeof(*net->ipv6.ip6_null_entry),
2918 GFP_KERNEL);
2919 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002920 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002921 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002922 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002923 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002924 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2925 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002926
2927#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2928 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2929 sizeof(*net->ipv6.ip6_prohibit_entry),
2930 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002931 if (!net->ipv6.ip6_prohibit_entry)
2932 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002933 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002934 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002935 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002936 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2937 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002938
2939 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2940 sizeof(*net->ipv6.ip6_blk_hole_entry),
2941 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002942 if (!net->ipv6.ip6_blk_hole_entry)
2943 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002944 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002945 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002946 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002947 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2948 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002949#endif
2950
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002951 net->ipv6.sysctl.flush_delay = 0;
2952 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2953 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2954 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2955 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2956 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2957 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2958 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2959
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002960#ifdef CONFIG_PROC_FS
2961 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2962 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2963#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002964 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2965
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002966 ret = 0;
2967out:
2968 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002969
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002970#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2971out_ip6_prohibit_entry:
2972 kfree(net->ipv6.ip6_prohibit_entry);
2973out_ip6_null_entry:
2974 kfree(net->ipv6.ip6_null_entry);
2975#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002976out_ip6_dst_entries:
2977 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002978out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002979 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002980}
2981
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002982static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002983{
2984#ifdef CONFIG_PROC_FS
2985 proc_net_remove(net, "ipv6_route");
2986 proc_net_remove(net, "rt6_stats");
2987#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002988 kfree(net->ipv6.ip6_null_entry);
2989#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2990 kfree(net->ipv6.ip6_prohibit_entry);
2991 kfree(net->ipv6.ip6_blk_hole_entry);
2992#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002993 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002994}
2995
2996static struct pernet_operations ip6_route_net_ops = {
2997 .init = ip6_route_net_init,
2998 .exit = ip6_route_net_exit,
2999};
3000
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003001static struct notifier_block ip6_route_dev_notifier = {
3002 .notifier_call = ip6_route_dev_notify,
3003 .priority = 0,
3004};
3005
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003006int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003007{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003008 int ret;
3009
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08003010 ret = -ENOMEM;
3011 ip6_dst_ops_template.kmem_cachep =
3012 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3013 SLAB_HWCACHE_ALIGN, NULL);
3014 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08003015 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07003016
Eric Dumazetfc66f952010-10-08 06:37:34 +00003017 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003018 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08003019 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08003020
Thomas Graf2a0c4512012-06-14 23:00:17 +00003021 ret = register_pernet_subsys(&ip6_route_net_ops);
3022 if (ret)
David S. Millere8803b62012-06-16 01:12:19 -07003023 goto out_dst_entries;
Thomas Graf2a0c4512012-06-14 23:00:17 +00003024
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07003025 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3026
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003027 /* Registering of the loopback is done before this portion of code,
3028 * the loopback reference in rt6_info will not be taken, do it
3029 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07003030 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003031 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3032 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07003033 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003034 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07003035 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003036 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3037 #endif
David S. Millere8803b62012-06-16 01:12:19 -07003038 ret = fib6_init();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003039 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003040 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003041
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003042 ret = xfrm6_init();
3043 if (ret)
David S. Millere8803b62012-06-16 01:12:19 -07003044 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08003045
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003046 ret = fib6_rules_init();
3047 if (ret)
3048 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08003049
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003050 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00003051 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3052 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3053 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003054 goto fib6_rules_init;
3055
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003056 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08003057 if (ret)
3058 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003059
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003060out:
3061 return ret;
3062
3063fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003064 fib6_rules_cleanup();
3065xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003066 xfrm6_fini();
Thomas Graf2a0c4512012-06-14 23:00:17 +00003067out_fib6_init:
3068 fib6_gc_cleanup();
David S. Millere8803b62012-06-16 01:12:19 -07003069out_register_subsys:
3070 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00003071out_dst_entries:
3072 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003073out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08003074 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003075 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003076}
3077
3078void ip6_route_cleanup(void)
3079{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003080 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07003081 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003082 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003083 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003084 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00003085 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08003086 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003087}