blob: 8c5df6f3a2decc2f5d821c713bbfdd9f514a5011 [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
13
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  Otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040029#include <linux/export.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090038#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080043#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090044#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020045#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070056#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070057#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070058
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
Eric Dumazet21efcfa2011-07-19 20:18:36 +000065static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070067static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080068static unsigned int ip6_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +000069static unsigned int ip6_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070070static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080074static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080081#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080082static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000083 const struct in6_addr *prefix, int prefixlen,
84 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080085 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080086static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000087 const struct in6_addr *prefix, int prefixlen,
88 const struct in6_addr *gwaddr, int ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080089#endif
90
David S. Miller06582542011-01-27 14:58:42 -080091static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
Yan, Zheng8e2ec632011-09-05 21:34:30 +000097 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
David S. Miller06582542011-01-27 14:58:42 -0800100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
David S. Miller39232972012-01-26 15:22:32 -0500124static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
125{
126 struct in6_addr *p = &rt->rt6i_gateway;
127
David S. Millera7563f32012-01-26 16:29:16 -0500128 if (!ipv6_addr_any(p))
David S. Miller39232972012-01-26 15:22:32 -0500129 return (const void *) p;
130 return daddr;
131}
132
David S. Millerd3aaeb32011-07-18 00:40:17 -0700133static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
134{
David S. Miller39232972012-01-26 15:22:32 -0500135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
138 daddr = choose_neigh_daddr(rt, daddr);
139 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
David S. Millerf83c7792011-12-28 15:41:23 -0500140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143}
144
David S. Miller8ade06c2011-12-29 18:51:57 -0500145static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
David S. Millerf83c7792011-12-28 15:41:23 -0500146{
David S. Miller8ade06c2011-12-29 18:51:57 -0500147 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
148 if (!n) {
149 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
150 if (IS_ERR(n))
151 return PTR_ERR(n);
152 }
David S. Millerf83c7792011-12-28 15:41:23 -0500153 dst_set_neighbour(&rt->dst, n);
154
155 return 0;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700156}
157
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800158static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800160 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 .gc = ip6_dst_gc,
162 .gc_thresh = 1024,
163 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800164 .default_advmss = ip6_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000165 .mtu = ip6_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800166 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 .destroy = ip6_dst_destroy,
168 .ifdown = ip6_dst_ifdown,
169 .negative_advice = ip6_negative_advice,
170 .link_failure = ip6_link_failure,
171 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700172 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700173 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174};
175
Steffen Klassertebb762f2011-11-23 02:12:51 +0000176static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -0800177{
Steffen Klassert618f9bc2011-11-23 02:13:31 +0000178 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
179
180 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -0800181}
182
David S. Miller14e50e52007-05-24 18:17:54 -0700183static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
184{
185}
186
Held Bernhard0972ddb2011-04-24 22:07:32 +0000187static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
188 unsigned long old)
189{
190 return NULL;
191}
192
David S. Miller14e50e52007-05-24 18:17:54 -0700193static struct dst_ops ip6_dst_blackhole_ops = {
194 .family = AF_INET6,
Harvey Harrison09640e632009-02-01 00:45:17 -0800195 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700196 .destroy = ip6_dst_destroy,
197 .check = ip6_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000198 .mtu = ip6_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800199 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700200 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000201 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700202 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700203};
204
David S. Miller62fa8a82011-01-26 20:51:05 -0800205static const u32 ip6_template_metrics[RTAX_MAX] = {
206 [RTAX_HOPLIMIT - 1] = 255,
207};
208
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800209static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700215 .input = ip6_pkt_discard,
216 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700219 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222};
223
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route whose dst
 * fails with -EACCES and hands packets to ip6_pkt_prohibit() /
 * ip6_pkt_prohibit_out().
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = -1,
		.error = -EACCES,
		.input = ip6_pkt_prohibit,
		.output = ip6_pkt_prohibit_out,
	},
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: a reject route whose dst
 * fails with -EINVAL and passes packets in both directions to
 * dst_discard().
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt = ATOMIC_INIT(1),
		.__use = 1,
		.obsolete = -1,
		.error = -EINVAL,
		.input = dst_discard,
		.output = dst_discard,
	},
	.rt6i_protocol = RTPROT_KERNEL,
	.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric = ~(u32) 0,
	.rt6i_ref = ATOMIC_INIT(1),
};

#endif
260
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700262static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700263 struct net_device *dev,
264 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700265{
David S. Miller957c6652011-06-24 15:25:00 -0700266 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700267
David S. Miller38308472011-12-03 18:02:47 -0500268 if (rt)
Madalin Bucurfbe58182011-09-26 07:04:56 +0000269 memset(&rt->rt6i_table, 0,
David S. Miller38308472011-12-03 18:02:47 -0500270 sizeof(*rt) - sizeof(struct dst_entry));
David S. Millercf911662011-04-28 14:31:47 -0700271
272 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273}
274
275static void ip6_dst_destroy(struct dst_entry *dst)
276{
277 struct rt6_info *rt = (struct rt6_info *)dst;
278 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800279 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700280
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000281 if (!(rt->dst.flags & DST_HOST))
282 dst_destroy_metrics_generic(dst);
283
David S. Miller38308472011-12-03 18:02:47 -0500284 if (idev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700285 rt->rt6i_idev = NULL;
286 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900287 }
David S. Millerb3419362010-11-30 12:27:11 -0800288 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800289 rt->rt6i_peer = NULL;
290 inet_putpeer(peer);
291 }
292}
293
David S. Miller6431cbc2011-02-07 20:38:06 -0800294static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
295
296static u32 rt6_peer_genid(void)
297{
298 return atomic_read(&__rt6_peer_genid);
299}
300
David S. Millerb3419362010-11-30 12:27:11 -0800301void rt6_bind_peer(struct rt6_info *rt, int create)
302{
303 struct inet_peer *peer;
304
David S. Millerb3419362010-11-30 12:27:11 -0800305 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
306 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
307 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800308 else
309 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310}
311
312static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
313 int how)
314{
315 struct rt6_info *rt = (struct rt6_info *)dst;
316 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800317 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900318 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700319
David S. Miller38308472011-12-03 18:02:47 -0500320 if (dev != loopback_dev && idev && idev->dev == dev) {
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800321 struct inet6_dev *loopback_idev =
322 in6_dev_get(loopback_dev);
David S. Miller38308472011-12-03 18:02:47 -0500323 if (loopback_idev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 rt->rt6i_idev = loopback_idev;
325 in6_dev_put(idev);
326 }
327 }
328}
329
330static __inline__ int rt6_check_expired(const struct rt6_info *rt)
331{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000332 return (rt->rt6i_flags & RTF_EXPIRES) &&
David S. Millerd1918542011-12-28 20:19:20 -0500333 time_after(jiffies, rt->dst.expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334}
335
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000336static inline int rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700337{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000338 return ipv6_addr_type(daddr) &
339 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700340}
341
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700343 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 */
345
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800346static inline struct rt6_info *rt6_device_match(struct net *net,
347 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000348 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700350 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351{
352 struct rt6_info *local = NULL;
353 struct rt6_info *sprt;
354
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900355 if (!oif && ipv6_addr_any(saddr))
356 goto out;
357
Changli Gaod8d1f302010-06-10 23:31:35 -0700358 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -0500359 struct net_device *dev = sprt->dst.dev;
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900360
361 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 if (dev->ifindex == oif)
363 return sprt;
364 if (dev->flags & IFF_LOOPBACK) {
David S. Miller38308472011-12-03 18:02:47 -0500365 if (!sprt->rt6i_idev ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700367 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900369 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 local->rt6i_idev->dev->ifindex == oif))
371 continue;
372 }
373 local = sprt;
374 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900375 } else {
376 if (ipv6_chk_addr(net, saddr, dev,
377 flags & RT6_LOOKUP_F_IFACE))
378 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700379 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900380 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900382 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 if (local)
384 return local;
385
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700386 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800387 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900389out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 return rt;
391}
392
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the neighbour bound to @rt is not in
 * a NUD_VALID state and the last update is older than the interface's
 * rtr_probe_interval, send a Neighbour Solicitation to the neighbour's
 * solicited-node multicast address.  A NULL @rt is accepted and
 * ignored.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.  neigh->updated is the rate-limit timestamp, so the
 * check-and-update is done under the *write* side of neigh->lock:
 * with only the read side held, two concurrent callers could both pass
 * the time check and both send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	bool send_probe = false;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		send_probe = true;
	}
	write_unlock_bh(&neigh->lock);

	/* The solicitation itself is sent with the lock dropped. */
	if (send_probe) {
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no reachability probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
432
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800434 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700436static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437{
David S. Millerd1918542011-12-28 20:19:20 -0500438 struct net_device *dev = rt->dst.dev;
David S. Miller161980f2007-04-06 11:42:27 -0700439 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800440 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700441 if ((dev->flags & IFF_LOOPBACK) &&
442 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
443 return 1;
444 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445}
446
Dave Jonesb6f99a22007-03-22 12:27:49 -0700447static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000449 struct neighbour *neigh;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800450 int m;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000451
452 rcu_read_lock();
David Miller27217452011-12-02 16:52:08 +0000453 neigh = dst_get_neighbour_noref(&rt->dst);
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700454 if (rt->rt6i_flags & RTF_NONEXTHOP ||
455 !(rt->rt6i_flags & RTF_GATEWAY))
456 m = 1;
457 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800458 read_lock_bh(&neigh->lock);
459 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700460 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800461#ifdef CONFIG_IPV6_ROUTER_PREF
462 else if (neigh->nud_state & NUD_FAILED)
463 m = 0;
464#endif
465 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800466 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800467 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800468 } else
469 m = 0;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000470 rcu_read_unlock();
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800471 return m;
472}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800474static int rt6_score_route(struct rt6_info *rt, int oif,
475 int strict)
476{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700477 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900478
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700479 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700480 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800481 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800482#ifdef CONFIG_IPV6_ROUTER_PREF
483 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
484#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700485 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800486 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800487 return -1;
488 return m;
489}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490
David S. Millerf11e6652007-03-24 20:36:25 -0700491static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
492 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800493{
David S. Millerf11e6652007-03-24 20:36:25 -0700494 int m;
495
496 if (rt6_check_expired(rt))
497 goto out;
498
499 m = rt6_score_route(rt, oif, strict);
500 if (m < 0)
501 goto out;
502
503 if (m > *mpri) {
504 if (strict & RT6_LOOKUP_F_REACHABLE)
505 rt6_probe(match);
506 *mpri = m;
507 match = rt;
508 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
509 rt6_probe(rt);
510 }
511
512out:
513 return match;
514}
515
516static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
517 struct rt6_info *rr_head,
518 u32 metric, int oif, int strict)
519{
520 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800521 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522
David S. Millerf11e6652007-03-24 20:36:25 -0700523 match = NULL;
524 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700525 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700526 match = find_match(rt, oif, strict, &mpri, match);
527 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700528 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700529 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800530
David S. Millerf11e6652007-03-24 20:36:25 -0700531 return match;
532}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800533
David S. Millerf11e6652007-03-24 20:36:25 -0700534static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
535{
536 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800537 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538
David S. Millerf11e6652007-03-24 20:36:25 -0700539 rt0 = fn->rr_ptr;
540 if (!rt0)
541 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542
David S. Millerf11e6652007-03-24 20:36:25 -0700543 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800545 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700546 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700547 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700548
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800549 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700550 if (!next || next->rt6i_metric != rt0->rt6i_metric)
551 next = fn->leaf;
552
553 if (next != rt0)
554 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555 }
556
David S. Millerd1918542011-12-28 20:19:20 -0500557 net = dev_net(rt0->dst.dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000558 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700559}
560
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from router
 * @gwaddr.
 *
 * @opt points at the option (interpreted as struct route_info) and
 * @len is the number of bytes available there.  After validation the
 * matching route is looked up and then:
 *   - deleted when the advertised lifetime is zero,
 *   - created when it does not exist and the lifetime is non-zero,
 *   - otherwise updated with the advertised preference;
 * finally its expiry is set from the lifetime (or cleared when the
 * lifetime is not finite).
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * route preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length: the option length must
	 * be large enough for the advertised prefix length.
	 */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* full-size prefix field: use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->dst.expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}

	dst_release(&rt->dst);
	return 0;
}
#endif
634
/*
 * BACKTRACK(__net, saddr) - retry a failed lookup further up the tree.
 *
 * Must be expanded inside a function that has locals 'rt' (lookup
 * result) and 'fn' (current fib6 node) and labels 'restart' and 'out'.
 * When 'rt' is the namespace's null entry, the macro climbs from 'fn'
 * towards the root - descending into a parent's source subtree via
 * fib6_lookup() on @saddr where one exists and we did not come from
 * it - and jumps to 'restart' at the first node carrying route info.
 * Reaching a top-level root jumps to 'out'.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, saddr);		\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700652
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800653static struct rt6_info *ip6_pol_route_lookup(struct net *net,
654 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500655 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656{
657 struct fib6_node *fn;
658 struct rt6_info *rt;
659
Thomas Grafc71099a2006-08-04 23:20:06 -0700660 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500661 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700662restart:
663 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500664 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
665 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700666out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700667 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700668 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700669 return rt;
670
671}
672
Florian Westphalea6e5742011-09-05 16:05:44 +0200673struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
674 int flags)
675{
676 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
677}
678EXPORT_SYMBOL_GPL(ip6_route_lookup);
679
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900680struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
681 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700682{
David S. Miller4c9483b2011-03-12 16:22:43 -0500683 struct flowi6 fl6 = {
684 .flowi6_oif = oif,
685 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700686 };
687 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700688 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700689
Thomas Grafadaa70b2006-10-13 15:01:03 -0700690 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500691 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700692 flags |= RT6_LOOKUP_F_HAS_SADDR;
693 }
694
David S. Miller4c9483b2011-03-12 16:22:43 -0500695 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700696 if (dst->error == 0)
697 return (struct rt6_info *) dst;
698
699 dst_release(dst);
700
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 return NULL;
702}
703
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900704EXPORT_SYMBOL(rt6_lookup);
705
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold it,
   it may be destroyed.
 */
711
Thomas Graf86872cb2006-08-22 00:01:08 -0700712static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713{
714 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700715 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716
Thomas Grafc71099a2006-08-04 23:20:06 -0700717 table = rt->rt6i_table;
718 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700719 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700720 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721
722 return err;
723}
724
Thomas Graf40e22e82006-08-22 00:00:45 -0700725int ip6_ins_rt(struct rt6_info *rt)
726{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800727 struct nl_info info = {
David S. Millerd1918542011-12-28 20:19:20 -0500728 .nl_net = dev_net(rt->dst.dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800729 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800730 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700731}
732
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000733static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
734 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000735 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737 struct rt6_info *rt;
738
739 /*
740 * Clone the route.
741 */
742
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000743 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744
745 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800746 int attempts = !in_softirq();
747
David S. Miller38308472011-12-03 18:02:47 -0500748 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
David S. Millerbb3c3682011-12-13 17:35:06 -0500749 if (ort->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000750 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900751 rt->rt6i_flags |= RTF_ANYCAST;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000752 rt->rt6i_gateway = *daddr;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900753 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755 rt->rt6i_flags |= RTF_CACHE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756
757#ifdef CONFIG_IPV6_SUBTREES
758 if (rt->rt6i_src.plen && saddr) {
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000759 rt->rt6i_src.addr = *saddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 rt->rt6i_src.plen = 128;
761 }
762#endif
763
David S. Miller14deae42009-01-04 16:04:39 -0800764 retry:
David S. Miller8ade06c2011-12-29 18:51:57 -0500765 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
David S. Millerd1918542011-12-28 20:19:20 -0500766 struct net *net = dev_net(rt->dst.dev);
David S. Miller14deae42009-01-04 16:04:39 -0800767 int saved_rt_min_interval =
768 net->ipv6.sysctl.ip6_rt_gc_min_interval;
769 int saved_rt_elasticity =
770 net->ipv6.sysctl.ip6_rt_gc_elasticity;
771
772 if (attempts-- > 0) {
773 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
774 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
775
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000776 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800777
778 net->ipv6.sysctl.ip6_rt_gc_elasticity =
779 saved_rt_elasticity;
780 net->ipv6.sysctl.ip6_rt_gc_min_interval =
781 saved_rt_min_interval;
782 goto retry;
783 }
784
785 if (net_ratelimit())
786 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700787 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700788 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800789 return NULL;
790 }
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800791 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800793 return rt;
794}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000796static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
797 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800798{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000799 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
800
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800801 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800802 rt->rt6i_flags |= RTF_CACHE;
David Miller27217452011-12-02 16:52:08 +0000803 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800804 }
805 return rt;
806}
807
/*
 * Core per-table route resolution shared by the input and output paths.
 *
 * Selects a route for flow @fl6 in @table with rt6_select(), using @oif
 * as the interface and RT6_LOOKUP_F_IFACE from @flags as the strictness
 * bit.  If the selected route is the null entry or already RTF_CACHE it
 * is returned as is.  Otherwise a cached copy is created, either with
 * rt6_alloc_cow() or rt6_alloc_clone(), and inserted with ip6_ins_rt().
 * When the copy cannot be inserted the whole lookup is retried, up to
 * three attempts in total.
 *
 * Every return path takes a reference on the returned route with
 * dst_hold() and updates its lastuse/__use counters.
 *
 * NOTE(review): the labels "restart" and "out" and the locals "rt" and
 * "fn" are presumably referenced by the BACKTRACK() macro defined above.
 */
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	/* Require a reachable router on the first pass unless forwarding is on. */
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* Pin the route before dropping the lock; allocation happens unlocked. */
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;	/* return rt itself, reference already held */

	dst_release(&rt->dst);
	/* GNU "?:" with omitted middle operand: nrt if non-NULL, else the null entry. */
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	/*
	 * Reached with the read lock still held.  If the reachable flag is
	 * still set, clear it and repeat the lookup once from restart_2.
	 */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}
877
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800878static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500879 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700880{
David S. Miller4c9483b2011-03-12 16:22:43 -0500881 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700882}
883
Shmulik Ladkani72331bc2012-04-01 04:03:45 +0000884static struct dst_entry *ip6_route_input_lookup(struct net *net,
885 struct net_device *dev,
886 struct flowi6 *fl6, int flags)
887{
888 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
889 flags |= RT6_LOOKUP_F_IFACE;
890
891 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
892}
893
Thomas Grafc71099a2006-08-04 23:20:06 -0700894void ip6_route_input(struct sk_buff *skb)
895{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000896 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900897 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700898 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500899 struct flowi6 fl6 = {
900 .flowi6_iif = skb->dev->ifindex,
901 .daddr = iph->daddr,
902 .saddr = iph->saddr,
David S. Miller38308472011-12-03 18:02:47 -0500903 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
David S. Miller4c9483b2011-03-12 16:22:43 -0500904 .flowi6_mark = skb->mark,
905 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700906 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700907
Shmulik Ladkani72331bc2012-04-01 04:03:45 +0000908 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
Thomas Grafc71099a2006-08-04 23:20:06 -0700909}
910
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800911static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500912 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700913{
David S. Miller4c9483b2011-03-12 16:22:43 -0500914 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700915}
916
Florian Westphal9c7a4f9c2011-03-22 19:17:36 -0700917struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500918 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700919{
920 int flags = 0;
921
David S. Miller4c9483b2011-03-12 16:22:43 -0500922 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700923 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700924
David S. Miller4c9483b2011-03-12 16:22:43 -0500925 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700926 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000927 else if (sk)
928 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700929
David S. Miller4c9483b2011-03-12 16:22:43 -0500930 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931}
932
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900933EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934
David S. Miller2774c132011-03-01 14:59:04 -0800935struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700936{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700937 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700938 struct dst_entry *new = NULL;
939
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700940 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700941 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700942 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
943
Changli Gaod8d1f302010-06-10 23:31:35 -0700944 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700945
David S. Miller14e50e52007-05-24 18:17:54 -0700946 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800947 new->input = dst_discard;
948 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700949
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000950 if (dst_metrics_read_only(&ort->dst))
951 new->_metrics = ort->dst._metrics;
952 else
953 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700954 rt->rt6i_idev = ort->rt6i_idev;
955 if (rt->rt6i_idev)
956 in6_dev_hold(rt->rt6i_idev);
David S. Millerd1918542011-12-28 20:19:20 -0500957 rt->dst.expires = 0;
David S. Miller14e50e52007-05-24 18:17:54 -0700958
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000959 rt->rt6i_gateway = ort->rt6i_gateway;
David S. Miller14e50e52007-05-24 18:17:54 -0700960 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
961 rt->rt6i_metric = 0;
962
963 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
964#ifdef CONFIG_IPV6_SUBTREES
965 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
966#endif
967
968 dst_free(new);
969 }
970
David S. Miller69ead7a2011-03-01 14:45:33 -0800971 dst_release(dst_orig);
972 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700973}
David S. Miller14e50e52007-05-24 18:17:54 -0700974
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975/*
976 * Destination cache support functions
977 */
978
979static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
980{
981 struct rt6_info *rt;
982
983 rt = (struct rt6_info *) dst;
984
David S. Miller6431cbc2011-02-07 20:38:06 -0800985 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
986 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
987 if (!rt->rt6i_peer)
988 rt6_bind_peer(rt, 0);
989 rt->rt6i_peer_genid = rt6_peer_genid();
990 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -0800992 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 return NULL;
994}
995
996static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
997{
998 struct rt6_info *rt = (struct rt6_info *) dst;
999
1000 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001001 if (rt->rt6i_flags & RTF_CACHE) {
1002 if (rt6_check_expired(rt)) {
1003 ip6_del_rt(rt);
1004 dst = NULL;
1005 }
1006 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001007 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001008 dst = NULL;
1009 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00001011 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001012}
1013
1014static void ip6_link_failure(struct sk_buff *skb)
1015{
1016 struct rt6_info *rt;
1017
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00001018 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019
Eric Dumazetadf30902009-06-02 05:19:30 +00001020 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 if (rt) {
David S. Miller38308472011-12-03 18:02:47 -05001022 if (rt->rt6i_flags & RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001023 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 rt->rt6i_flags |= RTF_EXPIRES;
1025 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1026 rt->rt6i_node->fn_sernum = -1;
1027 }
1028}
1029
1030static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1031{
1032 struct rt6_info *rt6 = (struct rt6_info*)dst;
1033
1034 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1035 rt6->rt6i_flags |= RTF_MODIFIED;
1036 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -08001037 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001039 features |= RTAX_FEATURE_ALLFRAG;
1040 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041 }
David S. Millerdefb3512010-12-08 21:16:57 -08001042 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043 }
1044}
1045
David S. Miller0dbaee32010-12-13 12:52:14 -08001046static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047{
David S. Miller0dbaee32010-12-13 12:52:14 -08001048 struct net_device *dev = dst->dev;
1049 unsigned int mtu = dst_mtu(dst);
1050 struct net *net = dev_net(dev);
1051
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1053
Daniel Lezcano55786892008-03-04 13:47:47 -08001054 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1055 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056
1057 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001058 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1059 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1060 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 * rely only on pmtu discovery"
1062 */
1063 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1064 mtu = IPV6_MAXPLEN;
1065 return mtu;
1066}
1067
Steffen Klassertebb762f2011-11-23 02:12:51 +00001068static unsigned int ip6_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001069{
David S. Millerd33e4552010-12-14 13:01:14 -08001070 struct inet6_dev *idev;
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001071 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1072
1073 if (mtu)
1074 return mtu;
1075
1076 mtu = IPV6_MIN_MTU;
David S. Millerd33e4552010-12-14 13:01:14 -08001077
1078 rcu_read_lock();
1079 idev = __in6_dev_get(dst->dev);
1080 if (idev)
1081 mtu = idev->cnf.mtu6;
1082 rcu_read_unlock();
1083
1084 return mtu;
1085}
1086
/*
 * Singly linked list (chained through dst->next) of the dst entries
 * created by icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all()
 * walk and prune it.  All accesses are made under icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001089
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001090struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 struct neighbour *neigh,
David S. Miller87a11572011-12-06 17:04:13 -05001092 struct flowi6 *fl6)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093{
David S. Miller87a11572011-12-06 17:04:13 -05001094 struct dst_entry *dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095 struct rt6_info *rt;
1096 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001097 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098
David S. Miller38308472011-12-03 18:02:47 -05001099 if (unlikely(!idev))
Eric Dumazet122bdf62012-03-14 21:13:11 +00001100 return ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101
David S. Miller957c6652011-06-24 15:25:00 -07001102 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
David S. Miller38308472011-12-03 18:02:47 -05001103 if (unlikely(!rt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 in6_dev_put(idev);
David S. Miller87a11572011-12-06 17:04:13 -05001105 dst = ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 goto out;
1107 }
1108
Linus Torvalds1da177e2005-04-16 15:20:36 -07001109 if (neigh)
1110 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001111 else {
David S. Millerf83c7792011-12-28 15:41:23 -05001112 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
David S. Millerb43faac2011-12-13 16:48:21 -05001113 if (IS_ERR(neigh)) {
RongQing.Li252c3d82012-01-12 22:33:46 +00001114 in6_dev_put(idev);
David S. Millerb43faac2011-12-13 16:48:21 -05001115 dst_free(&rt->dst);
1116 return ERR_CAST(neigh);
1117 }
David S. Miller14deae42009-01-04 16:04:39 -08001118 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001120 rt->dst.flags |= DST_HOST;
1121 rt->dst.output = ip6_output;
David S. Miller69cce1d2011-07-17 23:09:49 -07001122 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001123 atomic_set(&rt->dst.__refcnt, 1);
David S. Miller87a11572011-12-06 17:04:13 -05001124 rt->rt6i_dst.addr = fl6->daddr;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001125 rt->rt6i_dst.plen = 128;
1126 rt->rt6i_idev = idev;
Gao feng70116872011-10-28 02:46:57 +00001127 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001129 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001130 rt->dst.next = icmp6_dst_gc_list;
1131 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001132 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133
Daniel Lezcano55786892008-03-04 13:47:47 -08001134 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135
David S. Miller87a11572011-12-06 17:04:13 -05001136 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1137
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138out:
David S. Miller87a11572011-12-06 17:04:13 -05001139 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140}
1141
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001142int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143{
Hagen Paul Pfeifere9476e952011-02-25 05:45:19 +00001144 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001145 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001147 spin_lock_bh(&icmp6_dst_lock);
1148 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001149
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 while ((dst = *pprev) != NULL) {
1151 if (!atomic_read(&dst->__refcnt)) {
1152 *pprev = dst->next;
1153 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 } else {
1155 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001156 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157 }
1158 }
1159
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001160 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001161
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001162 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163}
1164
David S. Miller1e493d12008-09-10 17:27:15 -07001165static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1166 void *arg)
1167{
1168 struct dst_entry *dst, **pprev;
1169
1170 spin_lock_bh(&icmp6_dst_lock);
1171 pprev = &icmp6_dst_gc_list;
1172 while ((dst = *pprev) != NULL) {
1173 struct rt6_info *rt = (struct rt6_info *) dst;
1174 if (func(rt, arg)) {
1175 *pprev = dst->next;
1176 dst_free(dst);
1177 } else {
1178 pprev = &dst->next;
1179 }
1180 }
1181 spin_unlock_bh(&icmp6_dst_lock);
1182}
1183
Daniel Lezcano569d3642008-01-18 03:56:57 -08001184static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001187 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001188 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1189 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1190 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1191 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1192 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001193 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194
Eric Dumazetfc66f952010-10-08 06:37:34 +00001195 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001196 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001197 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 goto out;
1199
Benjamin Thery6891a342008-03-04 13:49:47 -08001200 net->ipv6.ip6_rt_gc_expire++;
1201 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1202 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001203 entries = dst_entries_get_slow(ops);
1204 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001205 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001207 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001208 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209}
1210
1211/* Clean host part of a prefix. Not necessary in radix tree,
1212 but results in cleaner routing tables.
1213
1214 Remove it only when all the things will work!
1215 */
1216
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001217int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218{
David S. Miller5170ae82010-12-12 21:35:57 -08001219 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001220 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001221 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001222 struct inet6_dev *idev;
1223
1224 rcu_read_lock();
1225 idev = __in6_dev_get(dev);
1226 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001227 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001228 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001229 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001230 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231 }
1232 return hoplimit;
1233}
David S. Millerabbf46a2010-12-12 21:14:46 -08001234EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001235
1236/*
1237 *
1238 */
1239
Thomas Graf86872cb2006-08-22 00:01:08 -07001240int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241{
1242 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001243 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244 struct rt6_info *rt = NULL;
1245 struct net_device *dev = NULL;
1246 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001247 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248 int addr_type;
1249
Thomas Graf86872cb2006-08-22 00:01:08 -07001250 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251 return -EINVAL;
1252#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001253 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254 return -EINVAL;
1255#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001256 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001258 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259 if (!dev)
1260 goto out;
1261 idev = in6_dev_get(dev);
1262 if (!idev)
1263 goto out;
1264 }
1265
Thomas Graf86872cb2006-08-22 00:01:08 -07001266 if (cfg->fc_metric == 0)
1267 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001268
Matti Vaittinend71314b2011-11-14 00:14:49 +00001269 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05001270 if (cfg->fc_nlinfo.nlh &&
1271 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
Matti Vaittinend71314b2011-11-14 00:14:49 +00001272 table = fib6_get_table(net, cfg->fc_table);
David S. Miller38308472011-12-03 18:02:47 -05001273 if (!table) {
Matti Vaittinend71314b2011-11-14 00:14:49 +00001274 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1275 table = fib6_new_table(net, cfg->fc_table);
1276 }
1277 } else {
1278 table = fib6_new_table(net, cfg->fc_table);
1279 }
David S. Miller38308472011-12-03 18:02:47 -05001280
1281 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001282 goto out;
Thomas Grafc71099a2006-08-04 23:20:06 -07001283
David S. Miller957c6652011-06-24 15:25:00 -07001284 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285
David S. Miller38308472011-12-03 18:02:47 -05001286 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001287 err = -ENOMEM;
1288 goto out;
1289 }
1290
Changli Gaod8d1f302010-06-10 23:31:35 -07001291 rt->dst.obsolete = -1;
David S. Millerd1918542011-12-28 20:19:20 -05001292 rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001293 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1294 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295
Thomas Graf86872cb2006-08-22 00:01:08 -07001296 if (cfg->fc_protocol == RTPROT_UNSPEC)
1297 cfg->fc_protocol = RTPROT_BOOT;
1298 rt->rt6i_protocol = cfg->fc_protocol;
1299
1300 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301
1302 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001303 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001304 else if (cfg->fc_flags & RTF_LOCAL)
1305 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001306 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001307 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308
Changli Gaod8d1f302010-06-10 23:31:35 -07001309 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310
Thomas Graf86872cb2006-08-22 00:01:08 -07001311 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1312 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001314 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001316 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1317 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1318 if (!metrics) {
1319 err = -ENOMEM;
1320 goto out;
1321 }
1322 dst_init_metrics(&rt->dst, metrics, 0);
1323 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001324#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001325 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1326 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001327#endif
1328
Thomas Graf86872cb2006-08-22 00:01:08 -07001329 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001330
1331 /* We cannot add true routes via loopback here,
1332 they would result in kernel looping; promote them to reject routes
1333 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001334 if ((cfg->fc_flags & RTF_REJECT) ||
David S. Miller38308472011-12-03 18:02:47 -05001335 (dev && (dev->flags & IFF_LOOPBACK) &&
1336 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1337 !(cfg->fc_flags & RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001338 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001339 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 if (dev) {
1341 dev_put(dev);
1342 in6_dev_put(idev);
1343 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001344 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345 dev_hold(dev);
1346 idev = in6_dev_get(dev);
1347 if (!idev) {
1348 err = -ENODEV;
1349 goto out;
1350 }
1351 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001352 rt->dst.output = ip6_pkt_discard_out;
1353 rt->dst.input = ip6_pkt_discard;
1354 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1356 goto install_route;
1357 }
1358
Thomas Graf86872cb2006-08-22 00:01:08 -07001359 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001360 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361 int gwa_type;
1362
Thomas Graf86872cb2006-08-22 00:01:08 -07001363 gw_addr = &cfg->fc_gateway;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001364 rt->rt6i_gateway = *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001365 gwa_type = ipv6_addr_type(gw_addr);
1366
1367 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1368 struct rt6_info *grt;
1369
1370 /* IPv6 strictly inhibits using not link-local
1371 addresses as nexthop address.
1372 Otherwise, router will not able to send redirects.
1373 It is very good, but in some (rare!) circumstances
1374 (SIT, PtP, NBMA NOARP links) it is handy to allow
1375 some exceptions. --ANK
1376 */
1377 err = -EINVAL;
David S. Miller38308472011-12-03 18:02:47 -05001378 if (!(gwa_type & IPV6_ADDR_UNICAST))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001379 goto out;
1380
Daniel Lezcano55786892008-03-04 13:47:47 -08001381 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382
1383 err = -EHOSTUNREACH;
David S. Miller38308472011-12-03 18:02:47 -05001384 if (!grt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 goto out;
1386 if (dev) {
David S. Millerd1918542011-12-28 20:19:20 -05001387 if (dev != grt->dst.dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001388 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001389 goto out;
1390 }
1391 } else {
David S. Millerd1918542011-12-28 20:19:20 -05001392 dev = grt->dst.dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393 idev = grt->rt6i_idev;
1394 dev_hold(dev);
1395 in6_dev_hold(grt->rt6i_idev);
1396 }
David S. Miller38308472011-12-03 18:02:47 -05001397 if (!(grt->rt6i_flags & RTF_GATEWAY))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001399 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400
1401 if (err)
1402 goto out;
1403 }
1404 err = -EINVAL;
David S. Miller38308472011-12-03 18:02:47 -05001405 if (!dev || (dev->flags & IFF_LOOPBACK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406 goto out;
1407 }
1408
1409 err = -ENODEV;
David S. Miller38308472011-12-03 18:02:47 -05001410 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411 goto out;
1412
Daniel Walterc3968a82011-04-13 21:10:57 +00001413 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1414 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1415 err = -EINVAL;
1416 goto out;
1417 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001418 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
Daniel Walterc3968a82011-04-13 21:10:57 +00001419 rt->rt6i_prefsrc.plen = 128;
1420 } else
1421 rt->rt6i_prefsrc.plen = 0;
1422
Thomas Graf86872cb2006-08-22 00:01:08 -07001423 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller8ade06c2011-12-29 18:51:57 -05001424 err = rt6_bind_neighbour(rt, dev);
David S. Millerf83c7792011-12-28 15:41:23 -05001425 if (err)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427 }
1428
Thomas Graf86872cb2006-08-22 00:01:08 -07001429 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430
1431install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001432 if (cfg->fc_mx) {
1433 struct nlattr *nla;
1434 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001435
Thomas Graf86872cb2006-08-22 00:01:08 -07001436 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001437 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001438
1439 if (type) {
1440 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441 err = -EINVAL;
1442 goto out;
1443 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001444
David S. Millerdefb3512010-12-08 21:16:57 -08001445 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447 }
1448 }
1449
Changli Gaod8d1f302010-06-10 23:31:35 -07001450 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001451 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001452 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001453
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001454 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001455
Thomas Graf86872cb2006-08-22 00:01:08 -07001456 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457
1458out:
1459 if (dev)
1460 dev_put(dev);
1461 if (idev)
1462 in6_dev_put(idev);
1463 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001464 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001465 return err;
1466}
1467
Thomas Graf86872cb2006-08-22 00:01:08 -07001468static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469{
1470 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001471 struct fib6_table *table;
David S. Millerd1918542011-12-28 20:19:20 -05001472 struct net *net = dev_net(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001474 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001475 return -ENOENT;
1476
Thomas Grafc71099a2006-08-04 23:20:06 -07001477 table = rt->rt6i_table;
1478 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479
Thomas Graf86872cb2006-08-22 00:01:08 -07001480 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001481 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001482
Thomas Grafc71099a2006-08-04 23:20:06 -07001483 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484
1485 return err;
1486}
1487
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001488int ip6_del_rt(struct rt6_info *rt)
1489{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001490 struct nl_info info = {
David S. Millerd1918542011-12-28 20:19:20 -05001491 .nl_net = dev_net(rt->dst.dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001492 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001493 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001494}
1495
Thomas Graf86872cb2006-08-22 00:01:08 -07001496static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497{
Thomas Grafc71099a2006-08-04 23:20:06 -07001498 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 struct fib6_node *fn;
1500 struct rt6_info *rt;
1501 int err = -ESRCH;
1502
Daniel Lezcano55786892008-03-04 13:47:47 -08001503 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
David S. Miller38308472011-12-03 18:02:47 -05001504 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001505 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506
Thomas Grafc71099a2006-08-04 23:20:06 -07001507 read_lock_bh(&table->tb6_lock);
1508
1509 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001510 &cfg->fc_dst, cfg->fc_dst_len,
1511 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001512
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001514 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001515 if (cfg->fc_ifindex &&
David S. Millerd1918542011-12-28 20:19:20 -05001516 (!rt->dst.dev ||
1517 rt->dst.dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001519 if (cfg->fc_flags & RTF_GATEWAY &&
1520 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001522 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001523 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001524 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001525 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001526
Thomas Graf86872cb2006-08-22 00:01:08 -07001527 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528 }
1529 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001530 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531
1532 return err;
1533}
1534
1535/*
1536 * Handle redirects
1537 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001538struct ip6rd_flowi {
David S. Miller4c9483b2011-03-12 16:22:43 -05001539 struct flowi6 fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001540 struct in6_addr gateway;
1541};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001543static struct rt6_info *__ip6_route_redirect(struct net *net,
1544 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001545 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001546 int flags)
1547{
David S. Miller4c9483b2011-03-12 16:22:43 -05001548 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001549 struct rt6_info *rt;
1550 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001551
Linus Torvalds1da177e2005-04-16 15:20:36 -07001552 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001553 * Get the "current" route for this destination and
1554 * check if the redirect has come from approriate router.
1555 *
1556 * RFC 2461 specifies that redirects should only be
1557 * accepted if they come from the nexthop to the target.
1558 * Due to the way the routes are chosen, this notion
1559 * is a bit fuzzy and one might need to check all possible
1560 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001561 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562
Thomas Grafc71099a2006-08-04 23:20:06 -07001563 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001564 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001565restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001566 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001567 /*
1568 * Current route is on-link; redirect is always invalid.
1569 *
1570 * Seems, previous statement is not true. It could
1571 * be node, which looks for us as on-link (f.e. proxy ndisc)
1572 * But then router serving it might decide, that we should
1573 * know truth 8)8) --ANK (980726).
1574 */
1575 if (rt6_check_expired(rt))
1576 continue;
1577 if (!(rt->rt6i_flags & RTF_GATEWAY))
1578 continue;
David S. Millerd1918542011-12-28 20:19:20 -05001579 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001580 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001581 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001582 continue;
1583 break;
1584 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001585
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001586 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001587 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001588 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001589out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001590 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001591
1592 read_unlock_bh(&table->tb6_lock);
1593
1594 return rt;
1595};
1596
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001597static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1598 const struct in6_addr *src,
1599 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001600 struct net_device *dev)
1601{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001602 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001603 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001604 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001605 .fl6 = {
1606 .flowi6_oif = dev->ifindex,
1607 .daddr = *dest,
1608 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001609 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001610 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001611
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001612 rdfl.gateway = *gateway;
Brian Haley86c36ce2009-10-07 13:58:01 -07001613
Thomas Grafadaa70b2006-10-13 15:01:03 -07001614 if (rt6_need_strict(dest))
1615 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001616
David S. Miller4c9483b2011-03-12 16:22:43 -05001617 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001618 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001619}
1620
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001621void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1622 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001623 struct neighbour *neigh, u8 *lladdr, int on_link)
1624{
1625 struct rt6_info *rt, *nrt = NULL;
1626 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001627 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001628
1629 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1630
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001631 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632 if (net_ratelimit())
1633 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1634 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001635 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 }
1637
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638 /*
1639 * We have finally decided to accept it.
1640 */
1641
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001642 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1644 NEIGH_UPDATE_F_OVERRIDE|
1645 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1646 NEIGH_UPDATE_F_ISROUTER))
1647 );
1648
1649 /*
1650 * Redirect received -> path was valid.
1651 * Look, redirects are sent only in response to data packets,
1652 * so that this nexthop apparently is reachable. --ANK
1653 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001654 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655
1656 /* Duplicate redirect: silently ignore. */
David Miller27217452011-12-02 16:52:08 +00001657 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658 goto out;
1659
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001660 nrt = ip6_rt_copy(rt, dest);
David S. Miller38308472011-12-03 18:02:47 -05001661 if (!nrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 goto out;
1663
1664 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1665 if (on_link)
1666 nrt->rt6i_flags &= ~RTF_GATEWAY;
1667
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001668 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
David S. Miller69cce1d2011-07-17 23:09:49 -07001669 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670
Thomas Graf40e22e82006-08-22 00:00:45 -07001671 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001672 goto out;
1673
Changli Gaod8d1f302010-06-10 23:31:35 -07001674 netevent.old = &rt->dst;
1675 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001676 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1677
David S. Miller38308472011-12-03 18:02:47 -05001678 if (rt->rt6i_flags & RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001679 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 return;
1681 }
1682
1683out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001684 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685}
1686
1687/*
1688 * Handle ICMP "packet too big" messages
1689 * i.e. Path MTU discovery
1690 */
1691
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001692static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001693 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694{
1695 struct rt6_info *rt, *nrt;
1696 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001697again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001698 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
David S. Miller38308472011-12-03 18:02:47 -05001699 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 return;
1701
Andrey Vagind3052b52010-12-11 15:20:11 +00001702 if (rt6_check_expired(rt)) {
1703 ip6_del_rt(rt);
1704 goto again;
1705 }
1706
Changli Gaod8d1f302010-06-10 23:31:35 -07001707 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 goto out;
1709
1710 if (pmtu < IPV6_MIN_MTU) {
1711 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001712 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713 * MTU (1280) and a fragment header should always be included
1714 * after a node receiving Too Big message reporting PMTU is
1715 * less than the IPv6 Minimum Link MTU.
1716 */
1717 pmtu = IPV6_MIN_MTU;
1718 allfrag = 1;
1719 }
1720
1721 /* New mtu received -> path was valid.
1722 They are sent only in response to data packets,
1723 so that this nexthop apparently is reachable. --ANK
1724 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001725 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001726
1727 /* Host route. If it is static, it would be better
1728 not to override it, but add new one, so that
1729 when cache entry will expire old pmtu
1730 would return automatically.
1731 */
1732 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001733 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1734 if (allfrag) {
1735 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1736 features |= RTAX_FEATURE_ALLFRAG;
1737 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1738 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001739 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1741 goto out;
1742 }
1743
1744 /* Network route.
1745 Two cases are possible:
1746 1. It is connected route. Action: COW
1747 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1748 */
David Miller27217452011-12-02 16:52:08 +00001749 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001750 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001751 else
1752 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001753
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001754 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001755 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1756 if (allfrag) {
1757 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1758 features |= RTAX_FEATURE_ALLFRAG;
1759 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1760 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001761
1762 /* According to RFC 1981, detecting PMTU increase shouldn't be
1763 * happened within 5 mins, the recommended timer is 10 mins.
1764 * Here this route expiration time is set to ip6_rt_mtu_expires
1765 * which is 10 mins. After 10 mins the decreased pmtu is expired
1766 * and detecting PMTU increase will be automatically happened.
1767 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001768 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001769 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1770
Thomas Graf40e22e82006-08-22 00:00:45 -07001771 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001774 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775}
1776
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001777void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001778 struct net_device *dev, u32 pmtu)
1779{
1780 struct net *net = dev_net(dev);
1781
1782 /*
1783 * RFC 1981 states that a node "MUST reduce the size of the packets it
1784 * is sending along the path" that caused the Packet Too Big message.
1785 * Since it's not possible in the general case to determine which
1786 * interface was used to send the original packet, we update the MTU
1787 * on the interface that will be used to send future packets. We also
1788 * update the MTU on the interface that received the Packet Too Big in
1789 * case the original packet was forced out that interface with
1790 * SO_BINDTODEVICE or similar. This is the next best thing to the
1791 * correct behaviour, which would be to update the MTU on all
1792 * interfaces.
1793 */
1794 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1795 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1796}
1797
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798/*
1799 * Misc support functions
1800 */
1801
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001802static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1803 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804{
David S. Millerd1918542011-12-28 20:19:20 -05001805 struct net *net = dev_net(ort->dst.dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001806 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001807 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001808
1809 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001810 rt->dst.input = ort->dst.input;
1811 rt->dst.output = ort->dst.output;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001812 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001813
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001814 rt->rt6i_dst.addr = *dest;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001815 rt->rt6i_dst.plen = 128;
David S. Millerdefb3512010-12-08 21:16:57 -08001816 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001817 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818 rt->rt6i_idev = ort->rt6i_idev;
1819 if (rt->rt6i_idev)
1820 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001821 rt->dst.lastuse = jiffies;
David S. Millerd1918542011-12-28 20:19:20 -05001822 rt->dst.expires = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001824 rt->rt6i_gateway = ort->rt6i_gateway;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1826 rt->rt6i_metric = 0;
1827
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828#ifdef CONFIG_IPV6_SUBTREES
1829 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1830#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001831 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001832 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833 }
1834 return rt;
1835}
1836
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001837#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001838static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001839 const struct in6_addr *prefix, int prefixlen,
1840 const struct in6_addr *gwaddr, int ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001841{
1842 struct fib6_node *fn;
1843 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001844 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001845
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001846 table = fib6_get_table(net, RT6_TABLE_INFO);
David S. Miller38308472011-12-03 18:02:47 -05001847 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001848 return NULL;
1849
1850 write_lock_bh(&table->tb6_lock);
1851 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001852 if (!fn)
1853 goto out;
1854
Changli Gaod8d1f302010-06-10 23:31:35 -07001855 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -05001856 if (rt->dst.dev->ifindex != ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001857 continue;
1858 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1859 continue;
1860 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1861 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001862 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001863 break;
1864 }
1865out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001866 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001867 return rt;
1868}
1869
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001870static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001871 const struct in6_addr *prefix, int prefixlen,
1872 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001873 unsigned pref)
1874{
Thomas Graf86872cb2006-08-22 00:01:08 -07001875 struct fib6_config cfg = {
1876 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001877 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001878 .fc_ifindex = ifindex,
1879 .fc_dst_len = prefixlen,
1880 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1881 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001882 .fc_nlinfo.pid = 0,
1883 .fc_nlinfo.nlh = NULL,
1884 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001885 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001886
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001887 cfg.fc_dst = *prefix;
1888 cfg.fc_gateway = *gwaddr;
Thomas Graf86872cb2006-08-22 00:01:08 -07001889
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001890 /* We should treat it as a default route if prefix length is 0. */
1891 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001892 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001893
Thomas Graf86872cb2006-08-22 00:01:08 -07001894 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001895
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001896 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001897}
1898#endif
1899
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001900struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001901{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001903 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001905 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
David S. Miller38308472011-12-03 18:02:47 -05001906 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001907 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908
Thomas Grafc71099a2006-08-04 23:20:06 -07001909 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001910 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
David S. Millerd1918542011-12-28 20:19:20 -05001911 if (dev == rt->dst.dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001912 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1914 break;
1915 }
1916 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001917 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001918 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919 return rt;
1920}
1921
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001922struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001923 struct net_device *dev,
1924 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925{
Thomas Graf86872cb2006-08-22 00:01:08 -07001926 struct fib6_config cfg = {
1927 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001928 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001929 .fc_ifindex = dev->ifindex,
1930 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1931 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001932 .fc_nlinfo.pid = 0,
1933 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001934 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001935 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001937 cfg.fc_gateway = *gwaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001938
Thomas Graf86872cb2006-08-22 00:01:08 -07001939 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940
Linus Torvalds1da177e2005-04-16 15:20:36 -07001941 return rt6_get_dflt_router(gwaddr, dev);
1942}
1943
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001944void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945{
1946 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001947 struct fib6_table *table;
1948
1949 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001950 table = fib6_get_table(net, RT6_TABLE_DFLT);
David S. Miller38308472011-12-03 18:02:47 -05001951 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07001952 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001953
1954restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001955 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001956 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001958 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001959 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001960 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 goto restart;
1962 }
1963 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001964 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965}
1966
Daniel Lezcano55786892008-03-04 13:47:47 -08001967static void rtmsg_to_fib6_config(struct net *net,
1968 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001969 struct fib6_config *cfg)
1970{
1971 memset(cfg, 0, sizeof(*cfg));
1972
1973 cfg->fc_table = RT6_TABLE_MAIN;
1974 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1975 cfg->fc_metric = rtmsg->rtmsg_metric;
1976 cfg->fc_expires = rtmsg->rtmsg_info;
1977 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1978 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1979 cfg->fc_flags = rtmsg->rtmsg_flags;
1980
Daniel Lezcano55786892008-03-04 13:47:47 -08001981 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001982
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001983 cfg->fc_dst = rtmsg->rtmsg_dst;
1984 cfg->fc_src = rtmsg->rtmsg_src;
1985 cfg->fc_gateway = rtmsg->rtmsg_gateway;
Thomas Graf86872cb2006-08-22 00:01:08 -07001986}
1987
Daniel Lezcano55786892008-03-04 13:47:47 -08001988int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989{
Thomas Graf86872cb2006-08-22 00:01:08 -07001990 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991 struct in6_rtmsg rtmsg;
1992 int err;
1993
1994 switch(cmd) {
1995 case SIOCADDRT: /* Add a route */
1996 case SIOCDELRT: /* Delete a route */
1997 if (!capable(CAP_NET_ADMIN))
1998 return -EPERM;
1999 err = copy_from_user(&rtmsg, arg,
2000 sizeof(struct in6_rtmsg));
2001 if (err)
2002 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07002003
Daniel Lezcano55786892008-03-04 13:47:47 -08002004 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07002005
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 rtnl_lock();
2007 switch (cmd) {
2008 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07002009 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002010 break;
2011 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07002012 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013 break;
2014 default:
2015 err = -EINVAL;
2016 }
2017 rtnl_unlock();
2018
2019 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07002020 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002021
2022 return -EINVAL;
2023}
2024
2025/*
2026 * Drop the packet on the floor
2027 */
2028
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07002029static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002031 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00002032 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002033 switch (ipstats_mib_noroutes) {
2034 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07002035 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00002036 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002037 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2038 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002039 break;
2040 }
2041 /* FALLTHROUGH */
2042 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002043 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2044 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002045 break;
2046 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002047 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048 kfree_skb(skb);
2049 return 0;
2050}
2051
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002052static int ip6_pkt_discard(struct sk_buff *skb)
2053{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002054 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002055}
2056
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03002057static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058{
Eric Dumazetadf30902009-06-02 05:19:30 +00002059 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002060 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061}
2062
David S. Miller6723ab52006-10-18 21:20:57 -07002063#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2064
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002065static int ip6_pkt_prohibit(struct sk_buff *skb)
2066{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002067 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002068}
2069
2070static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2071{
Eric Dumazetadf30902009-06-02 05:19:30 +00002072 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002073 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002074}
2075
David S. Miller6723ab52006-10-18 21:20:57 -07002076#endif
2077
Linus Torvalds1da177e2005-04-16 15:20:36 -07002078/*
2079 * Allocate a dst for local (unicast / anycast) address.
2080 */
2081
2082struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2083 const struct in6_addr *addr,
David S. Miller8f031512011-12-06 16:48:14 -05002084 bool anycast)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002086 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002087 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002088 net->loopback_dev, 0);
David S. Millerf83c7792011-12-28 15:41:23 -05002089 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090
David S. Miller38308472011-12-03 18:02:47 -05002091 if (!rt) {
Ben Greear40385652010-11-08 12:33:48 +00002092 if (net_ratelimit())
2093 pr_warning("IPv6: Maximum number of routes reached,"
2094 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002096 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098 in6_dev_hold(idev);
2099
David S. Miller11d53b42011-06-24 15:23:34 -07002100 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002101 rt->dst.input = ip6_input;
2102 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002104 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002105
2106 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002107 if (anycast)
2108 rt->rt6i_flags |= RTF_ANYCAST;
2109 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002110 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller8ade06c2011-12-29 18:51:57 -05002111 err = rt6_bind_neighbour(rt, rt->dst.dev);
David S. Millerf83c7792011-12-28 15:41:23 -05002112 if (err) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002113 dst_free(&rt->dst);
David S. Millerf83c7792011-12-28 15:41:23 -05002114 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115 }
2116
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002117 rt->rt6i_dst.addr = *addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002119 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120
Changli Gaod8d1f302010-06-10 23:31:35 -07002121 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122
2123 return rt;
2124}
2125
Daniel Walterc3968a82011-04-13 21:10:57 +00002126int ip6_route_get_saddr(struct net *net,
2127 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002128 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002129 unsigned int prefs,
2130 struct in6_addr *saddr)
2131{
2132 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2133 int err = 0;
2134 if (rt->rt6i_prefsrc.plen)
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002135 *saddr = rt->rt6i_prefsrc.addr;
Daniel Walterc3968a82011-04-13 21:10:57 +00002136 else
2137 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2138 daddr, prefs, saddr);
2139 return err;
2140}
2141
/*
 * remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to fib6_remove_prefsrc() through
 * fib6_clean_all().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* only routes on this device; NULL matches any */
	struct net *net;		/* namespace whose ip6_null_entry is skipped */
	struct in6_addr *addr;		/* address compared against each route's prefsrc */
};
2148
2149static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2150{
2151 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2152 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2153 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2154
David S. Millerd1918542011-12-28 20:19:20 -05002155 if (((void *)rt->dst.dev == dev || !dev) &&
Daniel Walterc3968a82011-04-13 21:10:57 +00002156 rt != net->ipv6.ip6_null_entry &&
2157 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2158 /* remove prefsrc entry */
2159 rt->rt6i_prefsrc.plen = 0;
2160 }
2161 return 0;
2162}
2163
2164void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2165{
2166 struct net *net = dev_net(ifp->idev->dev);
2167 struct arg_dev_net_ip adni = {
2168 .dev = ifp->idev->dev,
2169 .net = net,
2170 .addr = &ifp->addr,
2171 };
2172 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2173}
2174
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device being matched; NULL matches every route */
	struct net *net;		/* namespace whose ip6_null_entry is never selected */
};
2179
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180static int fib6_ifdown(struct rt6_info *rt, void *arg)
2181{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002182 const struct arg_dev_net *adn = arg;
2183 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002184
David S. Millerd1918542011-12-28 20:19:20 -05002185 if ((rt->dst.dev == dev || !dev) &&
David S. Millerc159d302011-12-26 15:24:36 -05002186 rt != adn->net->ipv6.ip6_null_entry)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187 return -1;
David S. Millerc159d302011-12-26 15:24:36 -05002188
Linus Torvalds1da177e2005-04-16 15:20:36 -07002189 return 0;
2190}
2191
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002192void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002194 struct arg_dev_net adn = {
2195 .dev = dev,
2196 .net = net,
2197 };
2198
2199 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002200 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201}
2202
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU value */
};
2208
2209static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2210{
2211 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2212 struct inet6_dev *idev;
2213
2214 /* In IPv6 pmtu discovery is not optional,
2215 so that RTAX_MTU lock cannot disable it.
2216 We still use this lock to block changes
2217 caused by addrconf/ndisc.
2218 */
2219
2220 idev = __in6_dev_get(arg->dev);
David S. Miller38308472011-12-03 18:02:47 -05002221 if (!idev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222 return 0;
2223
2224 /* For administrative MTU increase, there is no way to discover
2225 IPv6 PMTU increase, so PMTU increase should be updated here.
2226 Since RFC 1981 doesn't include administrative MTU increase
2227 update PMTU increase is a MUST. (i.e. jumbo frame)
2228 */
2229 /*
2230 If new MTU is less than route PMTU, this new MTU will be the
2231 lowest MTU in the path, update the route PMTU to reflect PMTU
2232 decreases; if new MTU is greater than route PMTU, and the
2233 old MTU is the lowest MTU in the path, update the route PMTU
2234 to reflect the increase. In this case if the other nodes' MTU
2235 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2236 PMTU discouvery.
2237 */
David S. Millerd1918542011-12-28 20:19:20 -05002238 if (rt->dst.dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002239 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2240 (dst_mtu(&rt->dst) >= arg->mtu ||
2241 (dst_mtu(&rt->dst) < arg->mtu &&
2242 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002243 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002244 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002245 return 0;
2246}
2247
2248void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2249{
Thomas Grafc71099a2006-08-04 23:20:06 -07002250 struct rt6_mtu_change_arg arg = {
2251 .dev = dev,
2252 .mtu = mtu,
2253 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002255 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256}
2257
/*
 * Attribute policy passed to nlmsg_parse() by the IPv6 route netlink
 * handlers in this file.
 *
 * Only the attributes listed here have an entry; others read by those
 * handlers (RTA_DST, RTA_SRC, RTA_PREFSRC, RTA_TABLE) have none, so the
 * handlers check or bound their length themselves.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	/* NOTE(review): .len without .type is presumably a minimum payload length - confirm */
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
};
2265
2266static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2267 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002268{
Thomas Graf86872cb2006-08-22 00:01:08 -07002269 struct rtmsg *rtm;
2270 struct nlattr *tb[RTA_MAX+1];
2271 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002272
Thomas Graf86872cb2006-08-22 00:01:08 -07002273 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2274 if (err < 0)
2275 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002276
Thomas Graf86872cb2006-08-22 00:01:08 -07002277 err = -EINVAL;
2278 rtm = nlmsg_data(nlh);
2279 memset(cfg, 0, sizeof(*cfg));
2280
2281 cfg->fc_table = rtm->rtm_table;
2282 cfg->fc_dst_len = rtm->rtm_dst_len;
2283 cfg->fc_src_len = rtm->rtm_src_len;
2284 cfg->fc_flags = RTF_UP;
2285 cfg->fc_protocol = rtm->rtm_protocol;
2286
2287 if (rtm->rtm_type == RTN_UNREACHABLE)
2288 cfg->fc_flags |= RTF_REJECT;
2289
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002290 if (rtm->rtm_type == RTN_LOCAL)
2291 cfg->fc_flags |= RTF_LOCAL;
2292
Thomas Graf86872cb2006-08-22 00:01:08 -07002293 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2294 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002295 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002296
2297 if (tb[RTA_GATEWAY]) {
2298 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2299 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002301
2302 if (tb[RTA_DST]) {
2303 int plen = (rtm->rtm_dst_len + 7) >> 3;
2304
2305 if (nla_len(tb[RTA_DST]) < plen)
2306 goto errout;
2307
2308 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002309 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002310
2311 if (tb[RTA_SRC]) {
2312 int plen = (rtm->rtm_src_len + 7) >> 3;
2313
2314 if (nla_len(tb[RTA_SRC]) < plen)
2315 goto errout;
2316
2317 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002319
Daniel Walterc3968a82011-04-13 21:10:57 +00002320 if (tb[RTA_PREFSRC])
2321 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2322
Thomas Graf86872cb2006-08-22 00:01:08 -07002323 if (tb[RTA_OIF])
2324 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2325
2326 if (tb[RTA_PRIORITY])
2327 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2328
2329 if (tb[RTA_METRICS]) {
2330 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2331 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002333
2334 if (tb[RTA_TABLE])
2335 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2336
2337 err = 0;
2338errout:
2339 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002340}
2341
Thomas Grafc127ea22007-03-22 11:58:32 -07002342static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002343{
Thomas Graf86872cb2006-08-22 00:01:08 -07002344 struct fib6_config cfg;
2345 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346
Thomas Graf86872cb2006-08-22 00:01:08 -07002347 err = rtm_to_fib6_config(skb, nlh, &cfg);
2348 if (err < 0)
2349 return err;
2350
2351 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352}
2353
Thomas Grafc127ea22007-03-22 11:58:32 -07002354static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355{
Thomas Graf86872cb2006-08-22 00:01:08 -07002356 struct fib6_config cfg;
2357 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358
Thomas Graf86872cb2006-08-22 00:01:08 -07002359 err = rtm_to_fib6_config(skb, nlh, &cfg);
2360 if (err < 0)
2361 return err;
2362
2363 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002364}
2365
Thomas Graf339bf982006-11-10 14:10:15 -08002366static inline size_t rt6_nlmsg_size(void)
2367{
2368 return NLMSG_ALIGN(sizeof(struct rtmsg))
2369 + nla_total_size(16) /* RTA_SRC */
2370 + nla_total_size(16) /* RTA_DST */
2371 + nla_total_size(16) /* RTA_GATEWAY */
2372 + nla_total_size(16) /* RTA_PREFSRC */
2373 + nla_total_size(4) /* RTA_TABLE */
2374 + nla_total_size(4) /* RTA_IIF */
2375 + nla_total_size(4) /* RTA_OIF */
2376 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002377 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002378 + nla_total_size(sizeof(struct rta_cacheinfo));
2379}
2380
/*
 * Append one rtnetlink route message describing @rt to @skb.
 *
 * @net:    namespace, passed on to the multicast and source-address helpers
 * @skb:    message buffer being filled
 * @rt:     route to describe
 * @dst:    if non-NULL, reported as RTA_DST with a prefix length of 128
 *          instead of the route's own destination prefix
 * @src:    same for RTA_SRC (only with CONFIG_IPV6_SUBTREES)
 * @iif:    if non-zero, reported as RTA_IIF (or handed to ip6mr_get_route()
 *          for multicast destinations with CONFIG_IPV6_MROUTE)
 * @type:   netlink message type
 * @pid:    netlink pid placed in the message header
 * @seq:    netlink sequence number placed in the message header
 * @prefix: non-zero to emit only routes flagged RTF_PREFIX_RT
 * @nowait: forwarded to ip6mr_get_route()
 * @flags:  netlink message flags
 *
 * Returns 1 without writing anything when @prefix filtering skips the
 * route, 0 when ip6mr_get_route() returns 0 and @nowait is not set,
 * -EMSGSIZE (after cancelling the partial message) when the message
 * cannot be built, and otherwise the result of nlmsg_end().
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	const struct inet_peer *peer;
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;
	u32 ts, tsage;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* The table id goes both into the header field and into RTA_TABLE. */
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Route type: reject routes first, then local flag or loopback device. */
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/*
	 * Start from the stored protocol, then override it from the route
	 * flags.  RTF_ADDRCONF is tested before RTF_DEFAULT, so RTPROT_RA
	 * is only reported for RTF_DEFAULT routes lacking RTF_ADDRCONF.
	 */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags & RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* An explicit @dst replaces the route's destination prefix. */
	if (dst) {
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					/* With nowait, only -EMSGSIZE aborts the message. */
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			/* Body of the "else" above when CONFIG_IPV6_MROUTE is set. */
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		/* No input interface: report the source address chosen for @dst. */
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	/*
	 * NOTE(review): when @iif is 0, @dst is set and the route has a
	 * prefsrc, RTA_PREFSRC appears to be emitted both above and here -
	 * confirm this is intended.
	 */
	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* The neighbour is obtained without a reference, hence the RCU section. */
	rcu_read_lock();
	n = dst_get_neighbour_noref(&rt->dst);
	if (n) {
		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
			rcu_read_unlock();
			goto nla_put_failure;
		}
	}
	rcu_read_unlock();

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;
	/* Remaining lifetime in jiffies: 0 without RTF_EXPIRES, capped at INT_MAX. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->dst.expires - jiffies < INT_MAX)
		expires = rt->dst.expires - jiffies;
	else
		expires = INT_MAX;

	/* TCP timestamp info is reported only if the peer has a stamp recorded. */
	peer = rt->rt6i_peer;
	ts = tsage = 0;
	if (peer && peer->tcp_ts_stamp) {
		ts = peer->tcp_ts;
		tsage = get_seconds() - peer->tcp_ts_stamp;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2529
Patrick McHardy1b43af52006-08-10 23:11:17 -07002530int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002531{
2532 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2533 int prefix;
2534
Thomas Graf2d7202b2006-08-22 00:01:27 -07002535 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2536 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002537 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2538 } else
2539 prefix = 0;
2540
Brian Haley191cd582008-08-14 15:33:21 -07002541 return rt6_fill_node(arg->net,
2542 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002544 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002545}
2546
Thomas Grafc127ea22007-03-22 11:58:32 -07002547static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002548{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002549 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002550 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002552 struct sk_buff *skb;
2553 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002554 struct flowi6 fl6;
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002555 int err, iif = 0, oif = 0;
Thomas Grafab364a62006-08-22 00:01:47 -07002556
2557 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2558 if (err < 0)
2559 goto errout;
2560
2561 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002562 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002563
2564 if (tb[RTA_SRC]) {
2565 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2566 goto errout;
2567
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002568 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
Thomas Grafab364a62006-08-22 00:01:47 -07002569 }
2570
2571 if (tb[RTA_DST]) {
2572 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2573 goto errout;
2574
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002575 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
Thomas Grafab364a62006-08-22 00:01:47 -07002576 }
2577
2578 if (tb[RTA_IIF])
2579 iif = nla_get_u32(tb[RTA_IIF]);
2580
2581 if (tb[RTA_OIF])
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002582 oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002583
2584 if (iif) {
2585 struct net_device *dev;
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002586 int flags = 0;
2587
Daniel Lezcano55786892008-03-04 13:47:47 -08002588 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002589 if (!dev) {
2590 err = -ENODEV;
2591 goto errout;
2592 }
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00002593
2594 fl6.flowi6_iif = iif;
2595
2596 if (!ipv6_addr_any(&fl6.saddr))
2597 flags |= RT6_LOOKUP_F_HAS_SADDR;
2598
2599 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2600 flags);
2601 } else {
2602 fl6.flowi6_oif = oif;
2603
2604 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
Thomas Grafab364a62006-08-22 00:01:47 -07002605 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002606
2607 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
David S. Miller38308472011-12-03 18:02:47 -05002608 if (!skb) {
Shmulik Ladkani2173bff2012-04-03 23:13:00 +00002609 dst_release(&rt->dst);
Thomas Grafab364a62006-08-22 00:01:47 -07002610 err = -ENOBUFS;
2611 goto errout;
2612 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002613
2614 /* Reserve room for dummy headers, this skb can pass
2615 through good chunk of routing engine.
2616 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002617 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002618 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2619
Changli Gaod8d1f302010-06-10 23:31:35 -07002620 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621
David S. Miller4c9483b2011-03-12 16:22:43 -05002622 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002623 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002624 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002625 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002626 kfree_skb(skb);
2627 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002628 }
2629
Daniel Lezcano55786892008-03-04 13:47:47 -08002630 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002631errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002632 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002633}
2634
Thomas Graf86872cb2006-08-22 00:01:08 -07002635void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002636{
2637 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002638 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002639 u32 seq;
2640 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002641
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002642 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05002643 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002644
Thomas Graf339bf982006-11-10 14:10:15 -08002645 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
David S. Miller38308472011-12-03 18:02:47 -05002646 if (!skb)
Thomas Graf21713eb2006-08-15 00:35:24 -07002647 goto errout;
2648
Brian Haley191cd582008-08-14 15:33:21 -07002649 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002650 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002651 if (err < 0) {
2652 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2653 WARN_ON(err == -EMSGSIZE);
2654 kfree_skb(skb);
2655 goto errout;
2656 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002657 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2658 info->nlh, gfp_any());
2659 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002660errout:
2661 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002662 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002663}
2664
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002665static int ip6_route_dev_notify(struct notifier_block *this,
2666 unsigned long event, void *data)
2667{
2668 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002669 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002670
2671 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002672 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002673 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2674#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002675 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002676 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002677 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002678 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2679#endif
2680 }
2681
2682 return NOTIFY_OK;
2683}
2684
Linus Torvalds1da177e2005-04-16 15:20:36 -07002685/*
2686 * /proc
2687 */
2688
2689#ifdef CONFIG_PROC_FS
2690
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; the field meanings are not established here -- confirm before
 * relying on (or removing) it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2699
2700static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2701{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002702 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002703 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002704
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002705 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002706
2707#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002708 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002709#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002710 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002711#endif
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002712 rcu_read_lock();
David Miller27217452011-12-02 16:52:08 +00002713 n = dst_get_neighbour_noref(&rt->dst);
David S. Miller69cce1d2011-07-17 23:09:49 -07002714 if (n) {
2715 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002716 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002717 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002718 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002719 rcu_read_unlock();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002720 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002721 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2722 rt->dst.__use, rt->rt6i_flags,
David S. Millerd1918542011-12-28 20:19:20 -05002723 rt->dst.dev ? rt->dst.dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002724 return 0;
2725}
2726
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002727static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002728{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002729 struct net *net = (struct net *)m->private;
Josh Hunt32b293a2011-12-28 13:23:07 +00002730 fib6_clean_all_ro(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002731 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002732}
2733
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002734static int ipv6_route_open(struct inode *inode, struct file *file)
2735{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002736 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002737}
2738
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002739static const struct file_operations ipv6_route_proc_fops = {
2740 .owner = THIS_MODULE,
2741 .open = ipv6_route_open,
2742 .read = seq_read,
2743 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002744 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002745};
2746
Linus Torvalds1da177e2005-04-16 15:20:36 -07002747static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2748{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002749 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002750 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002751 net->ipv6.rt6_stats->fib_nodes,
2752 net->ipv6.rt6_stats->fib_route_nodes,
2753 net->ipv6.rt6_stats->fib_rt_alloc,
2754 net->ipv6.rt6_stats->fib_rt_entries,
2755 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002756 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002757 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002758
2759 return 0;
2760}
2761
2762static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2763{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002764 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002765}
2766
Arjan van de Ven9a321442007-02-12 00:55:35 -08002767static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002768 .owner = THIS_MODULE,
2769 .open = rt6_stats_seq_open,
2770 .read = seq_read,
2771 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002772 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002773};
2774#endif /* CONFIG_PROC_FS */
2775
2776#ifdef CONFIG_SYSCTL
2777
Linus Torvalds1da177e2005-04-16 15:20:36 -07002778static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002779int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002780 void __user *buffer, size_t *lenp, loff_t *ppos)
2781{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002782 struct net *net;
2783 int delay;
2784 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002785 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002786
2787 net = (struct net *)ctl->extra1;
2788 delay = net->ipv6.sysctl.flush_delay;
2789 proc_dointvec(ctl, write, buffer, lenp, ppos);
2790 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2791 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002792}
2793
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002794ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002795 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002796 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002797 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002798 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002799 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002800 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002801 },
2802 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002803 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002804 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002805 .maxlen = sizeof(int),
2806 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002807 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002808 },
2809 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002810 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002811 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002812 .maxlen = sizeof(int),
2813 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002814 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002815 },
2816 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002817 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002818 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002819 .maxlen = sizeof(int),
2820 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002821 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002822 },
2823 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002824 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002825 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002826 .maxlen = sizeof(int),
2827 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002828 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002829 },
2830 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002831 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002832 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002833 .maxlen = sizeof(int),
2834 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002835 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002836 },
2837 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002838 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002839 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002840 .maxlen = sizeof(int),
2841 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002842 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002843 },
2844 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002845 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002846 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002847 .maxlen = sizeof(int),
2848 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002849 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002850 },
2851 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002852 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002853 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002854 .maxlen = sizeof(int),
2855 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002856 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002857 },
2858 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002859 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002860 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002861 .maxlen = sizeof(int),
2862 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002863 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002864 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002865 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002866};
2867
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002868struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002869{
2870 struct ctl_table *table;
2871
2872 table = kmemdup(ipv6_route_table_template,
2873 sizeof(ipv6_route_table_template),
2874 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002875
2876 if (table) {
2877 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002878 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002879 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002880 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2881 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2882 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2883 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2884 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2885 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2886 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002887 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002888 }
2889
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002890 return table;
2891}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002892#endif
2893
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002894static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002895{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07002896 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002897
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002898 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2899 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002900
Eric Dumazetfc66f952010-10-08 06:37:34 +00002901 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2902 goto out_ip6_dst_ops;
2903
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002904 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2905 sizeof(*net->ipv6.ip6_null_entry),
2906 GFP_KERNEL);
2907 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002908 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002909 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002910 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002911 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002912 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2913 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002914
2915#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2916 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2917 sizeof(*net->ipv6.ip6_prohibit_entry),
2918 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002919 if (!net->ipv6.ip6_prohibit_entry)
2920 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002921 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002922 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002923 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002924 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2925 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002926
2927 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2928 sizeof(*net->ipv6.ip6_blk_hole_entry),
2929 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002930 if (!net->ipv6.ip6_blk_hole_entry)
2931 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002932 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002933 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002934 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002935 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2936 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002937#endif
2938
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002939 net->ipv6.sysctl.flush_delay = 0;
2940 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2941 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2942 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2943 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2944 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2945 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2946 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2947
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002948#ifdef CONFIG_PROC_FS
2949 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2950 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2951#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002952 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2953
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002954 ret = 0;
2955out:
2956 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002957
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002958#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2959out_ip6_prohibit_entry:
2960 kfree(net->ipv6.ip6_prohibit_entry);
2961out_ip6_null_entry:
2962 kfree(net->ipv6.ip6_null_entry);
2963#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002964out_ip6_dst_entries:
2965 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002966out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002967 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002968}
2969
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002970static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002971{
2972#ifdef CONFIG_PROC_FS
2973 proc_net_remove(net, "ipv6_route");
2974 proc_net_remove(net, "rt6_stats");
2975#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002976 kfree(net->ipv6.ip6_null_entry);
2977#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2978 kfree(net->ipv6.ip6_prohibit_entry);
2979 kfree(net->ipv6.ip6_blk_hole_entry);
2980#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002981 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002982}
2983
2984static struct pernet_operations ip6_route_net_ops = {
2985 .init = ip6_route_net_init,
2986 .exit = ip6_route_net_exit,
2987};
2988
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002989static struct notifier_block ip6_route_dev_notifier = {
2990 .notifier_call = ip6_route_dev_notify,
2991 .priority = 0,
2992};
2993
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002994int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002995{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002996 int ret;
2997
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002998 ret = -ENOMEM;
2999 ip6_dst_ops_template.kmem_cachep =
3000 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3001 SLAB_HWCACHE_ALIGN, NULL);
3002 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08003003 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07003004
Eric Dumazetfc66f952010-10-08 06:37:34 +00003005 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003006 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08003007 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08003008
Eric Dumazetfc66f952010-10-08 06:37:34 +00003009 ret = register_pernet_subsys(&ip6_route_net_ops);
3010 if (ret)
3011 goto out_dst_entries;
3012
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07003013 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3014
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003015 /* Registering of the loopback is done before this portion of code,
3016 * the loopback reference in rt6_info will not be taken, do it
3017 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07003018 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003019 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3020 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07003021 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003022 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07003023 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003024 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3025 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003026 ret = fib6_init();
3027 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003028 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003029
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003030 ret = xfrm6_init();
3031 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08003032 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08003033
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003034 ret = fib6_rules_init();
3035 if (ret)
3036 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08003037
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003038 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00003039 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3040 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3041 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003042 goto fib6_rules_init;
3043
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003044 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08003045 if (ret)
3046 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003047
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003048out:
3049 return ret;
3050
3051fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003052 fib6_rules_cleanup();
3053xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003054 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003055out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003056 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003057out_register_subsys:
3058 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00003059out_dst_entries:
3060 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003061out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08003062 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08003063 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003064}
3065
3066void ip6_route_cleanup(void)
3067{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003068 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07003069 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003070 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003071 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003072 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00003073 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08003074 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003075}