/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <trace/events/fib6.h>

#include <linux/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int		ip6_pkt_prohibit(struct sk_buff *skb);
static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
					   struct sk_buff *skb, u32 mtu);
static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
					struct sk_buff *skb);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct rt6_info *rt);
static int rt6_fill_node(struct net *net, struct sk_buff *skb,
			 struct rt6_info *rt, struct dst_entry *dst,
			 struct in6_addr *dest, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
					    struct in6_addr *daddr,
					    struct in6_addr *saddr);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif

struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);

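/* rt6_info entries that live outside the FIB tree are tracked on a
 * per-CPU list so that rt6_uncached_list_flush_dev() can rehome them
 * (device and idev) to the loopback device when their device goes away.
 */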
void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;
		struct net *net = dev_net(rt->dst.dev);

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}

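/* Neighbour resolution helpers: prefer the route's gateway as the
 * neighbour key, fall back to the packet's destination address, and
 * finally to the caller-supplied daddr.
 */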
static inline const void *choose_neigh_daddr(const struct in6_addr *p,
					     struct sk_buff *skb,
					     const void *daddr)
{
	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
				   struct net_device *dev,
				   struct sk_buff *skb,
				   const void *daddr)
{
	struct neighbour *n;

	daddr = choose_neigh_daddr(gw, skb, daddr);
	n = __ipv6_neigh_lookup(dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dev);
}

static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
					      struct sk_buff *skb,
					      const void *daddr)
{
	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);

	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
}

static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	struct net_device *dev = dst->dev;
	struct rt6_info *rt = (struct rt6_info *)dst;

	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
	if (!daddr)
		return;
	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
		return;
	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
		return;
	__ipv6_confirm_neigh(dev, daddr);
}

static struct dst_ops ip6_dst_ops_template = {
	.family			= AF_INET6,
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.default_advmss		= ip6_default_advmss,
	.mtu			= ip6_mtu,
	.cow_metrics		= dst_cow_metrics_generic,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.redirect		= rt6_do_redirect,
	.local_out		= __ip6_local_out,
	.neigh_lookup		= ip6_dst_neigh_lookup,
	.confirm_neigh		= ip6_confirm_neigh,
};

static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.destroy		= ip6_dst_destroy,
	.check			= ip6_dst_check,
	.mtu			= ip6_blackhole_mtu,
	.default_advmss		= ip6_default_advmss,
	.update_pmtu		= ip6_rt_blackhole_update_pmtu,
	.redirect		= ip6_rt_blackhole_redirect,
	.cow_metrics		= dst_cow_metrics_generic,
	.neigh_lookup		= ip6_dst_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct rt6_info fib6_null_entry_template = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_UNREACHABLE,
	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_UNREACHABLE,
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_PROHIBIT,
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.fib6_type	= RTN_BLACKHOLE,
};

#endif

static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
	rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
}

/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					1, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		rt6_info_init(rt);
		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
	}

	return rt;
}

struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (!rt->rt6i_pcpu) {
			dst_release_immediate(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct rt6_exception_bucket *bucket;
	struct rt6_info *from = rt->from;
	struct inet6_dev *idev;
	struct dst_metrics *m;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
	if (bucket) {
		rt->rt6i_exception_bucket = NULL;
		kfree(bucket);
	}

	m = rt->fib6_metrics;
	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
		kfree(m);

	rt->from = NULL;
	dst_release(&from->dst);
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

static bool __rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES)
		return time_after(jiffies, rt->dst.expires);
	else
		return false;
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->from) {
		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
			fib6_check_expired(rt->from);
	}
	return false;
}

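/* Pick one route out of an ECMP group: the flow hash (fl6->mp_hash) is
 * compared against each nexthop's upper bound; the first sibling whose
 * bound covers the hash is used, unless rt6_score_route() rejects it,
 * in which case the original match is kept.
 */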
static struct rt6_info *rt6_multipath_select(const struct net *net,
					     struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     const struct sk_buff *skb,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;

	/* We might have already computed the hash for ICMPv6 errors. In such
	 * case it will always be non-zero. Otherwise now is the time to do it.
	 */
	if (!fl6->mp_hash)
		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);

	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
		return match;

	list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
				 rt6i_siblings) {
		int nh_upper_bound;

		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
		if (fl6->mp_hash > nh_upper_bound)
			continue;
		if (rt6_score_route(sibling, oif, strict) < 0)
			break;
		match = sibling;
		break;
	}

	return match;
}

/*
 *	Route lookup. rcu_read_lock() should be held.
 */

static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr) &&
	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
		return rt;

	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
		const struct net_device *dev = sprt->fib6_nh.nh_dev;

		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
			continue;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.fib6_null_entry;
	}

	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}

static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	const struct in6_addr *nh_gw;
	struct neighbour *neigh;
	struct net_device *dev;

	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	nh_gw = &rt->fib6_nh.nh_gw;
	dev = rt->fib6_nh.nh_dev;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = *nh_gw;
		dev_hold(dev);
		work->dev = dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	const struct net_device *dev = rt->fib6_nh.nh_dev;

	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
	struct neighbour *neigh;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
					  &rt->fib6_nh.nh_gw);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}

static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
		goto out;

	if (idev->cnf.ignore_routes_with_linkdown &&
	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (fib6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *leaf,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = leaf; rt && rt != rr_head;
	     rt = rcu_dereference(rt->rt6_next)) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}

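/* Select the route to use from a fib6 node: score the routes sharing the
 * lowest metric (interface match, router preference, neighbour
 * reachability) and, when several are equally good, round-robin among
 * them via fn->rr_ptr.
 */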
static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
				   int oif, int strict)
{
	struct rt6_info *leaf = rcu_dereference(fn->leaf);
	struct rt6_info *match, *rt0;
	bool do_rr = false;
	int key_plen;

	if (!leaf || leaf == net->ipv6.fib6_null_entry)
		return net->ipv6.fib6_null_entry;

	rt0 = rcu_dereference(fn->rr_ptr);
	if (!rt0)
		rt0 = leaf;

	/* Double check to make sure fn is not an intermediate node
	 * and fn->leaf does not point to its child's leaf
	 * (This might happen if all routes under fn are deleted from
	 * the tree and fib6_repair_tree() is called on the node.)
	 */
	key_plen = rt0->rt6i_dst.plen;
#ifdef CONFIG_IPV6_SUBTREES
	if (rt0->rt6i_src.plen)
		key_plen = rt0->rt6i_src.plen;
#endif
	if (fn->fn_bit != key_plen)
		return net->ipv6.fib6_null_entry;

	match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rcu_dereference(rt0->rt6_next);

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = leaf;

		if (next != rt0) {
			spin_lock_bh(&leaf->rt6i_table->tb6_lock);
			/* make sure next is not being deleted from the tree */
			if (next->rt6i_node)
				rcu_assign_pointer(fn->rr_ptr, next);
			spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
		}
	}

	return match ? match : net->ipv6.fib6_null_entry;
}

static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}

#ifdef CONFIG_IPV6_ROUTE_INFO
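/* Process a Route Information option received in a Router Advertisement:
 * add, refresh or delete the corresponding RTF_ROUTEINFO route depending
 * on the advertised lifetime and preference.
 */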
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(net, gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(net, rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			fib6_clean_expires(rt);
		else
			fib6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif

/*
 *	Misc support functions
 */

/* called with rcu_lock held */
static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
{
	struct net_device *dev = rt->fib6_nh.nh_dev;

	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
		/* for copies of local routes, dst->dev needs to be the
		 * device if it is a master device, the master device if
		 * device is enslaved, and the loopback as the default
		 */
		if (netif_is_l3_slave(dev) &&
		    !rt6_need_strict(&rt->rt6i_dst.addr))
			dev = l3mdev_master_dev_rcu(dev);
		else if (!netif_is_l3_master(dev))
			dev = dev_net(dev)->loopback_dev;
		/* last case is netif_is_l3_master(dev) is true in which
		 * case we want dev returned to be dev
		 */
	}

	return dev;
}

static const int fib6_prop[RTN_MAX + 1] = {
	[RTN_UNSPEC]	= 0,
	[RTN_UNICAST]	= 0,
	[RTN_LOCAL]	= 0,
	[RTN_BROADCAST]	= 0,
	[RTN_ANYCAST]	= 0,
	[RTN_MULTICAST]	= 0,
	[RTN_BLACKHOLE]	= -EINVAL,
	[RTN_UNREACHABLE] = -EHOSTUNREACH,
	[RTN_PROHIBIT]	= -EACCES,
	[RTN_THROW]	= -EAGAIN,
	[RTN_NAT]	= -EINVAL,
	[RTN_XRESOLVE]	= -EINVAL,
};

static int ip6_rt_type_to_error(u8 fib6_type)
{
	return fib6_prop[fib6_type];
}

static unsigned short fib6_info_dst_flags(struct rt6_info *rt)
{
	unsigned short flags = 0;

	if (rt->dst_nocount)
		flags |= DST_NOCOUNT;
	if (rt->dst_nopolicy)
		flags |= DST_NOPOLICY;
	if (rt->dst_host)
		flags |= DST_HOST;

	return flags;
}

static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);

	switch (ort->fib6_type) {
	case RTN_BLACKHOLE:
		rt->dst.output = dst_discard_out;
		rt->dst.input = dst_discard;
		break;
	case RTN_PROHIBIT:
		rt->dst.output = ip6_pkt_prohibit_out;
		rt->dst.input = ip6_pkt_prohibit;
		break;
	case RTN_THROW:
	case RTN_UNREACHABLE:
	default:
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		break;
	}
}

static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.flags |= fib6_info_dst_flags(ort);

	if (ort->rt6i_flags & RTF_REJECT) {
		ip6_rt_init_dst_reject(rt, ort);
		return;
	}

	rt->dst.error = 0;
	rt->dst.output = ip6_output;

	if (ort->fib6_type == RTN_LOCAL) {
		rt->dst.input = ip6_input;
	} else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) {
		rt->dst.input = ip6_mc_input;
	} else {
		rt->dst.input = ip6_forward;
	}

	if (ort->fib6_nh.nh_lwtstate) {
		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
		lwtunnel_set_redirect(&rt->dst);
	}

	rt->dst.lastuse = jiffies;
}

static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->from);

	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->from = from;
	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
	if (from->fib6_metrics != &dst_default_metrics) {
		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
		refcount_inc(&from->fib6_metrics->refcnt);
	}
}

static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	ip6_rt_init_dst(rt, ort);

	rt->rt6i_dst = ort->rt6i_dst;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
}

static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn, *sn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = rcu_dereference(fn->parent);
		sn = FIB6_SUBTREE(pn);
		if (sn && sn != fn)
			fn = fib6_lookup(sn, NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}

static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
			  bool null_fallback)
{
	struct rt6_info *rt = *prt;

	if (dst_hold_safe(&rt->dst))
		return true;
	if (null_fallback) {
		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
	} else {
		rt = NULL;
	}
	*prt = rt;
	return false;
}

/* called with rcu_lock held */
static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
{
	unsigned short flags = fib6_info_dst_flags(rt);
	struct net_device *dev = rt->fib6_nh.nh_dev;
	struct rt6_info *nrt;

	nrt = __ip6_dst_alloc(dev_net(dev), dev, flags);
	if (nrt)
		ip6_rt_copy_init(nrt, rt);

	return nrt;
}

Daniel Lezcano8ed67782008-03-04 13:48:30 -08001094static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1095 struct fib6_table *table,
David Ahernb75cc8f2018-03-02 08:32:17 -08001096 struct flowi6 *fl6,
1097 const struct sk_buff *skb,
1098 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099{
Wei Wang2b760fc2017-10-06 12:06:03 -07001100 struct rt6_info *rt, *rt_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101 struct fib6_node *fn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102
David Ahernb6cdbc82018-03-29 17:44:57 -07001103 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1104 flags &= ~RT6_LOOKUP_F_IFACE;
1105
Wei Wang66f5d6c2017-10-06 12:06:10 -07001106 rcu_read_lock();
David S. Miller4c9483b2011-03-12 16:22:43 -05001107 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -07001108restart:
Wei Wang66f5d6c2017-10-06 12:06:10 -07001109 rt = rcu_dereference(fn->leaf);
1110 if (!rt) {
David Ahern421842e2018-04-17 17:33:18 -07001111 rt = net->ipv6.fib6_null_entry;
Wei Wang66f5d6c2017-10-06 12:06:10 -07001112 } else {
1113 rt = rt6_device_match(net, rt, &fl6->saddr,
1114 fl6->flowi6_oif, flags);
1115 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
David Ahernb4bac172018-03-02 08:32:18 -08001116 rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
David Ahernb75cc8f2018-03-02 08:32:17 -08001117 skb, flags);
Wei Wang66f5d6c2017-10-06 12:06:10 -07001118 }
David Ahern421842e2018-04-17 17:33:18 -07001119 if (rt == net->ipv6.fib6_null_entry) {
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001120 fn = fib6_backtrack(fn, &fl6->saddr);
1121 if (fn)
1122 goto restart;
1123 }
Wei Wang2b760fc2017-10-06 12:06:03 -07001124 /* Search through exception table */
1125 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
David Aherndec9b0e2018-04-17 17:33:19 -07001126 if (rt_cache) {
Wei Wang2b760fc2017-10-06 12:06:03 -07001127 rt = rt_cache;
David Aherndec9b0e2018-04-17 17:33:19 -07001128 if (ip6_hold_safe(net, &rt, true))
1129 dst_use_noref(&rt->dst, jiffies);
1130 } else if (dst_hold_safe(&rt->dst)) {
1131 struct rt6_info *nrt;
Wei Wang2b760fc2017-10-06 12:06:03 -07001132
David Aherndec9b0e2018-04-17 17:33:19 -07001133 nrt = ip6_create_rt_rcu(rt);
1134 dst_release(&rt->dst);
1135 rt = nrt;
1136 } else {
1137 rt = net->ipv6.ip6_null_entry;
1138 dst_hold(&rt->dst);
1139 }
Wei Wangd3843fe2017-10-06 12:06:06 -07001140
Wei Wang66f5d6c2017-10-06 12:06:10 -07001141 rcu_read_unlock();
David Ahernb8115802015-11-19 12:24:22 -08001142
Paolo Abenib65f1642017-10-19 09:31:43 +02001143 trace_fib6_table_lookup(net, rt, table, fl6);
David Ahernb8115802015-11-19 12:24:22 -08001144
Thomas Grafc71099a2006-08-04 23:20:06 -07001145 return rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001146}
1147
Ian Morris67ba4152014-08-24 21:53:10 +01001148struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
David Ahernb75cc8f2018-03-02 08:32:17 -08001149 const struct sk_buff *skb, int flags)
Florian Westphalea6e5742011-09-05 16:05:44 +02001150{
David Ahernb75cc8f2018-03-02 08:32:17 -08001151 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
Florian Westphalea6e5742011-09-05 16:05:44 +02001152}
1153EXPORT_SYMBOL_GPL(ip6_route_lookup);
1154
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001155struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
David Ahernb75cc8f2018-03-02 08:32:17 -08001156 const struct in6_addr *saddr, int oif,
1157 const struct sk_buff *skb, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -07001158{
David S. Miller4c9483b2011-03-12 16:22:43 -05001159 struct flowi6 fl6 = {
1160 .flowi6_oif = oif,
1161 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -07001162 };
1163 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -07001164 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07001165
Thomas Grafadaa70b2006-10-13 15:01:03 -07001166 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -05001167 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -07001168 flags |= RT6_LOOKUP_F_HAS_SADDR;
1169 }
1170
David Ahernb75cc8f2018-03-02 08:32:17 -08001171 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -07001172 if (dst->error == 0)
1173 return (struct rt6_info *) dst;
1174
1175 dst_release(dst);
1176
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 return NULL;
1178}
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +09001179EXPORT_SYMBOL(rt6_lookup);
1180
Thomas Grafc71099a2006-08-04 23:20:06 -07001181/* ip6_ins_rt is called with FREE table->tb6_lock.
Wei Wang1cfb71e2017-06-17 10:42:33 -07001182 * It takes new route entry, the addition fails by any reason the
1183 * route is released.
1184 * Caller must hold dst before calling it.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 */
1186
Michal Kubečeke5fd3872014-03-27 13:04:08 +01001187static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
David Ahern333c4302017-05-21 10:12:04 -06001188 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189{
1190 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001191 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192
Thomas Grafc71099a2006-08-04 23:20:06 -07001193 table = rt->rt6i_table;
Wei Wang66f5d6c2017-10-06 12:06:10 -07001194 spin_lock_bh(&table->tb6_lock);
David Ahernd4ead6b2018-04-17 17:33:16 -07001195 err = fib6_add(&table->tb6_root, rt, info, extack);
Wei Wang66f5d6c2017-10-06 12:06:10 -07001196 spin_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197
1198 return err;
1199}
1200
David Ahernafb1d4b52018-04-17 17:33:11 -07001201int ip6_ins_rt(struct net *net, struct rt6_info *rt)
Thomas Graf40e22e82006-08-22 00:00:45 -07001202{
David Ahernafb1d4b52018-04-17 17:33:11 -07001203 struct nl_info info = { .nl_net = net, };
Florian Westphale715b6d2015-01-05 23:57:44 +01001204
Wei Wang1cfb71e2017-06-17 10:42:33 -07001205 /* Hold dst to account for the reference from the fib6 tree */
1206 dst_hold(&rt->dst);
David Ahernd4ead6b2018-04-17 17:33:16 -07001207 return __ip6_ins_rt(rt, &info, NULL);
Thomas Graf40e22e82006-08-22 00:00:45 -07001208}
1209
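/* ip6_rt_cache_alloc - allocate a dst-cache clone of @ort.
 * The clone is marked RTF_CACHE, uses a /128 destination (and, with
 * CONFIG_IPV6_SUBTREES, a /128 source) and is not linked into the fib6
 * tree itself; it backs the PMTU/redirect exception entries and the
 * FLOWI_FLAG_KNOWN_NH special case in ip6_pol_route().
 */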
Martin KaFai Lau8b9df262015-05-22 20:55:59 -07001210static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1211 const struct in6_addr *daddr,
1212 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213{
David Ahern4832c302017-08-17 12:17:20 -07001214 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 struct rt6_info *rt;
1216
1217 /*
1218 * Clone the route.
1219 */
1220
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001221 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
David Miller3a2232e2017-11-28 15:40:40 -05001222 ort = ort->from;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001223
David Ahern4832c302017-08-17 12:17:20 -07001224 rcu_read_lock();
1225 dev = ip6_rt_get_dev_rcu(ort);
1226 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1227 rcu_read_unlock();
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07001228 if (!rt)
1229 return NULL;
1230
1231 ip6_rt_copy_init(rt, ort);
1232 rt->rt6i_flags |= RTF_CACHE;
1233 rt->rt6i_metric = 0;
1234 rt->dst.flags |= DST_HOST;
1235 rt->rt6i_dst.addr = *daddr;
1236 rt->rt6i_dst.plen = 128;
1237
1238 if (!rt6_is_gw_or_nonexthop(ort)) {
1239 if (ort->rt6i_dst.plen != 128 &&
1240 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1241 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242#ifdef CONFIG_IPV6_SUBTREES
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07001243 if (rt->rt6i_src.plen && saddr) {
1244 rt->rt6i_src.addr = *saddr;
1245 rt->rt6i_src.plen = 128;
Martin KaFai Lau8b9df262015-05-22 20:55:59 -07001246 }
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07001247#endif
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -08001248 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001249
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -08001250 return rt;
1251}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001252
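/* Per-CPU dst copies: each fib6 entry keeps a percpu array (rt6i_pcpu) of
 * RTF_PCPU clones so the lookup fast path can hand out a CPU-local dst.
 * rt6_get_pcpu_route() fetches the copy for the current CPU;
 * rt6_make_pcpu_route() allocates one and publishes it with cmpxchg(),
 * so only one writer can ever install a given slot.
 */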
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001253static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1254{
David Ahern3b6761d2018-04-17 17:33:20 -07001255 unsigned short flags = fib6_info_dst_flags(rt);
David Ahern4832c302017-08-17 12:17:20 -07001256 struct net_device *dev;
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001257 struct rt6_info *pcpu_rt;
1258
David Ahern4832c302017-08-17 12:17:20 -07001259 rcu_read_lock();
1260 dev = ip6_rt_get_dev_rcu(rt);
David Ahern3b6761d2018-04-17 17:33:20 -07001261 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, flags);
David Ahern4832c302017-08-17 12:17:20 -07001262 rcu_read_unlock();
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001263 if (!pcpu_rt)
1264 return NULL;
1265 ip6_rt_copy_init(pcpu_rt, rt);
1266 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1267 pcpu_rt->rt6i_flags |= RTF_PCPU;
1268 return pcpu_rt;
1269}
1270
Wei Wang66f5d6c2017-10-06 12:06:10 -07001271/* It should be called with rcu_read_lock() acquired */
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001272static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1273{
Martin KaFai Laua73e4192015-08-14 11:05:53 -07001274 struct rt6_info *pcpu_rt, **p;
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001275
1276 p = this_cpu_ptr(rt->rt6i_pcpu);
1277 pcpu_rt = *p;
1278
David Ahernd4ead6b2018-04-17 17:33:16 -07001279 if (pcpu_rt)
1280 ip6_hold_safe(NULL, &pcpu_rt, false);
Wei Wangd3843fe2017-10-06 12:06:06 -07001281
Martin KaFai Laua73e4192015-08-14 11:05:53 -07001282 return pcpu_rt;
1283}
1284
David Ahernafb1d4b52018-04-17 17:33:11 -07001285static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1286 struct rt6_info *rt)
Martin KaFai Laua73e4192015-08-14 11:05:53 -07001287{
1288 struct rt6_info *pcpu_rt, *prev, **p;
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001289
1290 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1291 if (!pcpu_rt) {
Martin KaFai Lau9c7370a2015-08-14 11:05:54 -07001292 dst_hold(&net->ipv6.ip6_null_entry->dst);
1293 return net->ipv6.ip6_null_entry;
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001294 }
1295
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001296 dst_hold(&pcpu_rt->dst);
Wei Wanga94b9362017-10-06 12:06:04 -07001297 p = this_cpu_ptr(rt->rt6i_pcpu);
1298 prev = cmpxchg(p, NULL, pcpu_rt);
Eric Dumazet951f7882017-10-08 21:07:18 -07001299 BUG_ON(prev);
Wei Wanga94b9362017-10-06 12:06:04 -07001300
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001301 return pcpu_rt;
1302}
1303
Wei Wang35732d02017-10-06 12:05:57 -07001304/* exception hash table implementation
1305 */
1306static DEFINE_SPINLOCK(rt6_exception_lock);
1307
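/* Each (non-cached) fib6 entry can own a small hash table of RTF_CACHE
 * exception routes (PMTU and redirect clones), keyed by destination and,
 * under CONFIG_IPV6_SUBTREES, by source as well.  Writers serialize on
 * rt6_exception_lock; readers walk the buckets under RCU.  A bucket is
 * kept at FIB6_MAX_DEPTH entries by evicting its oldest exception.
 */
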
1308/* Remove rt6_ex from hash table and free the memory
1309 * Caller must hold rt6_exception_lock
1310 */
1311static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1312 struct rt6_exception *rt6_ex)
1313{
Colin Ian Kingb2427e62017-10-10 18:01:16 +01001314 struct net *net;
Wei Wang81eb8442017-10-06 12:06:11 -07001315
Wei Wang35732d02017-10-06 12:05:57 -07001316 if (!bucket || !rt6_ex)
1317 return;
Colin Ian Kingb2427e62017-10-10 18:01:16 +01001318
1319 net = dev_net(rt6_ex->rt6i->dst.dev);
Wei Wang35732d02017-10-06 12:05:57 -07001320 rt6_ex->rt6i->rt6i_node = NULL;
1321 hlist_del_rcu(&rt6_ex->hlist);
1322 rt6_release(rt6_ex->rt6i);
1323 kfree_rcu(rt6_ex, rcu);
1324 WARN_ON_ONCE(!bucket->depth);
1325 bucket->depth--;
Wei Wang81eb8442017-10-06 12:06:11 -07001326 net->ipv6.rt6_stats->fib_rt_cache--;
Wei Wang35732d02017-10-06 12:05:57 -07001327}
1328
1329/* Remove oldest rt6_ex in bucket and free the memory
1330 * Caller must hold rt6_exception_lock
1331 */
1332static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1333{
1334 struct rt6_exception *rt6_ex, *oldest = NULL;
1335
1336 if (!bucket)
1337 return;
1338
1339 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1340 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1341 oldest = rt6_ex;
1342 }
1343 rt6_remove_exception(bucket, oldest);
1344}
1345
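/* Hash the (dst, src) pair into a bucket index: jhash of the destination
 * with a lazily initialised random seed, mixed with the source only when
 * subtrees are enabled, then folded down to
 * FIB6_EXCEPTION_BUCKET_SIZE_SHIFT bits by hash_32().
 */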
1346static u32 rt6_exception_hash(const struct in6_addr *dst,
1347 const struct in6_addr *src)
1348{
1349 static u32 seed __read_mostly;
1350 u32 val;
1351
1352 net_get_random_once(&seed, sizeof(seed));
1353 val = jhash(dst, sizeof(*dst), seed);
1354
1355#ifdef CONFIG_IPV6_SUBTREES
1356 if (src)
1357 val = jhash(src, sizeof(*src), val);
1358#endif
1359 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1360}
1361
1362/* Helper function to find the cached rt in the hash table
1363 * and update bucket pointer to point to the bucket for this
1364 * (daddr, saddr) pair
1365 * Caller must hold rt6_exception_lock
1366 */
1367static struct rt6_exception *
1368__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1369 const struct in6_addr *daddr,
1370 const struct in6_addr *saddr)
1371{
1372 struct rt6_exception *rt6_ex;
1373 u32 hval;
1374
1375 if (!(*bucket) || !daddr)
1376 return NULL;
1377
1378 hval = rt6_exception_hash(daddr, saddr);
1379 *bucket += hval;
1380
1381 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1382 struct rt6_info *rt6 = rt6_ex->rt6i;
1383 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1384
1385#ifdef CONFIG_IPV6_SUBTREES
1386 if (matched && saddr)
1387 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1388#endif
1389 if (matched)
1390 return rt6_ex;
1391 }
1392 return NULL;
1393}
1394
1395/* Helper function to find the cached rt in the hash table
1396 * and update bucket pointer to point to the bucket for this
1397 * (daddr, saddr) pair
1398 * Caller must hold rcu_read_lock()
1399 */
1400static struct rt6_exception *
1401__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1402 const struct in6_addr *daddr,
1403 const struct in6_addr *saddr)
1404{
1405 struct rt6_exception *rt6_ex;
1406 u32 hval;
1407
1408 WARN_ON_ONCE(!rcu_read_lock_held());
1409
1410 if (!(*bucket) || !daddr)
1411 return NULL;
1412
1413 hval = rt6_exception_hash(daddr, saddr);
1414 *bucket += hval;
1415
1416 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1417 struct rt6_info *rt6 = rt6_ex->rt6i;
1418 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1419
1420#ifdef CONFIG_IPV6_SUBTREES
1421 if (matched && saddr)
1422 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1423#endif
1424 if (matched)
1425 return rt6_ex;
1426 }
1427 return NULL;
1428}
1429
David Ahernd4ead6b2018-04-17 17:33:16 -07001430static unsigned int fib6_mtu(const struct rt6_info *rt)
1431{
1432 unsigned int mtu;
1433
1434 mtu = rt->fib6_pmtu ? : rt->rt6i_idev->cnf.mtu6;
1435 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1436
1437 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1438}
1439
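/* Insert @nrt as a cached exception of @ort.  The bucket array is
 * allocated lazily under rt6_exception_lock; an existing entry for the
 * same (daddr, saddr) key is replaced, the per-bucket depth is capped at
 * FIB6_MAX_DEPTH by evicting the oldest entry, and on success the node's
 * sernum is bumped so that stale cached dsts get revalidated.
 */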
Wei Wang35732d02017-10-06 12:05:57 -07001440static int rt6_insert_exception(struct rt6_info *nrt,
1441 struct rt6_info *ort)
1442{
David Ahern5e670d82018-04-17 17:33:14 -07001443 struct net *net = dev_net(nrt->dst.dev);
Wei Wang35732d02017-10-06 12:05:57 -07001444 struct rt6_exception_bucket *bucket;
1445 struct in6_addr *src_key = NULL;
1446 struct rt6_exception *rt6_ex;
1447 int err = 0;
1448
1449 /* ort can't be a cache or pcpu route */
1450 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
David Miller3a2232e2017-11-28 15:40:40 -05001451 ort = ort->from;
Wei Wang35732d02017-10-06 12:05:57 -07001452 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1453
1454 spin_lock_bh(&rt6_exception_lock);
1455
1456 if (ort->exception_bucket_flushed) {
1457 err = -EINVAL;
1458 goto out;
1459 }
1460
1461 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1462 lockdep_is_held(&rt6_exception_lock));
1463 if (!bucket) {
1464 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1465 GFP_ATOMIC);
1466 if (!bucket) {
1467 err = -ENOMEM;
1468 goto out;
1469 }
1470 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1471 }
1472
1473#ifdef CONFIG_IPV6_SUBTREES
1474 /* rt6i_src.plen != 0 indicates ort is in subtree
1475 * and exception table is indexed by a hash of
1476 * both rt6i_dst and rt6i_src.
1477 * Otherwise, the exception table is indexed by
1478 * a hash of only rt6i_dst.
1479 */
1480 if (ort->rt6i_src.plen)
1481 src_key = &nrt->rt6i_src.addr;
1482#endif
Wei Wang60006a42017-10-06 12:05:58 -07001483
1484 /* Update rt6i_prefsrc as it could be changed
1485 * in rt6_remove_prefsrc()
1486 */
1487 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
Wei Wangf5bbe7e2017-10-06 12:05:59 -07001488 /* rt6_mtu_change() might lower mtu on ort.
1489 * Only insert this exception route if its mtu
1490 * is less than ort's mtu value.
1491 */
David Ahernd4ead6b2018-04-17 17:33:16 -07001492 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
Wei Wangf5bbe7e2017-10-06 12:05:59 -07001493 err = -EINVAL;
1494 goto out;
1495 }
Wei Wang60006a42017-10-06 12:05:58 -07001496
Wei Wang35732d02017-10-06 12:05:57 -07001497 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1498 src_key);
1499 if (rt6_ex)
1500 rt6_remove_exception(bucket, rt6_ex);
1501
1502 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1503 if (!rt6_ex) {
1504 err = -ENOMEM;
1505 goto out;
1506 }
1507 rt6_ex->rt6i = nrt;
1508 rt6_ex->stamp = jiffies;
1509 atomic_inc(&nrt->rt6i_ref);
1510 nrt->rt6i_node = ort->rt6i_node;
1511 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1512 bucket->depth++;
Wei Wang81eb8442017-10-06 12:06:11 -07001513 net->ipv6.rt6_stats->fib_rt_cache++;
Wei Wang35732d02017-10-06 12:05:57 -07001514
1515 if (bucket->depth > FIB6_MAX_DEPTH)
1516 rt6_exception_remove_oldest(bucket);
1517
1518out:
1519 spin_unlock_bh(&rt6_exception_lock);
1520
1521 /* Update fn->fn_sernum to invalidate all cached dst */
Paolo Abenib886d5f2017-10-19 16:07:10 +02001522 if (!err) {
Ido Schimmel922c2ac2018-01-07 12:45:14 +02001523 spin_lock_bh(&ort->rt6i_table->tb6_lock);
David Ahern7aef6852018-04-17 17:33:10 -07001524 fib6_update_sernum(net, ort);
Ido Schimmel922c2ac2018-01-07 12:45:14 +02001525 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
Paolo Abenib886d5f2017-10-19 16:07:10 +02001526 fib6_force_start_gc(net);
1527 }
Wei Wang35732d02017-10-06 12:05:57 -07001528
1529 return err;
1530}
1531
1532void rt6_flush_exceptions(struct rt6_info *rt)
1533{
1534 struct rt6_exception_bucket *bucket;
1535 struct rt6_exception *rt6_ex;
1536 struct hlist_node *tmp;
1537 int i;
1538
1539 spin_lock_bh(&rt6_exception_lock);
 1540	/* Prevent rt6_insert_exception() from recreating the bucket list */
1541 rt->exception_bucket_flushed = 1;
1542
1543 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1544 lockdep_is_held(&rt6_exception_lock));
1545 if (!bucket)
1546 goto out;
1547
1548 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1549 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1550 rt6_remove_exception(bucket, rt6_ex);
1551 WARN_ON_ONCE(bucket->depth);
1552 bucket++;
1553 }
1554
1555out:
1556 spin_unlock_bh(&rt6_exception_lock);
1557}
1558
 1559/* Find the cached rt in the hash table inside the passed-in rt
 1560 * Caller has to hold rcu_read_lock()
1561 */
1562static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1563 struct in6_addr *daddr,
1564 struct in6_addr *saddr)
1565{
1566 struct rt6_exception_bucket *bucket;
1567 struct in6_addr *src_key = NULL;
1568 struct rt6_exception *rt6_ex;
1569 struct rt6_info *res = NULL;
1570
1571 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1572
1573#ifdef CONFIG_IPV6_SUBTREES
1574 /* rt6i_src.plen != 0 indicates rt is in subtree
1575 * and exception table is indexed by a hash of
1576 * both rt6i_dst and rt6i_src.
1577 * Otherwise, the exception table is indexed by
1578 * a hash of only rt6i_dst.
1579 */
1580 if (rt->rt6i_src.plen)
1581 src_key = saddr;
1582#endif
1583 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1584
1585 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1586 res = rt6_ex->rt6i;
1587
1588 return res;
1589}
1590
 1591/* Remove the passed-in cached rt from the hash table that contains it */
1592int rt6_remove_exception_rt(struct rt6_info *rt)
1593{
Wei Wang35732d02017-10-06 12:05:57 -07001594 struct rt6_exception_bucket *bucket;
David Miller3a2232e2017-11-28 15:40:40 -05001595 struct rt6_info *from = rt->from;
Wei Wang35732d02017-10-06 12:05:57 -07001596 struct in6_addr *src_key = NULL;
1597 struct rt6_exception *rt6_ex;
1598 int err;
1599
1600 if (!from ||
Colin Ian King442d7132017-10-10 19:10:30 +01001601 !(rt->rt6i_flags & RTF_CACHE))
Wei Wang35732d02017-10-06 12:05:57 -07001602 return -EINVAL;
1603
1604 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1605 return -ENOENT;
1606
1607 spin_lock_bh(&rt6_exception_lock);
1608 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1609 lockdep_is_held(&rt6_exception_lock));
1610#ifdef CONFIG_IPV6_SUBTREES
1611 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1612 * and exception table is indexed by a hash of
1613 * both rt6i_dst and rt6i_src.
1614 * Otherwise, the exception table is indexed by
1615 * a hash of only rt6i_dst.
1616 */
1617 if (from->rt6i_src.plen)
1618 src_key = &rt->rt6i_src.addr;
1619#endif
1620 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1621 &rt->rt6i_dst.addr,
1622 src_key);
1623 if (rt6_ex) {
1624 rt6_remove_exception(bucket, rt6_ex);
1625 err = 0;
1626 } else {
1627 err = -ENOENT;
1628 }
1629
1630 spin_unlock_bh(&rt6_exception_lock);
1631 return err;
1632}
1633
 1634/* Find the rt6_ex that contains the passed-in cached rt and
 1635 * refresh its stamp
1636 */
1637static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1638{
Wei Wang35732d02017-10-06 12:05:57 -07001639 struct rt6_exception_bucket *bucket;
David Miller3a2232e2017-11-28 15:40:40 -05001640 struct rt6_info *from = rt->from;
Wei Wang35732d02017-10-06 12:05:57 -07001641 struct in6_addr *src_key = NULL;
1642 struct rt6_exception *rt6_ex;
1643
1644 if (!from ||
Colin Ian King442d7132017-10-10 19:10:30 +01001645 !(rt->rt6i_flags & RTF_CACHE))
Wei Wang35732d02017-10-06 12:05:57 -07001646 return;
1647
1648 rcu_read_lock();
1649 bucket = rcu_dereference(from->rt6i_exception_bucket);
1650
1651#ifdef CONFIG_IPV6_SUBTREES
1652 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1653 * and exception table is indexed by a hash of
1654 * both rt6i_dst and rt6i_src.
1655 * Otherwise, the exception table is indexed by
1656 * a hash of only rt6i_dst.
1657 */
1658 if (from->rt6i_src.plen)
1659 src_key = &rt->rt6i_src.addr;
1660#endif
1661 rt6_ex = __rt6_find_exception_rcu(&bucket,
1662 &rt->rt6i_dst.addr,
1663 src_key);
1664 if (rt6_ex)
1665 rt6_ex->stamp = jiffies;
1666
1667 rcu_read_unlock();
1668}
1669
Wei Wang60006a42017-10-06 12:05:58 -07001670static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1671{
1672 struct rt6_exception_bucket *bucket;
1673 struct rt6_exception *rt6_ex;
1674 int i;
1675
1676 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1677 lockdep_is_held(&rt6_exception_lock));
1678
1679 if (bucket) {
1680 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1681 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1682 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1683 }
1684 bucket++;
1685 }
1686 }
1687}
1688
Stefano Brivioe9fa1492018-03-06 11:10:19 +01001689static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1690 struct rt6_info *rt, int mtu)
1691{
1692 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1693 * lowest MTU in the path: always allow updating the route PMTU to
1694 * reflect PMTU decreases.
1695 *
1696 * If the new MTU is higher, and the route PMTU is equal to the local
1697 * MTU, this means the old MTU is the lowest in the path, so allow
1698 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1699 * handle this.
1700 */
1701
1702 if (dst_mtu(&rt->dst) >= mtu)
1703 return true;
1704
1705 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1706 return true;
1707
1708 return false;
1709}
1710
1711static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1712 struct rt6_info *rt, int mtu)
Wei Wangf5bbe7e2017-10-06 12:05:59 -07001713{
1714 struct rt6_exception_bucket *bucket;
1715 struct rt6_exception *rt6_ex;
1716 int i;
1717
1718 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1719 lockdep_is_held(&rt6_exception_lock));
1720
Stefano Brivioe9fa1492018-03-06 11:10:19 +01001721 if (!bucket)
1722 return;
1723
1724 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1725 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1726 struct rt6_info *entry = rt6_ex->rt6i;
1727
1728 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
David Ahernd4ead6b2018-04-17 17:33:16 -07001729 * route), the metrics of its rt->from have already
Stefano Brivioe9fa1492018-03-06 11:10:19 +01001730 * been updated.
1731 */
David Ahernd4ead6b2018-04-17 17:33:16 -07001732 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
Stefano Brivioe9fa1492018-03-06 11:10:19 +01001733 rt6_mtu_change_route_allowed(idev, entry, mtu))
David Ahernd4ead6b2018-04-17 17:33:16 -07001734 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
Wei Wangf5bbe7e2017-10-06 12:05:59 -07001735 }
Stefano Brivioe9fa1492018-03-06 11:10:19 +01001736 bucket++;
Wei Wangf5bbe7e2017-10-06 12:05:59 -07001737 }
1738}
1739
Wei Wangb16cb452017-10-06 12:06:00 -07001740#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1741
1742static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1743 struct in6_addr *gateway)
1744{
1745 struct rt6_exception_bucket *bucket;
1746 struct rt6_exception *rt6_ex;
1747 struct hlist_node *tmp;
1748 int i;
1749
1750 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1751 return;
1752
1753 spin_lock_bh(&rt6_exception_lock);
1754 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1755 lockdep_is_held(&rt6_exception_lock));
1756
1757 if (bucket) {
1758 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1759 hlist_for_each_entry_safe(rt6_ex, tmp,
1760 &bucket->chain, hlist) {
1761 struct rt6_info *entry = rt6_ex->rt6i;
1762
1763 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1764 RTF_CACHE_GATEWAY &&
1765 ipv6_addr_equal(gateway,
1766 &entry->rt6i_gateway)) {
1767 rt6_remove_exception(bucket, rt6_ex);
1768 }
1769 }
1770 bucket++;
1771 }
1772 }
1773
1774 spin_unlock_bh(&rt6_exception_lock);
1775}
1776
Wei Wangc757faa2017-10-06 12:06:01 -07001777static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1778 struct rt6_exception *rt6_ex,
1779 struct fib6_gc_args *gc_args,
1780 unsigned long now)
1781{
1782 struct rt6_info *rt = rt6_ex->rt6i;
1783
Paolo Abeni1859bac2017-10-19 16:07:11 +02001784	/* we are pruning and obsoleting aged-out and non-gateway exceptions
 1785	 * even if others still hold references to them, so that on the next
 1786	 * dst_check() such references can be dropped.
 1787	 * RTF_EXPIRES exceptions - e.g. pmtu-generated ones - are pruned when
 1788	 * expired, independently of their aging, as per RFC 8201 section 4.
 1789	 */
Wei Wang31afeb42018-01-26 11:40:17 -08001790 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1791 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1792 RT6_TRACE("aging clone %p\n", rt);
1793 rt6_remove_exception(bucket, rt6_ex);
1794 return;
1795 }
1796 } else if (time_after(jiffies, rt->dst.expires)) {
1797 RT6_TRACE("purging expired route %p\n", rt);
Wei Wangc757faa2017-10-06 12:06:01 -07001798 rt6_remove_exception(bucket, rt6_ex);
1799 return;
Wei Wang31afeb42018-01-26 11:40:17 -08001800 }
1801
1802 if (rt->rt6i_flags & RTF_GATEWAY) {
Wei Wangc757faa2017-10-06 12:06:01 -07001803 struct neighbour *neigh;
1804 __u8 neigh_flags = 0;
1805
Eric Dumazet1bfa26f2018-03-23 07:56:58 -07001806 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1807 if (neigh)
Wei Wangc757faa2017-10-06 12:06:01 -07001808 neigh_flags = neigh->flags;
Eric Dumazet1bfa26f2018-03-23 07:56:58 -07001809
Wei Wangc757faa2017-10-06 12:06:01 -07001810 if (!(neigh_flags & NTF_ROUTER)) {
1811 RT6_TRACE("purging route %p via non-router but gateway\n",
1812 rt);
1813 rt6_remove_exception(bucket, rt6_ex);
1814 return;
1815 }
1816 }
Wei Wang31afeb42018-01-26 11:40:17 -08001817
Wei Wangc757faa2017-10-06 12:06:01 -07001818 gc_args->more++;
1819}
1820
1821void rt6_age_exceptions(struct rt6_info *rt,
1822 struct fib6_gc_args *gc_args,
1823 unsigned long now)
1824{
1825 struct rt6_exception_bucket *bucket;
1826 struct rt6_exception *rt6_ex;
1827 struct hlist_node *tmp;
1828 int i;
1829
1830 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1831 return;
1832
Eric Dumazet1bfa26f2018-03-23 07:56:58 -07001833 rcu_read_lock_bh();
1834 spin_lock(&rt6_exception_lock);
Wei Wangc757faa2017-10-06 12:06:01 -07001835 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1836 lockdep_is_held(&rt6_exception_lock));
1837
1838 if (bucket) {
1839 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1840 hlist_for_each_entry_safe(rt6_ex, tmp,
1841 &bucket->chain, hlist) {
1842 rt6_age_examine_exception(bucket, rt6_ex,
1843 gc_args, now);
1844 }
1845 bucket++;
1846 }
1847 }
Eric Dumazet1bfa26f2018-03-23 07:56:58 -07001848 spin_unlock(&rt6_exception_lock);
1849 rcu_read_unlock_bh();
Wei Wangc757faa2017-10-06 12:06:01 -07001850}
1851
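/* Core fib6 lookup: walk the tree for (daddr, saddr), pick a route with
 * rt6_select() (backtracking, and retrying without RT6_LOOKUP_F_REACHABLE
 * if nothing usable is found), then prefer a matching entry from the
 * exception table.  The result is returned either as-is (null entry or
 * RTF_CACHE clone), as an uncached clone for the FLOWI_FLAG_KNOWN_NH
 * case, or as a per-CPU copy created on demand.
 */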
David Ahern9ff74382016-06-13 13:44:19 -07001852struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
David Ahernb75cc8f2018-03-02 08:32:17 -08001853 int oif, struct flowi6 *fl6,
1854 const struct sk_buff *skb, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855{
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001856 struct fib6_node *fn, *saved_fn;
Wei Wang2b760fc2017-10-06 12:06:03 -07001857 struct rt6_info *rt, *rt_cache;
Thomas Grafc71099a2006-08-04 23:20:06 -07001858 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -07001860 strict |= flags & RT6_LOOKUP_F_IFACE;
David Ahernd5d32e42016-10-24 12:27:23 -07001861 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001862 if (net->ipv6.devconf_all->forwarding == 0)
1863 strict |= RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864
Wei Wang66f5d6c2017-10-06 12:06:10 -07001865 rcu_read_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866
David S. Miller4c9483b2011-03-12 16:22:43 -05001867 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001868 saved_fn = fn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869
David Ahernca254492015-10-12 11:47:10 -07001870 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1871 oif = 0;
1872
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001873redo_rt6_select:
Wei Wang8d1040e2017-10-06 12:06:08 -07001874 rt = rt6_select(net, fn, oif, strict);
Nicolas Dichtel52bd4c02013-06-28 17:35:48 +02001875 if (rt->rt6i_nsiblings)
David Ahernb4bac172018-03-02 08:32:18 -08001876 rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
David Ahern421842e2018-04-17 17:33:18 -07001877 if (rt == net->ipv6.fib6_null_entry) {
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001878 fn = fib6_backtrack(fn, &fl6->saddr);
1879 if (fn)
1880 goto redo_rt6_select;
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001881 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1882 /* also consider unreachable route */
1883 strict &= ~RT6_LOOKUP_F_REACHABLE;
1884 fn = saved_fn;
1885 goto redo_rt6_select;
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001886 }
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001887 }
1888
Wei Wang2b760fc2017-10-06 12:06:03 -07001889	/* Search through the exception table */
1890 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1891 if (rt_cache)
1892 rt = rt_cache;
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -08001893
David Ahern421842e2018-04-17 17:33:18 -07001894 if (rt == net->ipv6.fib6_null_entry) {
1895 rt = net->ipv6.ip6_null_entry;
Wei Wang66f5d6c2017-10-06 12:06:10 -07001896 rcu_read_unlock();
Wei Wangd3843fe2017-10-06 12:06:06 -07001897 dst_hold(&rt->dst);
Paolo Abenib65f1642017-10-19 09:31:43 +02001898 trace_fib6_table_lookup(net, rt, table, fl6);
Wei Wangd3843fe2017-10-06 12:06:06 -07001899 return rt;
1900 } else if (rt->rt6i_flags & RTF_CACHE) {
David Ahernd4ead6b2018-04-17 17:33:16 -07001901 if (ip6_hold_safe(net, &rt, true))
Wei Wangd3843fe2017-10-06 12:06:06 -07001902 dst_use_noref(&rt->dst, jiffies);
David Ahernd4ead6b2018-04-17 17:33:16 -07001903
Wei Wang66f5d6c2017-10-06 12:06:10 -07001904 rcu_read_unlock();
Paolo Abenib65f1642017-10-19 09:31:43 +02001905 trace_fib6_table_lookup(net, rt, table, fl6);
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001906 return rt;
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001907 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1908 !(rt->rt6i_flags & RTF_GATEWAY))) {
1909 /* Create a RTF_CACHE clone which will not be
1910 * owned by the fib6 tree. It is for the special case where
1911 * the daddr in the skb during the neighbor look-up is different
 1912		 * from the fl6->daddr used to look up the route here.
1913 */
Thomas Grafc71099a2006-08-04 23:20:06 -07001914
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001915 struct rt6_info *uncached_rt;
1916
Wei Wangd3843fe2017-10-06 12:06:06 -07001917 if (ip6_hold_safe(net, &rt, true)) {
1918 dst_use_noref(&rt->dst, jiffies);
1919 } else {
Wei Wang66f5d6c2017-10-06 12:06:10 -07001920 rcu_read_unlock();
Wei Wangd3843fe2017-10-06 12:06:06 -07001921 uncached_rt = rt;
1922 goto uncached_rt_out;
1923 }
Wei Wang66f5d6c2017-10-06 12:06:10 -07001924 rcu_read_unlock();
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001925
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001926 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1927 dst_release(&rt->dst);
1928
Wei Wang1cfb71e2017-06-17 10:42:33 -07001929 if (uncached_rt) {
 1930			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc(),
 1931			 * so no need for another dst_hold()
1932 */
Martin KaFai Lau8d0b94a2015-05-22 20:56:04 -07001933 rt6_uncached_list_add(uncached_rt);
Wei Wang81eb8442017-10-06 12:06:11 -07001934 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
Wei Wang1cfb71e2017-06-17 10:42:33 -07001935 } else {
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001936 uncached_rt = net->ipv6.ip6_null_entry;
Wei Wang1cfb71e2017-06-17 10:42:33 -07001937 dst_hold(&uncached_rt->dst);
1938 }
David Ahernb8115802015-11-19 12:24:22 -08001939
Wei Wangd3843fe2017-10-06 12:06:06 -07001940uncached_rt_out:
Paolo Abenib65f1642017-10-19 09:31:43 +02001941 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001942 return uncached_rt;
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001943
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001944 } else {
1945 /* Get a percpu copy */
1946
1947 struct rt6_info *pcpu_rt;
1948
Wei Wangd3843fe2017-10-06 12:06:06 -07001949 dst_use_noref(&rt->dst, jiffies);
Eric Dumazet951f7882017-10-08 21:07:18 -07001950 local_bh_disable();
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001951 pcpu_rt = rt6_get_pcpu_route(rt);
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001952
Eric Dumazet951f7882017-10-08 21:07:18 -07001953 if (!pcpu_rt) {
Wei Wanga94b9362017-10-06 12:06:04 -07001954 /* atomic_inc_not_zero() is needed when using rcu */
1955 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
Eric Dumazet951f7882017-10-08 21:07:18 -07001956 /* No dst_hold() on rt is needed because grabbing
Wei Wanga94b9362017-10-06 12:06:04 -07001957 * rt->rt6i_ref makes sure rt can't be released.
1958 */
David Ahernafb1d4b52018-04-17 17:33:11 -07001959 pcpu_rt = rt6_make_pcpu_route(net, rt);
Wei Wanga94b9362017-10-06 12:06:04 -07001960 rt6_release(rt);
1961 } else {
1962 /* rt is already removed from tree */
Wei Wanga94b9362017-10-06 12:06:04 -07001963 pcpu_rt = net->ipv6.ip6_null_entry;
1964 dst_hold(&pcpu_rt->dst);
1965 }
Martin KaFai Lau9c7370a2015-08-14 11:05:54 -07001966 }
Eric Dumazet951f7882017-10-08 21:07:18 -07001967 local_bh_enable();
1968 rcu_read_unlock();
Paolo Abenib65f1642017-10-19 09:31:43 +02001969 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001970 return pcpu_rt;
1971 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001972}
David Ahern9ff74382016-06-13 13:44:19 -07001973EXPORT_SYMBOL_GPL(ip6_pol_route);
Thomas Grafc71099a2006-08-04 23:20:06 -07001974
David Ahernb75cc8f2018-03-02 08:32:17 -08001975static struct rt6_info *ip6_pol_route_input(struct net *net,
1976 struct fib6_table *table,
1977 struct flowi6 *fl6,
1978 const struct sk_buff *skb,
1979 int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -07001980{
David Ahernb75cc8f2018-03-02 08:32:17 -08001981 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -07001982}
1983
Mahesh Bandeward409b842016-09-16 12:59:08 -07001984struct dst_entry *ip6_route_input_lookup(struct net *net,
1985 struct net_device *dev,
David Ahernb75cc8f2018-03-02 08:32:17 -08001986 struct flowi6 *fl6,
1987 const struct sk_buff *skb,
1988 int flags)
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001989{
1990 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1991 flags |= RT6_LOOKUP_F_IFACE;
1992
David Ahernb75cc8f2018-03-02 08:32:17 -08001993 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001994}
Mahesh Bandeward409b842016-09-16 12:59:08 -07001995EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001996
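/* Extract L3 multipath hash keys from @skb.  For ICMPv6 error messages
 * the keys are taken from the offending packet embedded in the error, so
 * that errors are hashed onto the same path as the flow that triggered
 * them; otherwise the pre-dissected flow keys (or the outer header) are
 * used.
 */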
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02001997static void ip6_multipath_l3_keys(const struct sk_buff *skb,
Roopa Prabhu5e5d6fe2018-02-28 22:43:22 -05001998 struct flow_keys *keys,
1999 struct flow_keys *flkeys)
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02002000{
2001 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
2002 const struct ipv6hdr *key_iph = outer_iph;
Roopa Prabhu5e5d6fe2018-02-28 22:43:22 -05002003 struct flow_keys *_flkeys = flkeys;
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02002004 const struct ipv6hdr *inner_iph;
2005 const struct icmp6hdr *icmph;
2006 struct ipv6hdr _inner_iph;
2007
2008 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
2009 goto out;
2010
2011 icmph = icmp6_hdr(skb);
2012 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
2013 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
2014 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
2015 icmph->icmp6_type != ICMPV6_PARAMPROB)
2016 goto out;
2017
2018 inner_iph = skb_header_pointer(skb,
2019 skb_transport_offset(skb) + sizeof(*icmph),
2020 sizeof(_inner_iph), &_inner_iph);
2021 if (!inner_iph)
2022 goto out;
2023
2024 key_iph = inner_iph;
Roopa Prabhu5e5d6fe2018-02-28 22:43:22 -05002025 _flkeys = NULL;
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02002026out:
Roopa Prabhu5e5d6fe2018-02-28 22:43:22 -05002027 if (_flkeys) {
2028 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
2029 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
2030 keys->tags.flow_label = _flkeys->tags.flow_label;
2031 keys->basic.ip_proto = _flkeys->basic.ip_proto;
2032 } else {
2033 keys->addrs.v6addrs.src = key_iph->saddr;
2034 keys->addrs.v6addrs.dst = key_iph->daddr;
2035 keys->tags.flow_label = ip6_flowinfo(key_iph);
2036 keys->basic.ip_proto = key_iph->nexthdr;
2037 }
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02002038}
2039
2040/* if skb is set it will be used and fl6 can be NULL */
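/* Multipath hash selection honours the per-netns policy: 0 hashes on the
 * L3 triple (addresses, flow label, protocol), 1 hashes on the L4
 * five-tuple, reusing an existing skb L4 hash or dissecting the packet
 * when needed.
 */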
David Ahernb4bac172018-03-02 08:32:18 -08002041u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2042 const struct sk_buff *skb, struct flow_keys *flkeys)
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02002043{
2044 struct flow_keys hash_keys;
David Ahern9a2a5372018-03-02 08:32:15 -08002045 u32 mhash;
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02002046
David S. Millerbbfa0472018-03-12 11:09:33 -04002047 switch (ip6_multipath_hash_policy(net)) {
David Ahernb4bac172018-03-02 08:32:18 -08002048 case 0:
2049 memset(&hash_keys, 0, sizeof(hash_keys));
2050 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2051 if (skb) {
2052 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2053 } else {
2054 hash_keys.addrs.v6addrs.src = fl6->saddr;
2055 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2056 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
2057 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2058 }
2059 break;
2060 case 1:
2061 if (skb) {
2062 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2063 struct flow_keys keys;
2064
2065 /* short-circuit if we already have L4 hash present */
2066 if (skb->l4_hash)
2067 return skb_get_hash_raw(skb) >> 1;
2068
2069 memset(&hash_keys, 0, sizeof(hash_keys));
2070
2071 if (!flkeys) {
2072 skb_flow_dissect_flow_keys(skb, &keys, flag);
2073 flkeys = &keys;
2074 }
2075 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2076 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2077 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2078 hash_keys.ports.src = flkeys->ports.src;
2079 hash_keys.ports.dst = flkeys->ports.dst;
2080 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2081 } else {
2082 memset(&hash_keys, 0, sizeof(hash_keys));
2083 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2084 hash_keys.addrs.v6addrs.src = fl6->saddr;
2085 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2086 hash_keys.ports.src = fl6->fl6_sport;
2087 hash_keys.ports.dst = fl6->fl6_dport;
2088 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2089 }
2090 break;
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02002091 }
David Ahern9a2a5372018-03-02 08:32:15 -08002092 mhash = flow_hash_from_keys(&hash_keys);
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02002093
David Ahern9a2a5372018-03-02 08:32:15 -08002094 return mhash >> 1;
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02002095}
2096
Thomas Grafc71099a2006-08-04 23:20:06 -07002097void ip6_route_input(struct sk_buff *skb)
2098{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002099 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002100 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -07002101 int flags = RT6_LOOKUP_F_HAS_SADDR;
Jiri Benc904af042015-08-20 13:56:31 +02002102 struct ip_tunnel_info *tun_info;
David S. Miller4c9483b2011-03-12 16:22:43 -05002103 struct flowi6 fl6 = {
David Aherne0d56fd2016-09-10 12:09:57 -07002104 .flowi6_iif = skb->dev->ifindex,
David S. Miller4c9483b2011-03-12 16:22:43 -05002105 .daddr = iph->daddr,
2106 .saddr = iph->saddr,
YOSHIFUJI Hideaki / 吉藤英明6502ca52013-01-13 05:01:51 +00002107 .flowlabel = ip6_flowinfo(iph),
David S. Miller4c9483b2011-03-12 16:22:43 -05002108 .flowi6_mark = skb->mark,
2109 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -07002110 };
Roopa Prabhu5e5d6fe2018-02-28 22:43:22 -05002111 struct flow_keys *flkeys = NULL, _flkeys;
Thomas Grafadaa70b2006-10-13 15:01:03 -07002112
Jiri Benc904af042015-08-20 13:56:31 +02002113 tun_info = skb_tunnel_info(skb);
Jiri Benc46fa0622015-08-28 20:48:19 +02002114 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
Jiri Benc904af042015-08-20 13:56:31 +02002115 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
Roopa Prabhu5e5d6fe2018-02-28 22:43:22 -05002116
2117 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2118 flkeys = &_flkeys;
2119
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02002120 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
David Ahernb4bac172018-03-02 08:32:18 -08002121 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
Jiri Benc06e9d042015-08-20 13:56:26 +02002122 skb_dst_drop(skb);
David Ahernb75cc8f2018-03-02 08:32:17 -08002123 skb_dst_set(skb,
2124 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
Thomas Grafc71099a2006-08-04 23:20:06 -07002125}
2126
David Ahernb75cc8f2018-03-02 08:32:17 -08002127static struct rt6_info *ip6_pol_route_output(struct net *net,
2128 struct fib6_table *table,
2129 struct flowi6 *fl6,
2130 const struct sk_buff *skb,
2131 int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -07002132{
David Ahernb75cc8f2018-03-02 08:32:17 -08002133 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -07002134}
2135
Paolo Abeni6f21c962016-01-29 12:30:19 +01002136struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2137 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -07002138{
David Ahernd46a9d62015-10-21 08:42:22 -07002139 bool any_src;
Thomas Grafc71099a2006-08-04 23:20:06 -07002140
David Ahern4c1feac2016-09-10 12:09:56 -07002141 if (rt6_need_strict(&fl6->daddr)) {
2142 struct dst_entry *dst;
2143
2144 dst = l3mdev_link_scope_lookup(net, fl6);
2145 if (dst)
2146 return dst;
2147 }
David Ahernca254492015-10-12 11:47:10 -07002148
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002149 fl6->flowi6_iif = LOOPBACK_IFINDEX;
David McCullough4dc27d1c2012-06-25 15:42:26 +00002150
David Ahernd46a9d62015-10-21 08:42:22 -07002151 any_src = ipv6_addr_any(&fl6->saddr);
David Ahern741a11d2015-09-28 10:12:13 -07002152 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
David Ahernd46a9d62015-10-21 08:42:22 -07002153 (fl6->flowi6_oif && any_src))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -07002154 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -07002155
David Ahernd46a9d62015-10-21 08:42:22 -07002156 if (!any_src)
Thomas Grafadaa70b2006-10-13 15:01:03 -07002157 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +00002158 else if (sk)
2159 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -07002160
David Ahernb75cc8f2018-03-02 08:32:17 -08002161 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162}
Paolo Abeni6f21c962016-01-29 12:30:19 +01002163EXPORT_SYMBOL_GPL(ip6_route_output_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002164
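/* Build a blackhole copy of @dst_orig: metrics and addressing are
 * preserved but input/output are wired to dst_discard, so packets using
 * it are silently dropped.  Used e.g. by the xfrm code while a usable
 * route/state is still being resolved.
 */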
David S. Miller2774c132011-03-01 14:59:04 -08002165struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07002166{
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002167 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
Wei Wang1dbe32522017-06-17 10:42:26 -07002168 struct net_device *loopback_dev = net->loopback_dev;
David S. Miller14e50e52007-05-24 18:17:54 -07002169 struct dst_entry *new = NULL;
2170
Wei Wang1dbe32522017-06-17 10:42:26 -07002171 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
Steffen Klassert62cf27e2017-10-09 08:39:43 +02002172 DST_OBSOLETE_DEAD, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07002173 if (rt) {
Martin KaFai Lau0a1f5962015-10-15 16:39:58 -07002174 rt6_info_init(rt);
Wei Wang81eb8442017-10-06 12:06:11 -07002175 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
Martin KaFai Lau0a1f5962015-10-15 16:39:58 -07002176
Changli Gaod8d1f302010-06-10 23:31:35 -07002177 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07002178 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08002179 new->input = dst_discard;
Eric W. Biedermanede20592015-10-07 16:48:47 -05002180 new->output = dst_discard_out;
David S. Miller14e50e52007-05-24 18:17:54 -07002181
Martin KaFai Lau0a1f5962015-10-15 16:39:58 -07002182 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -07002183
Wei Wang1dbe32522017-06-17 10:42:26 -07002184 rt->rt6i_idev = in6_dev_get(loopback_dev);
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002185 rt->rt6i_gateway = ort->rt6i_gateway;
Martin KaFai Lau0a1f5962015-10-15 16:39:58 -07002186 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
David S. Miller14e50e52007-05-24 18:17:54 -07002187 rt->rt6i_metric = 0;
2188
2189 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2190#ifdef CONFIG_IPV6_SUBTREES
2191 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2192#endif
David S. Miller14e50e52007-05-24 18:17:54 -07002193 }
2194
David S. Miller69ead7a2011-03-01 14:45:33 -08002195 dst_release(dst_orig);
2196 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07002197}
David S. Miller14e50e52007-05-24 18:17:54 -07002198
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199/*
2200 * Destination cache support functions
2201 */
2202
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07002203static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
2204{
Steffen Klassert36143642017-08-25 09:05:42 +02002205 u32 rt_cookie = 0;
Wei Wangc5cff852017-08-21 09:47:10 -07002206
2207 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07002208 return NULL;
2209
2210 if (rt6_check_expired(rt))
2211 return NULL;
2212
2213 return &rt->dst;
2214}
2215
2216static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
2217{
Martin KaFai Lau5973fb12015-11-11 11:51:07 -08002218 if (!__rt6_check_expired(rt) &&
2219 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
David Miller3a2232e2017-11-28 15:40:40 -05002220 rt6_check(rt->from, cookie))
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07002221 return &rt->dst;
2222 else
2223 return NULL;
2224}
2225
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2227{
2228 struct rt6_info *rt;
2229
2230 rt = (struct rt6_info *) dst;
2231
Nicolas Dichtel6f3118b2012-09-10 22:09:46 +00002232 /* All IPV6 dsts are created with ->obsolete set to the value
2233 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2234 * into this function always.
2235 */
Hannes Frederic Sowae3bc10b2013-10-24 07:48:24 +02002236
Martin KaFai Lau02bcf4e2015-11-11 11:51:08 -08002237 if (rt->rt6i_flags & RTF_PCPU ||
David Miller3a2232e2017-11-28 15:40:40 -05002238 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07002239 return rt6_dst_from_check(rt, cookie);
2240 else
2241 return rt6_check(rt, cookie);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002242}
2243
2244static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2245{
2246 struct rt6_info *rt = (struct rt6_info *) dst;
2247
2248 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00002249 if (rt->rt6i_flags & RTF_CACHE) {
2250 if (rt6_check_expired(rt)) {
David Ahernafb1d4b52018-04-17 17:33:11 -07002251 ip6_del_rt(dev_net(dst->dev), rt);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00002252 dst = NULL;
2253 }
2254 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002255 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00002256 dst = NULL;
2257 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002258 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00002259 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260}
2261
2262static void ip6_link_failure(struct sk_buff *skb)
2263{
2264 struct rt6_info *rt;
2265
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002266 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267
Eric Dumazetadf30902009-06-02 05:19:30 +00002268 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269 if (rt) {
Hannes Frederic Sowa1eb4f752013-07-10 23:00:57 +02002270 if (rt->rt6i_flags & RTF_CACHE) {
Wei Wangad65a2f2017-06-17 10:42:35 -07002271 if (dst_hold_safe(&rt->dst))
David Ahernafb1d4b52018-04-17 17:33:11 -07002272 ip6_del_rt(dev_net(rt->dst.dev), rt);
Wei Wangc5cff852017-08-21 09:47:10 -07002273 } else {
2274 struct fib6_node *fn;
2275
2276 rcu_read_lock();
2277 fn = rcu_dereference(rt->rt6i_node);
2278 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2279 fn->fn_sernum = -1;
2280 rcu_read_unlock();
Hannes Frederic Sowa1eb4f752013-07-10 23:00:57 +02002281 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282 }
2283}
2284
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002285static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2286{
2287 struct net *net = dev_net(rt->dst.dev);
2288
David Ahernd4ead6b2018-04-17 17:33:16 -07002289 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002290 rt->rt6i_flags |= RTF_MODIFIED;
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002291 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2292}
2293
Martin KaFai Lau0d3f6d22015-11-11 11:51:06 -08002294static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2295{
2296 return !(rt->rt6i_flags & RTF_CACHE) &&
Wei Wang4e587ea2017-08-25 15:03:10 -07002297 (rt->rt6i_flags & RTF_PCPU ||
2298 rcu_access_pointer(rt->rt6i_node));
Martin KaFai Lau0d3f6d22015-11-11 11:51:06 -08002299}
2300
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002301static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2302 const struct ipv6hdr *iph, u32 mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002303{
Julian Anastasov0dec8792017-02-06 23:14:16 +02002304 const struct in6_addr *daddr, *saddr;
Ian Morris67ba4152014-08-24 21:53:10 +01002305 struct rt6_info *rt6 = (struct rt6_info *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002306
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002307 if (rt6->rt6i_flags & RTF_LOCAL)
2308 return;
2309
Xin Long19bda362016-10-28 18:18:01 +08002310 if (dst_metric_locked(dst, RTAX_MTU))
2311 return;
2312
Julian Anastasov0dec8792017-02-06 23:14:16 +02002313 if (iph) {
2314 daddr = &iph->daddr;
2315 saddr = &iph->saddr;
2316 } else if (sk) {
2317 daddr = &sk->sk_v6_daddr;
2318 saddr = &inet6_sk(sk)->saddr;
2319 } else {
2320 daddr = NULL;
2321 saddr = NULL;
2322 }
2323 dst_confirm_neigh(dst, daddr);
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002324 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2325 if (mtu >= dst_mtu(dst))
2326 return;
David S. Miller81aded22012-06-15 14:54:11 -07002327
Martin KaFai Lau0d3f6d22015-11-11 11:51:06 -08002328 if (!rt6_cache_allowed_for_pmtu(rt6)) {
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002329 rt6_do_update_pmtu(rt6, mtu);
Wei Wang2b760fc2017-10-06 12:06:03 -07002330 /* update rt6_ex->stamp for cache */
2331 if (rt6->rt6i_flags & RTF_CACHE)
2332 rt6_update_exception_stamp_rt(rt6);
Julian Anastasov0dec8792017-02-06 23:14:16 +02002333 } else if (daddr) {
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002334 struct rt6_info *nrt6;
Hagen Paul Pfeifer9d289712015-01-15 22:34:25 +01002335
David Ahernd4ead6b2018-04-17 17:33:16 -07002336 nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr);
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002337 if (nrt6) {
2338 rt6_do_update_pmtu(nrt6, mtu);
David Ahernd4ead6b2018-04-17 17:33:16 -07002339 if (rt6_insert_exception(nrt6, rt6->from))
Wei Wang2b760fc2017-10-06 12:06:03 -07002340 dst_release_immediate(&nrt6->dst);
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002341 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002342 }
2343}
2344
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002345static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2346 struct sk_buff *skb, u32 mtu)
2347{
2348 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2349}
2350
David S. Miller42ae66c2012-06-15 20:01:57 -07002351void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002352 int oif, u32 mark, kuid_t uid)
David S. Miller81aded22012-06-15 14:54:11 -07002353{
2354 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2355 struct dst_entry *dst;
2356 struct flowi6 fl6;
2357
2358 memset(&fl6, 0, sizeof(fl6));
2359 fl6.flowi6_oif = oif;
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07002360 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
David S. Miller81aded22012-06-15 14:54:11 -07002361 fl6.daddr = iph->daddr;
2362 fl6.saddr = iph->saddr;
YOSHIFUJI Hideaki / 吉藤英明6502ca52013-01-13 05:01:51 +00002363 fl6.flowlabel = ip6_flowinfo(iph);
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002364 fl6.flowi6_uid = uid;
David S. Miller81aded22012-06-15 14:54:11 -07002365
2366 dst = ip6_route_output(net, NULL, &fl6);
2367 if (!dst->error)
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002368 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
David S. Miller81aded22012-06-15 14:54:11 -07002369 dst_release(dst);
2370}
2371EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2372
2373void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2374{
Martin KaFai Lau33c162a2016-04-11 15:29:36 -07002375 struct dst_entry *dst;
2376
David S. Miller81aded22012-06-15 14:54:11 -07002377 ip6_update_pmtu(skb, sock_net(sk), mtu,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002378 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
Martin KaFai Lau33c162a2016-04-11 15:29:36 -07002379
2380 dst = __sk_dst_get(sk);
2381 if (!dst || !dst->obsolete ||
2382 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2383 return;
2384
2385 bh_lock_sock(sk);
2386 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2387 ip6_datagram_dst_update(sk, false);
2388 bh_unlock_sock(sk);
David S. Miller81aded22012-06-15 14:54:11 -07002389}
2390EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2391
Alexey Kodanev7d6850f2018-04-03 15:00:07 +03002392void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2393 const struct flowi6 *fl6)
2394{
2395#ifdef CONFIG_IPV6_SUBTREES
2396 struct ipv6_pinfo *np = inet6_sk(sk);
2397#endif
2398
2399 ip6_dst_store(sk, dst,
2400 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2401 &sk->sk_v6_daddr : NULL,
2402#ifdef CONFIG_IPV6_SUBTREES
2403 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2404 &np->saddr :
2405#endif
2406 NULL);
2407}
2408
Duan Jiongb55b76b2013-09-04 19:44:21 +08002409/* Handle redirects */
2410struct ip6rd_flowi {
2411 struct flowi6 fl6;
2412 struct in6_addr gateway;
2413};
2414
2415static struct rt6_info *__ip6_route_redirect(struct net *net,
2416 struct fib6_table *table,
2417 struct flowi6 *fl6,
David Ahernb75cc8f2018-03-02 08:32:17 -08002418 const struct sk_buff *skb,
Duan Jiongb55b76b2013-09-04 19:44:21 +08002419 int flags)
2420{
2421 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
Wei Wang2b760fc2017-10-06 12:06:03 -07002422 struct rt6_info *rt, *rt_cache;
Duan Jiongb55b76b2013-09-04 19:44:21 +08002423 struct fib6_node *fn;
2424
2425 /* Get the "current" route for this destination and
Alexander Alemayhu67c408c2017-01-07 23:53:00 +01002426	 * check if the redirect has come from the appropriate router.
Duan Jiongb55b76b2013-09-04 19:44:21 +08002427 *
2428 * RFC 4861 specifies that redirects should only be
2429 * accepted if they come from the nexthop to the target.
2430 * Due to the way the routes are chosen, this notion
2431 * is a bit fuzzy and one might need to check all possible
2432 * routes.
2433 */
2434
Wei Wang66f5d6c2017-10-06 12:06:10 -07002435 rcu_read_lock();
Duan Jiongb55b76b2013-09-04 19:44:21 +08002436 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2437restart:
Wei Wang66f5d6c2017-10-06 12:06:10 -07002438 for_each_fib6_node_rt_rcu(fn) {
David Ahern5e670d82018-04-17 17:33:14 -07002439 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
Ido Schimmel8067bb82018-01-07 12:45:09 +02002440 continue;
David Ahern14895682018-04-17 17:33:17 -07002441 if (fib6_check_expired(rt))
Duan Jiongb55b76b2013-09-04 19:44:21 +08002442 continue;
David Ahern6edb3c92018-04-17 17:33:15 -07002443 if (rt->rt6i_flags & RTF_REJECT)
Duan Jiongb55b76b2013-09-04 19:44:21 +08002444 break;
2445 if (!(rt->rt6i_flags & RTF_GATEWAY))
2446 continue;
David Ahern5e670d82018-04-17 17:33:14 -07002447 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
Duan Jiongb55b76b2013-09-04 19:44:21 +08002448 continue;
Wei Wang2b760fc2017-10-06 12:06:03 -07002449 /* rt_cache's gateway might be different from its 'parent'
2450 * in the case of an ip redirect.
2451 * So we keep searching in the exception table if the gateway
2452 * is different.
2453 */
David Ahern5e670d82018-04-17 17:33:14 -07002454 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
Wei Wang2b760fc2017-10-06 12:06:03 -07002455 rt_cache = rt6_find_cached_rt(rt,
2456 &fl6->daddr,
2457 &fl6->saddr);
2458 if (rt_cache &&
2459 ipv6_addr_equal(&rdfl->gateway,
2460 &rt_cache->rt6i_gateway)) {
2461 rt = rt_cache;
2462 break;
2463 }
Duan Jiongb55b76b2013-09-04 19:44:21 +08002464 continue;
Wei Wang2b760fc2017-10-06 12:06:03 -07002465 }
Duan Jiongb55b76b2013-09-04 19:44:21 +08002466 break;
2467 }
2468
2469 if (!rt)
David Ahern421842e2018-04-17 17:33:18 -07002470 rt = net->ipv6.fib6_null_entry;
David Ahern6edb3c92018-04-17 17:33:15 -07002471 else if (rt->rt6i_flags & RTF_REJECT) {
Duan Jiongb55b76b2013-09-04 19:44:21 +08002472 rt = net->ipv6.ip6_null_entry;
Martin KaFai Laub0a1ba52015-01-20 19:16:02 -08002473 goto out;
2474 }
2475
David Ahern421842e2018-04-17 17:33:18 -07002476 if (rt == net->ipv6.fib6_null_entry) {
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07002477 fn = fib6_backtrack(fn, &fl6->saddr);
2478 if (fn)
2479 goto restart;
Duan Jiongb55b76b2013-09-04 19:44:21 +08002480 }
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07002481
Martin KaFai Laub0a1ba52015-01-20 19:16:02 -08002482out:
Wei Wangd3843fe2017-10-06 12:06:06 -07002483 ip6_hold_safe(net, &rt, true);
Duan Jiongb55b76b2013-09-04 19:44:21 +08002484
Wei Wang66f5d6c2017-10-06 12:06:10 -07002485 rcu_read_unlock();
Duan Jiongb55b76b2013-09-04 19:44:21 +08002486
Paolo Abenib65f1642017-10-19 09:31:43 +02002487 trace_fib6_table_lookup(net, rt, table, fl6);
Duan Jiongb55b76b2013-09-04 19:44:21 +08002488 return rt;
2489};
2490
2491static struct dst_entry *ip6_route_redirect(struct net *net,
David Ahernb75cc8f2018-03-02 08:32:17 -08002492 const struct flowi6 *fl6,
2493 const struct sk_buff *skb,
2494 const struct in6_addr *gateway)
Duan Jiongb55b76b2013-09-04 19:44:21 +08002495{
2496 int flags = RT6_LOOKUP_F_HAS_SADDR;
2497 struct ip6rd_flowi rdfl;
2498
2499 rdfl.fl6 = *fl6;
2500 rdfl.gateway = *gateway;
2501
David Ahernb75cc8f2018-03-02 08:32:17 -08002502 return fib6_rule_lookup(net, &rdfl.fl6, skb,
Duan Jiongb55b76b2013-09-04 19:44:21 +08002503 flags, __ip6_route_redirect);
2504}
2505
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002506void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2507 kuid_t uid)
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002508{
2509 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2510 struct dst_entry *dst;
2511 struct flowi6 fl6;
2512
2513 memset(&fl6, 0, sizeof(fl6));
Julian Anastasove374c612014-04-28 10:51:56 +03002514 fl6.flowi6_iif = LOOPBACK_IFINDEX;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002515 fl6.flowi6_oif = oif;
2516 fl6.flowi6_mark = mark;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002517 fl6.daddr = iph->daddr;
2518 fl6.saddr = iph->saddr;
YOSHIFUJI Hideaki / 吉藤英明6502ca52013-01-13 05:01:51 +00002519 fl6.flowlabel = ip6_flowinfo(iph);
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002520 fl6.flowi6_uid = uid;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002521
David Ahernb75cc8f2018-03-02 08:32:17 -08002522 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
Duan Jiongb55b76b2013-09-04 19:44:21 +08002523 rt6_do_redirect(dst, NULL, skb);
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002524 dst_release(dst);
2525}
2526EXPORT_SYMBOL_GPL(ip6_redirect);
2527
Duan Jiongc92a59e2013-08-22 12:07:35 +08002528void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2529 u32 mark)
2530{
2531 const struct ipv6hdr *iph = ipv6_hdr(skb);
2532 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2533 struct dst_entry *dst;
2534 struct flowi6 fl6;
2535
2536 memset(&fl6, 0, sizeof(fl6));
Julian Anastasove374c612014-04-28 10:51:56 +03002537 fl6.flowi6_iif = LOOPBACK_IFINDEX;
Duan Jiongc92a59e2013-08-22 12:07:35 +08002538 fl6.flowi6_oif = oif;
2539 fl6.flowi6_mark = mark;
Duan Jiongc92a59e2013-08-22 12:07:35 +08002540 fl6.daddr = msg->dest;
2541 fl6.saddr = iph->daddr;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002542 fl6.flowi6_uid = sock_net_uid(net, NULL);
Duan Jiongc92a59e2013-08-22 12:07:35 +08002543
David Ahernb75cc8f2018-03-02 08:32:17 -08002544 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
Duan Jiongb55b76b2013-09-04 19:44:21 +08002545 rt6_do_redirect(dst, NULL, skb);
Duan Jiongc92a59e2013-08-22 12:07:35 +08002546 dst_release(dst);
2547}
2548
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002549void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2550{
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002551 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2552 sk->sk_uid);
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002553}
2554EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2555
David S. Miller0dbaee32010-12-13 12:52:14 -08002556static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002557{
David S. Miller0dbaee32010-12-13 12:52:14 -08002558 struct net_device *dev = dst->dev;
2559 unsigned int mtu = dst_mtu(dst);
2560 struct net *net = dev_net(dev);
2561
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2563
Daniel Lezcano55786892008-03-04 13:47:47 -08002564 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2565 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002566
2567 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002568 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2569 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2570 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002571 * rely only on pmtu discovery"
2572 */
2573 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2574 mtu = IPV6_MAXPLEN;
2575 return mtu;
2576}
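/* Worked example for ip6_default_advmss(): with a 1500 byte link MTU the
 * advertised MSS is 1500 - 40 (IPv6 header) - 20 (TCP header) = 1440,
 * unless ip6_rt_min_advmss raises it or the IPV6_MAXPLEN clamp lowers it
 * for jumbo-sized MTUs.
 */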
2577
Steffen Klassertebb762f2011-11-23 02:12:51 +00002578static unsigned int ip6_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08002579{
David S. Millerd33e4552010-12-14 13:01:14 -08002580 struct inet6_dev *idev;
David Ahernd4ead6b2018-04-17 17:33:16 -07002581 unsigned int mtu;
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002582
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07002583 mtu = dst_metric_raw(dst, RTAX_MTU);
2584 if (mtu)
2585 goto out;
2586
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002587 mtu = IPV6_MIN_MTU;
David S. Millerd33e4552010-12-14 13:01:14 -08002588
2589 rcu_read_lock();
2590 idev = __in6_dev_get(dst->dev);
2591 if (idev)
2592 mtu = idev->cnf.mtu6;
2593 rcu_read_unlock();
2594
Eric Dumazet30f78d82014-04-10 21:23:36 -07002595out:
Roopa Prabhu14972cb2016-08-24 20:10:43 -07002596 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2597
2598 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08002599}
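/* ip6_mtu() resolution order, as implemented above: an explicit RTAX_MTU
 * metric on the dst wins; otherwise the egress device's per-interface mtu6
 * is used (falling back to IPV6_MIN_MTU when no inet6_dev is attached).
 * The result is capped at IP6_MAX_MTU and reduced by any lwtunnel
 * encapsulation headroom.
 */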
2600
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08002601struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
David S. Miller87a11572011-12-06 17:04:13 -05002602 struct flowi6 *fl6)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603{
David S. Miller87a11572011-12-06 17:04:13 -05002604 struct dst_entry *dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002605 struct rt6_info *rt;
2606 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002607 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002608
David S. Miller38308472011-12-03 18:02:47 -05002609 if (unlikely(!idev))
Eric Dumazet122bdf62012-03-14 21:13:11 +00002610 return ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611
Martin KaFai Lauad706862015-08-14 11:05:52 -07002612 rt = ip6_dst_alloc(net, dev, 0);
David S. Miller38308472011-12-03 18:02:47 -05002613 if (unlikely(!rt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002614 in6_dev_put(idev);
David S. Miller87a11572011-12-06 17:04:13 -05002615 dst = ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002616 goto out;
2617 }
2618
Yan, Zheng8e2ec632011-09-05 21:34:30 +00002619 rt->dst.flags |= DST_HOST;
Brendan McGrath588753f2017-12-13 22:14:57 +11002620 rt->dst.input = ip6_input;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00002621 rt->dst.output = ip6_output;
Julian Anastasov550bab42013-10-20 15:43:04 +03002622 rt->rt6i_gateway = fl6->daddr;
David S. Miller87a11572011-12-06 17:04:13 -05002623 rt->rt6i_dst.addr = fl6->daddr;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00002624 rt->rt6i_dst.plen = 128;
2625 rt->rt6i_idev = idev;
Li RongQing14edd872012-10-24 14:01:18 +08002626 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002627
Ido Schimmel4c981e22018-01-07 12:45:04 +02002628 /* Add this dst into uncached_list so that rt6_disable_ip() can
Wei Wang587fea72017-06-17 10:42:36 -07002629 * properly release the net_device
2630 */
2631 rt6_uncached_list_add(rt);
Wei Wang81eb8442017-10-06 12:06:11 -07002632 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002633
David S. Miller87a11572011-12-06 17:04:13 -05002634 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2635
Linus Torvalds1da177e2005-04-16 15:20:36 -07002636out:
David S. Miller87a11572011-12-06 17:04:13 -05002637 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002638}
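/* icmp6_dst_alloc() builds a throw-away host route (plen 128) for an ICMPv6
 * reply instead of consulting the FIB; the dst is parked on uncached_list so
 * device unregistration can still release it, and the result is passed
 * through xfrm_lookup() before being handed back to the caller.
 */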
2639
Daniel Lezcano569d3642008-01-18 03:56:57 -08002640static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002641{
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002642 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08002643 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2644 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2645 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2646 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2647 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00002648 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002649
Eric Dumazetfc66f952010-10-08 06:37:34 +00002650 entries = dst_entries_get_fast(ops);
Michal Kubeček49a18d82013-08-01 10:04:24 +02002651 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00002652 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002653 goto out;
2654
Benjamin Thery6891a342008-03-04 13:49:47 -08002655 net->ipv6.ip6_rt_gc_expire++;
Li RongQing14956642014-05-19 17:30:28 +08002656 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002657 entries = dst_entries_get_slow(ops);
2658 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08002659 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002660out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08002661 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00002662 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002663}
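/* GC pacing sketch for ip6_dst_gc(): the sweep is skipped while the entry
 * count stays within ip6_rt_max_size and the minimum interval has not yet
 * elapsed.  When it does run, ip6_rt_gc_expire is passed to fib6_run_gc()
 * (which uses it as its expiry timeout); the "expire -= expire >> elasticity"
 * decay on every call shrinks that timeout under sustained pressure, making
 * collection more aggressive, while dropping below gc_thresh resets it to
 * half of ip6_rt_gc_timeout.
 */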
2664
David Ahernd4ead6b2018-04-17 17:33:16 -07002665static int ip6_convert_metrics(struct net *net, struct rt6_info *rt,
2666 struct fib6_config *cfg)
Florian Westphale715b6d2015-01-05 23:57:44 +01002667{
David Ahernd4ead6b2018-04-17 17:33:16 -07002668 int err = 0;
Florian Westphale715b6d2015-01-05 23:57:44 +01002669
David Ahernd4ead6b2018-04-17 17:33:16 -07002670 if (cfg->fc_mx) {
2671 rt->fib6_metrics = kzalloc(sizeof(*rt->fib6_metrics),
2672 GFP_KERNEL);
2673 if (unlikely(!rt->fib6_metrics))
2674 return -ENOMEM;
Florian Westphale715b6d2015-01-05 23:57:44 +01002675
David Ahernd4ead6b2018-04-17 17:33:16 -07002676 refcount_set(&rt->fib6_metrics->refcnt, 1);
Florian Westphale715b6d2015-01-05 23:57:44 +01002677
David Ahernd4ead6b2018-04-17 17:33:16 -07002678 err = ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len,
2679 rt->fib6_metrics->metrics);
Florian Westphale715b6d2015-01-05 23:57:44 +01002680 }
2681
David Ahernd4ead6b2018-04-17 17:33:16 -07002682 return err;
Florian Westphale715b6d2015-01-05 23:57:44 +01002683}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002684
David Ahern8c145862016-04-24 21:26:04 -07002685static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2686 struct fib6_config *cfg,
David Ahernf4797b32018-01-25 16:55:08 -08002687 const struct in6_addr *gw_addr,
2688 u32 tbid, int flags)
David Ahern8c145862016-04-24 21:26:04 -07002689{
2690 struct flowi6 fl6 = {
2691 .flowi6_oif = cfg->fc_ifindex,
2692 .daddr = *gw_addr,
2693 .saddr = cfg->fc_prefsrc,
2694 };
2695 struct fib6_table *table;
2696 struct rt6_info *rt;
David Ahern8c145862016-04-24 21:26:04 -07002697
David Ahernf4797b32018-01-25 16:55:08 -08002698 table = fib6_get_table(net, tbid);
David Ahern8c145862016-04-24 21:26:04 -07002699 if (!table)
2700 return NULL;
2701
2702 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2703 flags |= RT6_LOOKUP_F_HAS_SADDR;
2704
David Ahernf4797b32018-01-25 16:55:08 -08002705 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
David Ahernb75cc8f2018-03-02 08:32:17 -08002706 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
David Ahern8c145862016-04-24 21:26:04 -07002707
2708 /* if table lookup failed, fall back to full lookup */
2709 if (rt == net->ipv6.ip6_null_entry) {
2710 ip6_rt_put(rt);
2711 rt = NULL;
2712 }
2713
2714 return rt;
2715}
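/* ip6_nh_lookup_table() is a scoped nexthop lookup: it consults only the
 * given table (no policy rules), forces RT6_LOOKUP_F_IGNORE_LINKSTATE so a
 * temporarily down link does not hide the route, and converts a hit on
 * ip6_null_entry into NULL so callers can fall back to a full lookup.
 */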
2716
David Ahernfc1e64e2018-01-25 16:55:09 -08002717static int ip6_route_check_nh_onlink(struct net *net,
2718 struct fib6_config *cfg,
David Ahern9fbb7042018-03-13 08:29:36 -07002719 const struct net_device *dev,
David Ahernfc1e64e2018-01-25 16:55:09 -08002720 struct netlink_ext_ack *extack)
2721{
David Ahern44750f82018-02-06 13:17:06 -08002722 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
David Ahernfc1e64e2018-01-25 16:55:09 -08002723 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2724 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2725 struct rt6_info *grt;
2726 int err;
2727
2728 err = 0;
2729 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2730 if (grt) {
David Ahern58e354c2018-02-06 12:14:12 -08002731 if (!grt->dst.error &&
2732 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
David Ahern44750f82018-02-06 13:17:06 -08002733 NL_SET_ERR_MSG(extack,
2734 "Nexthop has invalid gateway or device mismatch");
David Ahernfc1e64e2018-01-25 16:55:09 -08002735 err = -EINVAL;
2736 }
2737
2738 ip6_rt_put(grt);
2739 }
2740
2741 return err;
2742}
2743
David Ahern1edce992018-01-25 16:55:07 -08002744static int ip6_route_check_nh(struct net *net,
2745 struct fib6_config *cfg,
2746 struct net_device **_dev,
2747 struct inet6_dev **idev)
2748{
2749 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2750 struct net_device *dev = _dev ? *_dev : NULL;
2751 struct rt6_info *grt = NULL;
2752 int err = -EHOSTUNREACH;
2753
2754 if (cfg->fc_table) {
David Ahernf4797b32018-01-25 16:55:08 -08002755 int flags = RT6_LOOKUP_F_IFACE;
2756
2757 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2758 cfg->fc_table, flags);
David Ahern1edce992018-01-25 16:55:07 -08002759 if (grt) {
2760 if (grt->rt6i_flags & RTF_GATEWAY ||
2761 (dev && dev != grt->dst.dev)) {
2762 ip6_rt_put(grt);
2763 grt = NULL;
2764 }
2765 }
2766 }
2767
2768 if (!grt)
David Ahernb75cc8f2018-03-02 08:32:17 -08002769 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
David Ahern1edce992018-01-25 16:55:07 -08002770
2771 if (!grt)
2772 goto out;
2773
2774 if (dev) {
2775 if (dev != grt->dst.dev) {
2776 ip6_rt_put(grt);
2777 goto out;
2778 }
2779 } else {
2780 *_dev = dev = grt->dst.dev;
2781 *idev = grt->rt6i_idev;
2782 dev_hold(dev);
2783 in6_dev_hold(grt->rt6i_idev);
2784 }
2785
2786 if (!(grt->rt6i_flags & RTF_GATEWAY))
2787 err = 0;
2788
2789 ip6_rt_put(grt);
2790
2791out:
2792 return err;
2793}
2794
David Ahern9fbb7042018-03-13 08:29:36 -07002795static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2796 struct net_device **_dev, struct inet6_dev **idev,
2797 struct netlink_ext_ack *extack)
2798{
2799 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2800 int gwa_type = ipv6_addr_type(gw_addr);
David Ahern232378e2018-03-13 08:29:37 -07002801 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
David Ahern9fbb7042018-03-13 08:29:36 -07002802 const struct net_device *dev = *_dev;
David Ahern232378e2018-03-13 08:29:37 -07002803 bool need_addr_check = !dev;
David Ahern9fbb7042018-03-13 08:29:36 -07002804 int err = -EINVAL;
2805
 2806 /* if gw_addr is local we will fail to detect this while the
 2807 * address is still TENTATIVE (DAD in progress): rt6_lookup()
 2808 * will return the already-added prefix route via the interface
 2809 * that the prefix route was assigned to, which might be non-loopback.
2810 */
David Ahern232378e2018-03-13 08:29:37 -07002811 if (dev &&
2812 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2813 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
David Ahern9fbb7042018-03-13 08:29:36 -07002814 goto out;
2815 }
2816
2817 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
 2818 /* IPv6 strictly forbids using non-link-local
 2819 * addresses as nexthop addresses.
 2820 * Otherwise, the router will not be able to send redirects.
2821 * It is very good, but in some (rare!) circumstances
2822 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2823 * some exceptions. --ANK
2824 * We allow IPv4-mapped nexthops to support RFC4798-type
2825 * addressing
2826 */
2827 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2828 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2829 goto out;
2830 }
2831
2832 if (cfg->fc_flags & RTNH_F_ONLINK)
2833 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2834 else
2835 err = ip6_route_check_nh(net, cfg, _dev, idev);
2836
2837 if (err)
2838 goto out;
2839 }
2840
2841 /* reload in case device was changed */
2842 dev = *_dev;
2843
2844 err = -EINVAL;
2845 if (!dev) {
2846 NL_SET_ERR_MSG(extack, "Egress device not specified");
2847 goto out;
2848 } else if (dev->flags & IFF_LOOPBACK) {
2849 NL_SET_ERR_MSG(extack,
2850 "Egress device can not be loopback device for this route");
2851 goto out;
2852 }
David Ahern232378e2018-03-13 08:29:37 -07002853
2854 /* if we did not check gw_addr above, do so now that the
2855 * egress device has been resolved.
2856 */
2857 if (need_addr_check &&
2858 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2859 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2860 goto out;
2861 }
2862
David Ahern9fbb7042018-03-13 08:29:36 -07002863 err = 0;
2864out:
2865 return err;
2866}
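/* Gateway validation summary for ip6_validate_gw() above: a gateway that is
 * one of the host's own addresses is rejected (checked before and, if
 * needed, after the egress device is resolved); non-link-local gateways must
 * be unicast or IPv4-mapped (RFC 4798 style) and must pass the
 * ip6_route_check_nh_onlink()/ip6_route_check_nh() lookups; finally the
 * egress device must be resolved and must not be the loopback device.
 */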
2867
David Ahern333c4302017-05-21 10:12:04 -06002868static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
David Ahernacb54e32018-04-17 17:33:22 -07002869 gfp_t gfp_flags,
David Ahern333c4302017-05-21 10:12:04 -06002870 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002871{
Daniel Lezcano55786892008-03-04 13:47:47 -08002872 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002873 struct rt6_info *rt = NULL;
2874 struct net_device *dev = NULL;
2875 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07002876 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002877 int addr_type;
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002878 int err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002879
David Ahern557c44b2017-04-19 14:19:43 -07002880 /* RTF_PCPU is an internal flag; can not be set by userspace */
David Ahernd5d531c2017-05-21 10:12:05 -06002881 if (cfg->fc_flags & RTF_PCPU) {
2882 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
David Ahern557c44b2017-04-19 14:19:43 -07002883 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002884 }
David Ahern557c44b2017-04-19 14:19:43 -07002885
Wei Wang2ea23522017-10-27 17:30:12 -07002886 /* RTF_CACHE is an internal flag; can not be set by userspace */
2887 if (cfg->fc_flags & RTF_CACHE) {
2888 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2889 goto out;
2890 }
2891
David Aherne8478e82018-04-17 17:33:13 -07002892 if (cfg->fc_type > RTN_MAX) {
2893 NL_SET_ERR_MSG(extack, "Invalid route type");
2894 goto out;
2895 }
2896
David Ahernd5d531c2017-05-21 10:12:05 -06002897 if (cfg->fc_dst_len > 128) {
2898 NL_SET_ERR_MSG(extack, "Invalid prefix length");
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002899 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002900 }
2901 if (cfg->fc_src_len > 128) {
2902 NL_SET_ERR_MSG(extack, "Invalid source address length");
2903 goto out;
2904 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002905#ifndef CONFIG_IPV6_SUBTREES
David Ahernd5d531c2017-05-21 10:12:05 -06002906 if (cfg->fc_src_len) {
2907 NL_SET_ERR_MSG(extack,
2908 "Specifying source address requires IPV6_SUBTREES to be enabled");
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002909 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002910 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002911#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07002912 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002913 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08002914 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002915 if (!dev)
2916 goto out;
2917 idev = in6_dev_get(dev);
2918 if (!idev)
2919 goto out;
2920 }
2921
Thomas Graf86872cb2006-08-22 00:01:08 -07002922 if (cfg->fc_metric == 0)
2923 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002924
David Ahernfc1e64e2018-01-25 16:55:09 -08002925 if (cfg->fc_flags & RTNH_F_ONLINK) {
2926 if (!dev) {
2927 NL_SET_ERR_MSG(extack,
2928 "Nexthop device required for onlink");
2929 err = -ENODEV;
2930 goto out;
2931 }
2932
2933 if (!(dev->flags & IFF_UP)) {
2934 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2935 err = -ENETDOWN;
2936 goto out;
2937 }
2938 }
2939
Matti Vaittinend71314b2011-11-14 00:14:49 +00002940 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05002941 if (cfg->fc_nlinfo.nlh &&
2942 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
Matti Vaittinend71314b2011-11-14 00:14:49 +00002943 table = fib6_get_table(net, cfg->fc_table);
David S. Miller38308472011-12-03 18:02:47 -05002944 if (!table) {
Joe Perchesf3213832012-05-15 14:11:53 +00002945 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
Matti Vaittinend71314b2011-11-14 00:14:49 +00002946 table = fib6_new_table(net, cfg->fc_table);
2947 }
2948 } else {
2949 table = fib6_new_table(net, cfg->fc_table);
2950 }
David S. Miller38308472011-12-03 18:02:47 -05002951
2952 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07002953 goto out;
Thomas Grafc71099a2006-08-04 23:20:06 -07002954
Martin KaFai Lauad706862015-08-14 11:05:52 -07002955 rt = ip6_dst_alloc(net, NULL,
2956 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002957
David S. Miller38308472011-12-03 18:02:47 -05002958 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002959 err = -ENOMEM;
2960 goto out;
2961 }
2962
David Ahernd4ead6b2018-04-17 17:33:16 -07002963 err = ip6_convert_metrics(net, rt, cfg);
2964 if (err < 0)
2965 goto out;
2966
Gao feng1716a962012-04-06 00:13:10 +00002967 if (cfg->fc_flags & RTF_EXPIRES)
David Ahern14895682018-04-17 17:33:17 -07002968 fib6_set_expires(rt, jiffies +
Gao feng1716a962012-04-06 00:13:10 +00002969 clock_t_to_jiffies(cfg->fc_expires));
2970 else
David Ahern14895682018-04-17 17:33:17 -07002971 fib6_clean_expires(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002972
Thomas Graf86872cb2006-08-22 00:01:08 -07002973 if (cfg->fc_protocol == RTPROT_UNSPEC)
2974 cfg->fc_protocol = RTPROT_BOOT;
2975 rt->rt6i_protocol = cfg->fc_protocol;
2976
2977 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002978
Roopa Prabhu19e42e42015-07-21 10:43:48 +02002979 if (cfg->fc_encap) {
2980 struct lwtunnel_state *lwtstate;
2981
David Ahern30357d72017-01-30 12:07:37 -08002982 err = lwtunnel_build_state(cfg->fc_encap_type,
Tom Herbert127eb7c2015-08-24 09:45:41 -07002983 cfg->fc_encap, AF_INET6, cfg,
David Ahern9ae28722017-05-27 16:19:28 -06002984 &lwtstate, extack);
Roopa Prabhu19e42e42015-07-21 10:43:48 +02002985 if (err)
2986 goto out;
David Ahern5e670d82018-04-17 17:33:14 -07002987 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
Roopa Prabhu19e42e42015-07-21 10:43:48 +02002988 }
2989
Thomas Graf86872cb2006-08-22 00:01:08 -07002990 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2991 rt->rt6i_dst.plen = cfg->fc_dst_len;
Martin KaFai Lauafc4eef2015-04-28 13:03:07 -07002992 if (rt->rt6i_dst.plen == 128)
David Ahern3b6761d2018-04-17 17:33:20 -07002993 rt->dst_host = true;
Michal Kubečeke5fd3872014-03-27 13:04:08 +01002994
Linus Torvalds1da177e2005-04-16 15:20:36 -07002995#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07002996 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2997 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002998#endif
2999
Thomas Graf86872cb2006-08-22 00:01:08 -07003000 rt->rt6i_metric = cfg->fc_metric;
David Ahern5e670d82018-04-17 17:33:14 -07003001 rt->fib6_nh.nh_weight = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003002
David Aherne8478e82018-04-17 17:33:13 -07003003 rt->fib6_type = cfg->fc_type;
3004
Linus Torvalds1da177e2005-04-16 15:20:36 -07003005 /* We cannot add true routes via loopback here,
3006 they would result in kernel looping; promote them to reject routes
3007 */
Thomas Graf86872cb2006-08-22 00:01:08 -07003008 if ((cfg->fc_flags & RTF_REJECT) ||
David S. Miller38308472011-12-03 18:02:47 -05003009 (dev && (dev->flags & IFF_LOOPBACK) &&
3010 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3011 !(cfg->fc_flags & RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003012 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08003013 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003014 if (dev) {
3015 dev_put(dev);
3016 in6_dev_put(idev);
3017 }
Daniel Lezcano55786892008-03-04 13:47:47 -08003018 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003019 dev_hold(dev);
3020 idev = in6_dev_get(dev);
3021 if (!idev) {
3022 err = -ENODEV;
3023 goto out;
3024 }
3025 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003026 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
3027 goto install_route;
3028 }
3029
Thomas Graf86872cb2006-08-22 00:01:08 -07003030 if (cfg->fc_flags & RTF_GATEWAY) {
David Ahern9fbb7042018-03-13 08:29:36 -07003031 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3032 if (err)
Florian Westphal48ed7b22015-05-21 00:25:41 +02003033 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003034
David Ahern5e670d82018-04-17 17:33:14 -07003035 rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003036 }
3037
3038 err = -ENODEV;
David S. Miller38308472011-12-03 18:02:47 -05003039 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003040 goto out;
3041
Lorenzo Bianconi428604f2018-03-29 11:02:24 +02003042 if (idev->cnf.disable_ipv6) {
3043 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3044 err = -EACCES;
3045 goto out;
3046 }
3047
David Ahern955ec4c2018-01-24 19:45:29 -08003048 if (!(dev->flags & IFF_UP)) {
3049 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3050 err = -ENETDOWN;
3051 goto out;
3052 }
3053
Daniel Walterc3968a82011-04-13 21:10:57 +00003054 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3055 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
David Ahernd5d531c2017-05-21 10:12:05 -06003056 NL_SET_ERR_MSG(extack, "Invalid source address");
Daniel Walterc3968a82011-04-13 21:10:57 +00003057 err = -EINVAL;
3058 goto out;
3059 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003060 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
Daniel Walterc3968a82011-04-13 21:10:57 +00003061 rt->rt6i_prefsrc.plen = 128;
3062 } else
3063 rt->rt6i_prefsrc.plen = 0;
3064
Thomas Graf86872cb2006-08-22 00:01:08 -07003065 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003066
3067install_route:
Ido Schimmel5609b802018-01-07 12:45:06 +02003068 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
3069 !netif_carrier_ok(dev))
David Ahern5e670d82018-04-17 17:33:14 -07003070 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3071 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
3072 rt->fib6_nh.nh_dev = rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003073 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07003074 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08003075
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09003076 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08003077
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07003078 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003079out:
3080 if (dev)
3081 dev_put(dev);
3082 if (idev)
3083 in6_dev_put(idev);
Wei Wang587fea72017-06-17 10:42:36 -07003084 if (rt)
3085 dst_release_immediate(&rt->dst);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003086
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07003087 return ERR_PTR(err);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003088}
3089
David Ahernacb54e32018-04-17 17:33:22 -07003090int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3091 struct netlink_ext_ack *extack)
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003092{
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07003093 struct rt6_info *rt;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003094 int err;
3095
David Ahernacb54e32018-04-17 17:33:22 -07003096 rt = ip6_route_info_create(cfg, gfp_flags, extack);
David Ahernd4ead6b2018-04-17 17:33:16 -07003097 if (IS_ERR(rt))
3098 return PTR_ERR(rt);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003099
David Ahernd4ead6b2018-04-17 17:33:16 -07003100 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07003101
Linus Torvalds1da177e2005-04-16 15:20:36 -07003102 return err;
3103}
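/* Hedged example of the fib6_config consumed by ip6_route_add(); the field
 * names match the code above, but the values are purely illustrative and
 * would normally be filled in from netlink or the ioctl path:
 *
 *	struct fib6_config cfg = {
 *		.fc_table	= RT6_TABLE_MAIN,
 *		.fc_metric	= IP6_RT_PRIO_USER,
 *		.fc_ifindex	= dev->ifindex,
 *		.fc_dst_len	= 64,
 *		.fc_flags	= RTF_UP | RTF_GATEWAY,
 *		.fc_nlinfo.nl_net = net,
 *	};
 *	(cfg.fc_dst and cfg.fc_gateway set to the prefix and nexthop)
 *	err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
 */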
3104
Thomas Graf86872cb2006-08-22 00:01:08 -07003105static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003106{
David Ahernafb1d4b52018-04-17 17:33:11 -07003107 struct net *net = info->nl_net;
Thomas Grafc71099a2006-08-04 23:20:06 -07003108 struct fib6_table *table;
David Ahernafb1d4b52018-04-17 17:33:11 -07003109 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003110
David Ahern421842e2018-04-17 17:33:18 -07003111 if (rt == net->ipv6.fib6_null_entry) {
Gao feng6825a262012-09-19 19:25:34 +00003112 err = -ENOENT;
3113 goto out;
3114 }
Patrick McHardy6c813a72006-08-06 22:22:47 -07003115
Thomas Grafc71099a2006-08-04 23:20:06 -07003116 table = rt->rt6i_table;
Wei Wang66f5d6c2017-10-06 12:06:10 -07003117 spin_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -07003118 err = fib6_del(rt, info);
Wei Wang66f5d6c2017-10-06 12:06:10 -07003119 spin_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003120
Gao feng6825a262012-09-19 19:25:34 +00003121out:
Amerigo Wang94e187c2012-10-29 00:13:19 +00003122 ip6_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003123 return err;
3124}
3125
David Ahernafb1d4b52018-04-17 17:33:11 -07003126int ip6_del_rt(struct net *net, struct rt6_info *rt)
Thomas Grafe0a1ad732006-08-22 00:00:21 -07003127{
David Ahernafb1d4b52018-04-17 17:33:11 -07003128 struct nl_info info = { .nl_net = net };
3129
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08003130 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07003131}
3132
David Ahern0ae81332017-02-02 12:37:08 -08003133static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
3134{
3135 struct nl_info *info = &cfg->fc_nlinfo;
WANG Conge3330032017-02-27 16:07:43 -08003136 struct net *net = info->nl_net;
David Ahern16a16cd2017-02-02 12:37:11 -08003137 struct sk_buff *skb = NULL;
David Ahern0ae81332017-02-02 12:37:08 -08003138 struct fib6_table *table;
WANG Conge3330032017-02-27 16:07:43 -08003139 int err = -ENOENT;
David Ahern0ae81332017-02-02 12:37:08 -08003140
David Ahern421842e2018-04-17 17:33:18 -07003141 if (rt == net->ipv6.fib6_null_entry)
WANG Conge3330032017-02-27 16:07:43 -08003142 goto out_put;
David Ahern0ae81332017-02-02 12:37:08 -08003143 table = rt->rt6i_table;
Wei Wang66f5d6c2017-10-06 12:06:10 -07003144 spin_lock_bh(&table->tb6_lock);
David Ahern0ae81332017-02-02 12:37:08 -08003145
3146 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
3147 struct rt6_info *sibling, *next_sibling;
3148
David Ahern16a16cd2017-02-02 12:37:11 -08003149 /* prefer to send a single notification with all hops */
3150 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3151 if (skb) {
3152 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3153
David Ahernd4ead6b2018-04-17 17:33:16 -07003154 if (rt6_fill_node(net, skb, rt, NULL,
David Ahern16a16cd2017-02-02 12:37:11 -08003155 NULL, NULL, 0, RTM_DELROUTE,
3156 info->portid, seq, 0) < 0) {
3157 kfree_skb(skb);
3158 skb = NULL;
3159 } else
3160 info->skip_notify = 1;
3161 }
3162
David Ahern0ae81332017-02-02 12:37:08 -08003163 list_for_each_entry_safe(sibling, next_sibling,
3164 &rt->rt6i_siblings,
3165 rt6i_siblings) {
3166 err = fib6_del(sibling, info);
3167 if (err)
WANG Conge3330032017-02-27 16:07:43 -08003168 goto out_unlock;
David Ahern0ae81332017-02-02 12:37:08 -08003169 }
3170 }
3171
3172 err = fib6_del(rt, info);
WANG Conge3330032017-02-27 16:07:43 -08003173out_unlock:
Wei Wang66f5d6c2017-10-06 12:06:10 -07003174 spin_unlock_bh(&table->tb6_lock);
WANG Conge3330032017-02-27 16:07:43 -08003175out_put:
David Ahern0ae81332017-02-02 12:37:08 -08003176 ip6_rt_put(rt);
David Ahern16a16cd2017-02-02 12:37:11 -08003177
3178 if (skb) {
WANG Conge3330032017-02-27 16:07:43 -08003179 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
David Ahern16a16cd2017-02-02 12:37:11 -08003180 info->nlh, gfp_any());
3181 }
David Ahern0ae81332017-02-02 12:37:08 -08003182 return err;
3183}
3184
David Ahern333c4302017-05-21 10:12:04 -06003185static int ip6_route_del(struct fib6_config *cfg,
3186 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003187{
Wei Wang2b760fc2017-10-06 12:06:03 -07003188 struct rt6_info *rt, *rt_cache;
Thomas Grafc71099a2006-08-04 23:20:06 -07003189 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003190 struct fib6_node *fn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003191 int err = -ESRCH;
3192
Daniel Lezcano55786892008-03-04 13:47:47 -08003193 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
David Ahernd5d531c2017-05-21 10:12:05 -06003194 if (!table) {
3195 NL_SET_ERR_MSG(extack, "FIB table does not exist");
Thomas Grafc71099a2006-08-04 23:20:06 -07003196 return err;
David Ahernd5d531c2017-05-21 10:12:05 -06003197 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003198
Wei Wang66f5d6c2017-10-06 12:06:10 -07003199 rcu_read_lock();
Thomas Grafc71099a2006-08-04 23:20:06 -07003200
3201 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07003202 &cfg->fc_dst, cfg->fc_dst_len,
Wei Wang38fbeee2017-10-06 12:06:02 -07003203 &cfg->fc_src, cfg->fc_src_len,
Wei Wang2b760fc2017-10-06 12:06:03 -07003204 !(cfg->fc_flags & RTF_CACHE));
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09003205
Linus Torvalds1da177e2005-04-16 15:20:36 -07003206 if (fn) {
Wei Wang66f5d6c2017-10-06 12:06:10 -07003207 for_each_fib6_node_rt_rcu(fn) {
Wei Wang2b760fc2017-10-06 12:06:03 -07003208 if (cfg->fc_flags & RTF_CACHE) {
3209 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3210 &cfg->fc_src);
3211 if (!rt_cache)
3212 continue;
3213 rt = rt_cache;
3214 }
Thomas Graf86872cb2006-08-22 00:01:08 -07003215 if (cfg->fc_ifindex &&
David Ahern5e670d82018-04-17 17:33:14 -07003216 (!rt->fib6_nh.nh_dev ||
3217 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003218 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07003219 if (cfg->fc_flags & RTF_GATEWAY &&
David Ahern5e670d82018-04-17 17:33:14 -07003220 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003221 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07003222 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003223 continue;
Mantas Mc2ed1882016-12-16 10:30:59 +02003224 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3225 continue;
Wei Wangd3843fe2017-10-06 12:06:06 -07003226 if (!dst_hold_safe(&rt->dst))
3227 break;
Wei Wang66f5d6c2017-10-06 12:06:10 -07003228 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003229
David Ahern0ae81332017-02-02 12:37:08 -08003230 /* if gateway was specified only delete the one hop */
3231 if (cfg->fc_flags & RTF_GATEWAY)
3232 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3233
3234 return __ip6_del_rt_siblings(rt, cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003235 }
3236 }
Wei Wang66f5d6c2017-10-06 12:06:10 -07003237 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003238
3239 return err;
3240}
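/* Route deletion matching, as implemented above: with RTF_CACHE the request
 * is redirected to the exception entry found by rt6_find_cached_rt();
 * otherwise a candidate must match every selector the caller supplied
 * (ifindex, gateway, metric, protocol).  When a gateway is given only that
 * single nexthop is removed, otherwise the whole multipath set is deleted
 * via __ip6_del_rt_siblings().
 */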
3241
David S. Miller6700c272012-07-17 03:29:28 -07003242static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07003243{
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07003244 struct netevent_redirect netevent;
David S. Millere8599ff2012-07-11 23:43:53 -07003245 struct rt6_info *rt, *nrt = NULL;
David S. Millere8599ff2012-07-11 23:43:53 -07003246 struct ndisc_options ndopts;
3247 struct inet6_dev *in6_dev;
3248 struct neighbour *neigh;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003249 struct rd_msg *msg;
David S. Miller6e157b62012-07-12 00:05:02 -07003250 int optlen, on_link;
3251 u8 *lladdr;
David S. Millere8599ff2012-07-11 23:43:53 -07003252
Simon Horman29a3cad2013-05-28 20:34:26 +00003253 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003254 optlen -= sizeof(*msg);
David S. Millere8599ff2012-07-11 23:43:53 -07003255
3256 if (optlen < 0) {
David S. Miller6e157b62012-07-12 00:05:02 -07003257 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
David S. Millere8599ff2012-07-11 23:43:53 -07003258 return;
3259 }
3260
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003261 msg = (struct rd_msg *)icmp6_hdr(skb);
David S. Millere8599ff2012-07-11 23:43:53 -07003262
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003263 if (ipv6_addr_is_multicast(&msg->dest)) {
David S. Miller6e157b62012-07-12 00:05:02 -07003264 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
David S. Millere8599ff2012-07-11 23:43:53 -07003265 return;
3266 }
3267
David S. Miller6e157b62012-07-12 00:05:02 -07003268 on_link = 0;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003269 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
David S. Millere8599ff2012-07-11 23:43:53 -07003270 on_link = 1;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003271 } else if (ipv6_addr_type(&msg->target) !=
David S. Millere8599ff2012-07-11 23:43:53 -07003272 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
David S. Miller6e157b62012-07-12 00:05:02 -07003273 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
David S. Millere8599ff2012-07-11 23:43:53 -07003274 return;
3275 }
3276
3277 in6_dev = __in6_dev_get(skb->dev);
3278 if (!in6_dev)
3279 return;
3280 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3281 return;
3282
3283 /* RFC2461 8.1:
3284 * The IP source address of the Redirect MUST be the same as the current
3285 * first-hop router for the specified ICMP Destination Address.
3286 */
3287
Alexander Aringf997c552016-06-15 21:20:23 +02003288 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
David S. Millere8599ff2012-07-11 23:43:53 -07003289 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3290 return;
3291 }
David S. Miller6e157b62012-07-12 00:05:02 -07003292
3293 lladdr = NULL;
David S. Millere8599ff2012-07-11 23:43:53 -07003294 if (ndopts.nd_opts_tgt_lladdr) {
3295 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3296 skb->dev);
3297 if (!lladdr) {
3298 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3299 return;
3300 }
3301 }
3302
David S. Miller6e157b62012-07-12 00:05:02 -07003303 rt = (struct rt6_info *) dst;
Matthias Schifferec13ad12015-11-02 01:24:38 +01003304 if (rt->rt6i_flags & RTF_REJECT) {
David S. Miller6e157b62012-07-12 00:05:02 -07003305 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3306 return;
3307 }
3308
3309 /* Redirect received -> path was valid.
3310 * Look, redirects are sent only in response to data packets,
 3311 * so this nexthop is apparently reachable. --ANK
3312 */
Julian Anastasov0dec8792017-02-06 23:14:16 +02003313 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
David S. Miller6e157b62012-07-12 00:05:02 -07003314
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003315 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
David S. Millere8599ff2012-07-11 23:43:53 -07003316 if (!neigh)
3317 return;
3318
Linus Torvalds1da177e2005-04-16 15:20:36 -07003319 /*
3320 * We have finally decided to accept it.
3321 */
3322
Alexander Aringf997c552016-06-15 21:20:23 +02003323 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003324 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3325 NEIGH_UPDATE_F_OVERRIDE|
3326 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
Alexander Aringf997c552016-06-15 21:20:23 +02003327 NEIGH_UPDATE_F_ISROUTER)),
3328 NDISC_REDIRECT, &ndopts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003329
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07003330 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
David S. Miller38308472011-12-03 18:02:47 -05003331 if (!nrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003332 goto out;
3333
3334 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3335 if (on_link)
3336 nrt->rt6i_flags &= ~RTF_GATEWAY;
3337
Xin Longb91d5322017-08-03 14:13:46 +08003338 nrt->rt6i_protocol = RTPROT_REDIRECT;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003339 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003340
Wei Wang2b760fc2017-10-06 12:06:03 -07003341 /* No need to remove rt from the exception table if rt is
3342 * a cached route because rt6_insert_exception() will
 3343 * take care of it
3344 */
David Ahernd4ead6b2018-04-17 17:33:16 -07003345 if (rt6_insert_exception(nrt, rt->from)) {
Wei Wang2b760fc2017-10-06 12:06:03 -07003346 dst_release_immediate(&nrt->dst);
3347 goto out;
3348 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003349
Changli Gaod8d1f302010-06-10 23:31:35 -07003350 netevent.old = &rt->dst;
3351 netevent.new = &nrt->dst;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003352 netevent.daddr = &msg->dest;
YOSHIFUJI Hideaki / 吉藤英明60592832013-01-14 09:28:27 +00003353 netevent.neigh = neigh;
Tom Tucker8d717402006-07-30 20:43:36 -07003354 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3355
Linus Torvalds1da177e2005-04-16 15:20:36 -07003356out:
David S. Millere8599ff2012-07-11 23:43:53 -07003357 neigh_release(neigh);
David S. Miller6e157b62012-07-12 00:05:02 -07003358}
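/* rt6_do_redirect() processing order, mirroring the code above: validate the
 * rd_msg (length, non-multicast destination, link-local target), honour the
 * forwarding/accept_redirects policy, parse the target lladdr option,
 * confirm and update the neighbour entry, then install a cached clone of the
 * route (ip6_rt_cache_alloc() + rt6_insert_exception()) pointing at the new
 * gateway and fire a NETEVENT_REDIRECT notification.
 */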
3359
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003360#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003361static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003362 const struct in6_addr *prefix, int prefixlen,
David Ahern830218c2016-10-24 10:52:35 -07003363 const struct in6_addr *gwaddr,
3364 struct net_device *dev)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003365{
David Ahern830218c2016-10-24 10:52:35 -07003366 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3367 int ifindex = dev->ifindex;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003368 struct fib6_node *fn;
3369 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07003370 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003371
David Ahern830218c2016-10-24 10:52:35 -07003372 table = fib6_get_table(net, tb_id);
David S. Miller38308472011-12-03 18:02:47 -05003373 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07003374 return NULL;
3375
Wei Wang66f5d6c2017-10-06 12:06:10 -07003376 rcu_read_lock();
Wei Wang38fbeee2017-10-06 12:06:02 -07003377 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003378 if (!fn)
3379 goto out;
3380
Wei Wang66f5d6c2017-10-06 12:06:10 -07003381 for_each_fib6_node_rt_rcu(fn) {
David Ahern5e670d82018-04-17 17:33:14 -07003382 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003383 continue;
3384 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3385 continue;
David Ahern5e670d82018-04-17 17:33:14 -07003386 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003387 continue;
Wei Wangd3843fe2017-10-06 12:06:06 -07003388 ip6_hold_safe(NULL, &rt, false);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003389 break;
3390 }
3391out:
Wei Wang66f5d6c2017-10-06 12:06:10 -07003392 rcu_read_unlock();
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003393 return rt;
3394}
3395
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003396static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003397 const struct in6_addr *prefix, int prefixlen,
David Ahern830218c2016-10-24 10:52:35 -07003398 const struct in6_addr *gwaddr,
3399 struct net_device *dev,
Eric Dumazet95c96172012-04-15 05:58:06 +00003400 unsigned int pref)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003401{
Thomas Graf86872cb2006-08-22 00:01:08 -07003402 struct fib6_config cfg = {
Rami Rosen238fc7e2008-02-09 23:43:11 -08003403 .fc_metric = IP6_RT_PRIO_USER,
David Ahern830218c2016-10-24 10:52:35 -07003404 .fc_ifindex = dev->ifindex,
Thomas Graf86872cb2006-08-22 00:01:08 -07003405 .fc_dst_len = prefixlen,
3406 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3407 RTF_UP | RTF_PREF(pref),
Xin Longb91d5322017-08-03 14:13:46 +08003408 .fc_protocol = RTPROT_RA,
David Aherne8478e82018-04-17 17:33:13 -07003409 .fc_type = RTN_UNICAST,
Eric W. Biederman15e47302012-09-07 20:12:54 +00003410 .fc_nlinfo.portid = 0,
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003411 .fc_nlinfo.nlh = NULL,
3412 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07003413 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003414
David Ahern830218c2016-10-24 10:52:35 -07003415 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003416 cfg.fc_dst = *prefix;
3417 cfg.fc_gateway = *gwaddr;
Thomas Graf86872cb2006-08-22 00:01:08 -07003418
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08003419 /* We should treat it as a default route if prefix length is 0. */
3420 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07003421 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003422
David Ahernacb54e32018-04-17 17:33:22 -07003423 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003424
David Ahern830218c2016-10-24 10:52:35 -07003425 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003426}
3427#endif
3428
David Ahernafb1d4b52018-04-17 17:33:11 -07003429struct rt6_info *rt6_get_dflt_router(struct net *net,
3430 const struct in6_addr *addr,
3431 struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09003432{
David Ahern830218c2016-10-24 10:52:35 -07003433 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003434 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07003435 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003436
David Ahernafb1d4b52018-04-17 17:33:11 -07003437 table = fib6_get_table(net, tb_id);
David S. Miller38308472011-12-03 18:02:47 -05003438 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07003439 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003440
Wei Wang66f5d6c2017-10-06 12:06:10 -07003441 rcu_read_lock();
3442 for_each_fib6_node_rt_rcu(&table->tb6_root) {
David Ahern5e670d82018-04-17 17:33:14 -07003443 if (dev == rt->fib6_nh.nh_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08003444 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
David Ahern5e670d82018-04-17 17:33:14 -07003445 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003446 break;
3447 }
3448 if (rt)
Wei Wangd3843fe2017-10-06 12:06:06 -07003449 ip6_hold_safe(NULL, &rt, false);
Wei Wang66f5d6c2017-10-06 12:06:10 -07003450 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003451 return rt;
3452}
3453
David Ahernafb1d4b52018-04-17 17:33:11 -07003454struct rt6_info *rt6_add_dflt_router(struct net *net,
3455 const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08003456 struct net_device *dev,
3457 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003458{
Thomas Graf86872cb2006-08-22 00:01:08 -07003459 struct fib6_config cfg = {
David Ahernca254492015-10-12 11:47:10 -07003460 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08003461 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07003462 .fc_ifindex = dev->ifindex,
3463 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3464 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Xin Longb91d5322017-08-03 14:13:46 +08003465 .fc_protocol = RTPROT_RA,
David Aherne8478e82018-04-17 17:33:13 -07003466 .fc_type = RTN_UNICAST,
Eric W. Biederman15e47302012-09-07 20:12:54 +00003467 .fc_nlinfo.portid = 0,
Daniel Lezcano55786892008-03-04 13:47:47 -08003468 .fc_nlinfo.nlh = NULL,
David Ahernafb1d4b52018-04-17 17:33:11 -07003469 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07003470 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07003471
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003472 cfg.fc_gateway = *gwaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003473
David Ahernacb54e32018-04-17 17:33:22 -07003474 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
David Ahern830218c2016-10-24 10:52:35 -07003475 struct fib6_table *table;
3476
3477 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3478 if (table)
3479 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3480 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003481
David Ahernafb1d4b52018-04-17 17:33:11 -07003482 return rt6_get_dflt_router(net, gwaddr, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003483}
3484
David Ahernafb1d4b52018-04-17 17:33:11 -07003485static void __rt6_purge_dflt_routers(struct net *net,
3486 struct fib6_table *table)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003487{
3488 struct rt6_info *rt;
3489
3490restart:
Wei Wang66f5d6c2017-10-06 12:06:10 -07003491 rcu_read_lock();
3492 for_each_fib6_node_rt_rcu(&table->tb6_root) {
Lorenzo Colitti3e8b0ac2013-03-03 20:46:46 +00003493 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3494 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
Wei Wangd3843fe2017-10-06 12:06:06 -07003495 if (dst_hold_safe(&rt->dst)) {
Wei Wang66f5d6c2017-10-06 12:06:10 -07003496 rcu_read_unlock();
David Ahernafb1d4b52018-04-17 17:33:11 -07003497 ip6_del_rt(net, rt);
Wei Wangd3843fe2017-10-06 12:06:06 -07003498 } else {
Wei Wang66f5d6c2017-10-06 12:06:10 -07003499 rcu_read_unlock();
Wei Wangd3843fe2017-10-06 12:06:06 -07003500 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003501 goto restart;
3502 }
3503 }
Wei Wang66f5d6c2017-10-06 12:06:10 -07003504 rcu_read_unlock();
David Ahern830218c2016-10-24 10:52:35 -07003505
3506 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3507}
3508
3509void rt6_purge_dflt_routers(struct net *net)
3510{
3511 struct fib6_table *table;
3512 struct hlist_head *head;
3513 unsigned int h;
3514
3515 rcu_read_lock();
3516
3517 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3518 head = &net->ipv6.fib_table_hash[h];
3519 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3520 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
David Ahernafb1d4b52018-04-17 17:33:11 -07003521 __rt6_purge_dflt_routers(net, table);
David Ahern830218c2016-10-24 10:52:35 -07003522 }
3523 }
3524
3525 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003526}
3527
Daniel Lezcano55786892008-03-04 13:47:47 -08003528static void rtmsg_to_fib6_config(struct net *net,
3529 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07003530 struct fib6_config *cfg)
3531{
3532 memset(cfg, 0, sizeof(*cfg));
3533
David Ahernca254492015-10-12 11:47:10 -07003534 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3535 : RT6_TABLE_MAIN;
Thomas Graf86872cb2006-08-22 00:01:08 -07003536 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3537 cfg->fc_metric = rtmsg->rtmsg_metric;
3538 cfg->fc_expires = rtmsg->rtmsg_info;
3539 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3540 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3541 cfg->fc_flags = rtmsg->rtmsg_flags;
David Aherne8478e82018-04-17 17:33:13 -07003542 cfg->fc_type = rtmsg->rtmsg_type;
Thomas Graf86872cb2006-08-22 00:01:08 -07003543
Daniel Lezcano55786892008-03-04 13:47:47 -08003544 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08003545
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003546 cfg->fc_dst = rtmsg->rtmsg_dst;
3547 cfg->fc_src = rtmsg->rtmsg_src;
3548 cfg->fc_gateway = rtmsg->rtmsg_gateway;
Thomas Graf86872cb2006-08-22 00:01:08 -07003549}
3550
Daniel Lezcano55786892008-03-04 13:47:47 -08003551int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003552{
Thomas Graf86872cb2006-08-22 00:01:08 -07003553 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003554 struct in6_rtmsg rtmsg;
3555 int err;
3556
Ian Morris67ba4152014-08-24 21:53:10 +01003557 switch (cmd) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003558 case SIOCADDRT: /* Add a route */
3559 case SIOCDELRT: /* Delete a route */
Eric W. Biedermanaf31f412012-11-16 03:03:06 +00003560 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003561 return -EPERM;
3562 err = copy_from_user(&rtmsg, arg,
3563 sizeof(struct in6_rtmsg));
3564 if (err)
3565 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07003566
Daniel Lezcano55786892008-03-04 13:47:47 -08003567 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07003568
Linus Torvalds1da177e2005-04-16 15:20:36 -07003569 rtnl_lock();
3570 switch (cmd) {
3571 case SIOCADDRT:
David Ahernacb54e32018-04-17 17:33:22 -07003572 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003573 break;
3574 case SIOCDELRT:
David Ahern333c4302017-05-21 10:12:04 -06003575 err = ip6_route_del(&cfg, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003576 break;
3577 default:
3578 err = -EINVAL;
3579 }
3580 rtnl_unlock();
3581
3582 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07003583 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003584
3585 return -EINVAL;
3586}
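/* Userspace sketch of the ioctl path handled above (hedged: assumes an
 * AF_INET6 socket descriptor fd, an existing interface "eth0" and a
 * reachable link-local nexthop; not taken from this file):
 *
 *	struct in6_rtmsg rtmsg = { 0 };
 *
 *	rtmsg.rtmsg_dst_len = 64;
 *	rtmsg.rtmsg_ifindex = if_nametoindex("eth0");
 *	rtmsg.rtmsg_flags   = RTF_UP | RTF_GATEWAY;
 *	inet_pton(AF_INET6, "2001:db8::", &rtmsg.rtmsg_dst);
 *	inet_pton(AF_INET6, "fe80::1", &rtmsg.rtmsg_gateway);
 *	if (ioctl(fd, SIOCADDRT, &rtmsg) < 0)
 *		perror("SIOCADDRT");
 */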
3587
3588/*
3589 * Drop the packet on the floor
3590 */
3591
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07003592static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003593{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003594 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00003595 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003596 switch (ipstats_mib_noroutes) {
3597 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07003598 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00003599 if (type == IPV6_ADDR_ANY) {
Stephen Suryaputrabdb7cc62018-04-16 13:42:16 -04003600 IP6_INC_STATS(dev_net(dst->dev),
3601 __in6_dev_get_safely(skb->dev),
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07003602 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003603 break;
3604 }
3605 /* FALLTHROUGH */
3606 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07003607 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3608 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003609 break;
3610 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00003611 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003612 kfree_skb(skb);
3613 return 0;
3614}
3615
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003616static int ip6_pkt_discard(struct sk_buff *skb)
3617{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003618 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003619}
3620
Eric W. Biedermanede20592015-10-07 16:48:47 -05003621static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003622{
Eric Dumazetadf30902009-06-02 05:19:30 +00003623 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003624 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003625}
3626
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003627static int ip6_pkt_prohibit(struct sk_buff *skb)
3628{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003629 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003630}
3631
Eric W. Biedermanede20592015-10-07 16:48:47 -05003632static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003633{
Eric Dumazetadf30902009-06-02 05:19:30 +00003634 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003635 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003636}
3637
Linus Torvalds1da177e2005-04-16 15:20:36 -07003638/*
3639 * Allocate a dst for local (unicast / anycast) address.
3640 */
3641
David Ahernafb1d4b52018-04-17 17:33:11 -07003642struct rt6_info *addrconf_dst_alloc(struct net *net,
3643 struct inet6_dev *idev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003644 const struct in6_addr *addr,
David Ahernacb54e32018-04-17 17:33:22 -07003645 bool anycast, gfp_t gfp_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003646{
David Ahernca254492015-10-12 11:47:10 -07003647 u32 tb_id;
David Ahern4832c302017-08-17 12:17:20 -07003648 struct net_device *dev = idev->dev;
David Ahern5f02ce242016-09-10 12:09:54 -07003649 struct rt6_info *rt;
3650
David Ahern5f02ce242016-09-10 12:09:54 -07003651 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
Hannes Frederic Sowaa3300ef2013-12-07 03:33:45 +01003652 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003653 return ERR_PTR(-ENOMEM);
3654
David Ahern3b6761d2018-04-17 17:33:20 -07003655 rt->dst_nocount = true;
3656
Linus Torvalds1da177e2005-04-16 15:20:36 -07003657 in6_dev_hold(idev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003658 rt->rt6i_idev = idev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003659
David Ahern3b6761d2018-04-17 17:33:20 -07003660 rt->dst_host = true;
David Ahern94b5e0f2017-02-02 08:52:21 -08003661 rt->rt6i_protocol = RTPROT_KERNEL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003662 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
David Aherne8478e82018-04-17 17:33:13 -07003663 if (anycast) {
3664 rt->fib6_type = RTN_ANYCAST;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09003665 rt->rt6i_flags |= RTF_ANYCAST;
David Aherne8478e82018-04-17 17:33:13 -07003666 } else {
3667 rt->fib6_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003668 rt->rt6i_flags |= RTF_LOCAL;
David Aherne8478e82018-04-17 17:33:13 -07003669 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003670
David Ahern5e670d82018-04-17 17:33:14 -07003671 rt->fib6_nh.nh_gw = *addr;
3672 rt->fib6_nh.nh_dev = dev;
Julian Anastasov550bab42013-10-20 15:43:04 +03003673 rt->rt6i_gateway = *addr;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003674 rt->rt6i_dst.addr = *addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003675 rt->rt6i_dst.plen = 128;
David Ahernca254492015-10-12 11:47:10 -07003676 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3677 rt->rt6i_table = fib6_get_table(net, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003678
Linus Torvalds1da177e2005-04-16 15:20:36 -07003679 return rt;
3680}
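/* Illustrative caller (an assumption; the real call sites live in addrconf,
 * not in this file): one such /128 entry is created for every configured
 * address, roughly
 *
 *	rt = addrconf_dst_alloc(net, idev, &ifp->addr, false, GFP_KERNEL);
 *
 * and then inserted into the local table. Passing anycast=true selects
 * RTN_ANYCAST/RTF_ANYCAST instead of RTN_LOCAL/RTF_LOCAL above.
 */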
3681
Daniel Walterc3968a82011-04-13 21:10:57 +00003682/* remove a deleted IP from prefsrc entries */
3683struct arg_dev_net_ip {
3684 struct net_device *dev;
3685 struct net *net;
3686 struct in6_addr *addr;
3687};
3688
3689static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3690{
3691 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3692 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3693 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3694
David Ahern5e670d82018-04-17 17:33:14 -07003695 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
David Ahern421842e2018-04-17 17:33:18 -07003696 rt != net->ipv6.fib6_null_entry &&
Daniel Walterc3968a82011-04-13 21:10:57 +00003697 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
Wei Wang60006a42017-10-06 12:05:58 -07003698 spin_lock_bh(&rt6_exception_lock);
Daniel Walterc3968a82011-04-13 21:10:57 +00003699 /* remove prefsrc entry */
3700 rt->rt6i_prefsrc.plen = 0;
Wei Wang60006a42017-10-06 12:05:58 -07003701 /* need to update cache as well */
3702 rt6_exceptions_remove_prefsrc(rt);
3703 spin_unlock_bh(&rt6_exception_lock);
Daniel Walterc3968a82011-04-13 21:10:57 +00003704 }
3705 return 0;
3706}
3707
3708void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3709{
3710 struct net *net = dev_net(ifp->idev->dev);
3711 struct arg_dev_net_ip adni = {
3712 .dev = ifp->idev->dev,
3713 .net = net,
3714 .addr = &ifp->addr,
3715 };
Li RongQing0c3584d2013-12-27 16:32:38 +08003716 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
Daniel Walterc3968a82011-04-13 21:10:57 +00003717}
3718
Duan Jiongbe7a0102014-05-15 15:56:14 +08003719#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
Duan Jiongbe7a0102014-05-15 15:56:14 +08003720
3721/* Remove routers and update dst entries when a gateway turns into a host. */
3722static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3723{
3724 struct in6_addr *gateway = (struct in6_addr *)arg;
3725
Wei Wang2b760fc2017-10-06 12:06:03 -07003726 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
David Ahern5e670d82018-04-17 17:33:14 -07003727 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
Duan Jiongbe7a0102014-05-15 15:56:14 +08003728 return -1;
3729 }
Wei Wangb16cb452017-10-06 12:06:00 -07003730
3731	/* Further clean up cached routes in the exception table.
3732	 * This is needed because a cached route may have a different
3733	 * gateway than its 'parent' in the case of an IP redirect.
3734	 */
3735 rt6_exceptions_clean_tohost(rt, gateway);
3736
Duan Jiongbe7a0102014-05-15 15:56:14 +08003737 return 0;
3738}
3739
3740void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3741{
3742 fib6_clean_all(net, fib6_clean_tohost, gateway);
3743}
3744
Ido Schimmel2127d952018-01-07 12:45:03 +02003745struct arg_netdev_event {
3746 const struct net_device *dev;
Ido Schimmel4c981e22018-01-07 12:45:04 +02003747 union {
3748 unsigned int nh_flags;
3749 unsigned long event;
3750 };
Ido Schimmel2127d952018-01-07 12:45:03 +02003751};
3752
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003753static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3754{
3755 struct rt6_info *iter;
3756 struct fib6_node *fn;
3757
3758 fn = rcu_dereference_protected(rt->rt6i_node,
3759 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3760 iter = rcu_dereference_protected(fn->leaf,
3761 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3762 while (iter) {
3763 if (iter->rt6i_metric == rt->rt6i_metric &&
3764 rt6_qualify_for_ecmp(iter))
3765 return iter;
3766 iter = rcu_dereference_protected(iter->rt6_next,
3767 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3768 }
3769
3770 return NULL;
3771}
3772
3773static bool rt6_is_dead(const struct rt6_info *rt)
3774{
David Ahern5e670d82018-04-17 17:33:14 -07003775 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3776 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003777 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3778 return true;
3779
3780 return false;
3781}
3782
3783static int rt6_multipath_total_weight(const struct rt6_info *rt)
3784{
3785 struct rt6_info *iter;
3786 int total = 0;
3787
3788 if (!rt6_is_dead(rt))
David Ahern5e670d82018-04-17 17:33:14 -07003789 total += rt->fib6_nh.nh_weight;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003790
3791 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3792 if (!rt6_is_dead(iter))
David Ahern5e670d82018-04-17 17:33:14 -07003793 total += iter->fib6_nh.nh_weight;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003794 }
3795
3796 return total;
3797}
3798
3799static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3800{
3801 int upper_bound = -1;
3802
3803 if (!rt6_is_dead(rt)) {
David Ahern5e670d82018-04-17 17:33:14 -07003804 *weight += rt->fib6_nh.nh_weight;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003805 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3806 total) - 1;
3807 }
David Ahern5e670d82018-04-17 17:33:14 -07003808 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003809}
3810
3811static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3812{
3813 struct rt6_info *iter;
3814 int weight = 0;
3815
3816 rt6_upper_bound_set(rt, &weight, total);
3817
3818 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3819 rt6_upper_bound_set(iter, &weight, total);
3820}
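/* Worked example for the two helpers above (illustrative numbers): three
 * live siblings with nh_weight 2, 1 and 1 give total = 4, so the running
 * weights 2, 3 and 4 produce the upper bounds
 *
 *	round(2 * 2^31 / 4) - 1 = 1073741823
 *	round(3 * 2^31 / 4) - 1 = 1610612735
 *	round(4 * 2^31 / 4) - 1 = 2147483647
 *
 * while a dead nexthop keeps -1 and can never match. Multipath selection
 * then picks the first sibling whose nh_upper_bound is not below the
 * 31-bit flow hash, giving a 2:1:1 traffic split.
 */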
3821
3822void rt6_multipath_rebalance(struct rt6_info *rt)
3823{
3824 struct rt6_info *first;
3825 int total;
3826
3827	/* If the entire multipath route was marked for flushing,
3828	 * there is no need to rebalance upon the removal of every
3829	 * sibling route.
3830	 */
3831 if (!rt->rt6i_nsiblings || rt->should_flush)
3832 return;
3833
3834 /* During lookup routes are evaluated in order, so we need to
3835 * make sure upper bounds are assigned from the first sibling
3836 * onwards.
3837 */
3838 first = rt6_multipath_first_sibling(rt);
3839 if (WARN_ON_ONCE(!first))
3840 return;
3841
3842 total = rt6_multipath_total_weight(first);
3843 rt6_multipath_upper_bound_set(first, total);
3844}
3845
Ido Schimmel2127d952018-01-07 12:45:03 +02003846static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3847{
3848 const struct arg_netdev_event *arg = p_arg;
David Ahern7aef6852018-04-17 17:33:10 -07003849 struct net *net = dev_net(arg->dev);
Ido Schimmel2127d952018-01-07 12:45:03 +02003850
David Ahern421842e2018-04-17 17:33:18 -07003851 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
David Ahern5e670d82018-04-17 17:33:14 -07003852 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
David Ahern7aef6852018-04-17 17:33:10 -07003853 fib6_update_sernum_upto_root(net, rt);
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003854 rt6_multipath_rebalance(rt);
Ido Schimmel1de178e2018-01-07 12:45:15 +02003855 }
Ido Schimmel2127d952018-01-07 12:45:03 +02003856
3857 return 0;
3858}
3859
3860void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3861{
3862 struct arg_netdev_event arg = {
3863 .dev = dev,
Ido Schimmel6802f3a2018-01-12 22:07:36 +02003864 {
3865 .nh_flags = nh_flags,
3866 },
Ido Schimmel2127d952018-01-07 12:45:03 +02003867 };
3868
3869 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3870 arg.nh_flags |= RTNH_F_LINKDOWN;
3871
3872 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3873}
3874
Ido Schimmel1de178e2018-01-07 12:45:15 +02003875static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3876 const struct net_device *dev)
3877{
3878 struct rt6_info *iter;
3879
David Ahern5e670d82018-04-17 17:33:14 -07003880 if (rt->fib6_nh.nh_dev == dev)
Ido Schimmel1de178e2018-01-07 12:45:15 +02003881 return true;
3882 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
David Ahern5e670d82018-04-17 17:33:14 -07003883 if (iter->fib6_nh.nh_dev == dev)
Ido Schimmel1de178e2018-01-07 12:45:15 +02003884 return true;
3885
3886 return false;
3887}
3888
3889static void rt6_multipath_flush(struct rt6_info *rt)
3890{
3891 struct rt6_info *iter;
3892
3893 rt->should_flush = 1;
3894 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3895 iter->should_flush = 1;
3896}
3897
3898static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3899 const struct net_device *down_dev)
3900{
3901 struct rt6_info *iter;
3902 unsigned int dead = 0;
3903
David Ahern5e670d82018-04-17 17:33:14 -07003904 if (rt->fib6_nh.nh_dev == down_dev ||
3905 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
Ido Schimmel1de178e2018-01-07 12:45:15 +02003906 dead++;
3907 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
David Ahern5e670d82018-04-17 17:33:14 -07003908 if (iter->fib6_nh.nh_dev == down_dev ||
3909 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
Ido Schimmel1de178e2018-01-07 12:45:15 +02003910 dead++;
3911
3912 return dead;
3913}
3914
3915static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3916 const struct net_device *dev,
3917 unsigned int nh_flags)
3918{
3919 struct rt6_info *iter;
3920
David Ahern5e670d82018-04-17 17:33:14 -07003921 if (rt->fib6_nh.nh_dev == dev)
3922 rt->fib6_nh.nh_flags |= nh_flags;
Ido Schimmel1de178e2018-01-07 12:45:15 +02003923 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
David Ahern5e670d82018-04-17 17:33:14 -07003924 if (iter->fib6_nh.nh_dev == dev)
3925 iter->fib6_nh.nh_flags |= nh_flags;
Ido Schimmel1de178e2018-01-07 12:45:15 +02003926}
3927
David Aherna1a22c12017-01-18 07:40:36 -08003928/* called with write lock held for table with rt */
Ido Schimmel4c981e22018-01-07 12:45:04 +02003929static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003930{
Ido Schimmel4c981e22018-01-07 12:45:04 +02003931 const struct arg_netdev_event *arg = p_arg;
3932 const struct net_device *dev = arg->dev;
David Ahern7aef6852018-04-17 17:33:10 -07003933 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003934
David Ahern421842e2018-04-17 17:33:18 -07003935 if (rt == net->ipv6.fib6_null_entry)
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003936 return 0;
3937
3938 switch (arg->event) {
3939 case NETDEV_UNREGISTER:
David Ahern5e670d82018-04-17 17:33:14 -07003940 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003941 case NETDEV_DOWN:
Ido Schimmel1de178e2018-01-07 12:45:15 +02003942 if (rt->should_flush)
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003943 return -1;
Ido Schimmel1de178e2018-01-07 12:45:15 +02003944 if (!rt->rt6i_nsiblings)
David Ahern5e670d82018-04-17 17:33:14 -07003945 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
Ido Schimmel1de178e2018-01-07 12:45:15 +02003946 if (rt6_multipath_uses_dev(rt, dev)) {
3947 unsigned int count;
3948
3949 count = rt6_multipath_dead_count(rt, dev);
3950 if (rt->rt6i_nsiblings + 1 == count) {
3951 rt6_multipath_flush(rt);
3952 return -1;
3953 }
3954 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3955 RTNH_F_LINKDOWN);
David Ahern7aef6852018-04-17 17:33:10 -07003956 fib6_update_sernum(net, rt);
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003957 rt6_multipath_rebalance(rt);
Ido Schimmel1de178e2018-01-07 12:45:15 +02003958 }
3959 return -2;
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003960 case NETDEV_CHANGE:
David Ahern5e670d82018-04-17 17:33:14 -07003961 if (rt->fib6_nh.nh_dev != dev ||
Ido Schimmel1de178e2018-01-07 12:45:15 +02003962 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003963 break;
David Ahern5e670d82018-04-17 17:33:14 -07003964 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003965 rt6_multipath_rebalance(rt);
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003966 break;
Ido Schimmel2b241362018-01-07 12:45:02 +02003967 }
David S. Millerc159d302011-12-26 15:24:36 -05003968
Linus Torvalds1da177e2005-04-16 15:20:36 -07003969 return 0;
3970}
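/* Worked example for the NETDEV_DOWN case above (illustrative): take a
 * multipath route with three nexthops (so rt6i_nsiblings == 2 on each
 * entry), two of which use the downed device. rt6_multipath_dead_count()
 * returns 2, which is less than rt6i_nsiblings + 1 == 3, so only those two
 * nexthops are marked RTNH_F_DEAD | RTNH_F_LINKDOWN and the weights are
 * rebalanced. If all three nexthops used the device, the whole sibling
 * group would be marked should_flush and removed instead.
 */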
3971
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003972void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003973{
Ido Schimmel4c981e22018-01-07 12:45:04 +02003974 struct arg_netdev_event arg = {
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003975 .dev = dev,
Ido Schimmel6802f3a2018-01-12 22:07:36 +02003976 {
3977 .event = event,
3978 },
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003979 };
3980
Ido Schimmel4c981e22018-01-07 12:45:04 +02003981 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3982}
3983
3984void rt6_disable_ip(struct net_device *dev, unsigned long event)
3985{
3986 rt6_sync_down_dev(dev, event);
3987 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3988 neigh_ifdown(&nd_tbl, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003989}
3990
Eric Dumazet95c96172012-04-15 05:58:06 +00003991struct rt6_mtu_change_arg {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003992 struct net_device *dev;
Eric Dumazet95c96172012-04-15 05:58:06 +00003993 unsigned int mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003994};
3995
3996static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3997{
3998 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3999 struct inet6_dev *idev;
4000
4001	/* In IPv6, PMTU discovery is not optional,
4002	   so the RTAX_MTU lock cannot disable it.
4003	   We still use this lock to block changes
4004	   caused by addrconf/ndisc.
4005	*/
4006
4007 idev = __in6_dev_get(arg->dev);
David S. Miller38308472011-12-03 18:02:47 -05004008 if (!idev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004009 return 0;
4010
4011	/* For an administrative MTU increase, there is no way to discover
4012	   the corresponding IPv6 PMTU increase, so the PMTU should be updated here.
4013	   Since RFC 1981 doesn't cover administrative MTU increases,
4014	   updating the PMTU here is a MUST (e.g. for jumbo frames).
4015	*/
David Ahern5e670d82018-04-17 17:33:14 -07004016 if (rt->fib6_nh.nh_dev == arg->dev &&
David Ahernd4ead6b2018-04-17 17:33:16 -07004017 !fib6_metric_locked(rt, RTAX_MTU)) {
4018 u32 mtu = rt->fib6_pmtu;
4019
4020 if (mtu >= arg->mtu ||
4021 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4022 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4023
Wei Wangf5bbe7e2017-10-06 12:05:59 -07004024 spin_lock_bh(&rt6_exception_lock);
Stefano Brivioe9fa1492018-03-06 11:10:19 +01004025 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
Wei Wangf5bbe7e2017-10-06 12:05:59 -07004026 spin_unlock_bh(&rt6_exception_lock);
Simon Arlott566cfd82007-07-26 00:09:55 -07004027 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004028 return 0;
4029}
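/* Worked example for the update rule above (illustrative; it assumes the
 * caller invokes rt6_mtu_change() before updating idev->cnf.mtu6, so
 * cnf.mtu6 still holds the old device MTU here): raising a device from
 * 1500 to 9000 bumps routes whose MTU metric is exactly 1500, i.e. routes
 * that were merely tracking the device, while an unrelated admin-set MTU
 * of 1400 is left alone. Lowering the device MTU clamps every route whose
 * MTU metric is at or above the new value (mtu >= arg->mtu), unless
 * RTAX_MTU is locked.
 */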
4030
Eric Dumazet95c96172012-04-15 05:58:06 +00004031void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004032{
Thomas Grafc71099a2006-08-04 23:20:06 -07004033 struct rt6_mtu_change_arg arg = {
4034 .dev = dev,
4035 .mtu = mtu,
4036 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07004037
Li RongQing0c3584d2013-12-27 16:32:38 +08004038 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004039}
4040
Patrick McHardyef7c79e2007-06-05 12:38:30 -07004041static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07004042 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07004043 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07004044 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07004045 [RTA_PRIORITY] = { .type = NLA_U32 },
4046 [RTA_METRICS] = { .type = NLA_NESTED },
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004047 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01004048 [RTA_PREF] = { .type = NLA_U8 },
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004049 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4050 [RTA_ENCAP] = { .type = NLA_NESTED },
Xin Long32bc2012015-12-16 17:50:11 +08004051 [RTA_EXPIRES] = { .type = NLA_U32 },
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09004052 [RTA_UID] = { .type = NLA_U32 },
Liping Zhang3b45a412017-02-27 20:59:39 +08004053 [RTA_MARK] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07004054};
4055
4056static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
David Ahern333c4302017-05-21 10:12:04 -06004057 struct fib6_config *cfg,
4058 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004059{
Thomas Graf86872cb2006-08-22 00:01:08 -07004060 struct rtmsg *rtm;
4061 struct nlattr *tb[RTA_MAX+1];
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01004062 unsigned int pref;
Thomas Graf86872cb2006-08-22 00:01:08 -07004063 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004064
Johannes Bergfceb6432017-04-12 14:34:07 +02004065 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4066 NULL);
Thomas Graf86872cb2006-08-22 00:01:08 -07004067 if (err < 0)
4068 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004069
Thomas Graf86872cb2006-08-22 00:01:08 -07004070 err = -EINVAL;
4071 rtm = nlmsg_data(nlh);
4072 memset(cfg, 0, sizeof(*cfg));
4073
4074 cfg->fc_table = rtm->rtm_table;
4075 cfg->fc_dst_len = rtm->rtm_dst_len;
4076 cfg->fc_src_len = rtm->rtm_src_len;
4077 cfg->fc_flags = RTF_UP;
4078 cfg->fc_protocol = rtm->rtm_protocol;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00004079 cfg->fc_type = rtm->rtm_type;
Thomas Graf86872cb2006-08-22 00:01:08 -07004080
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00004081 if (rtm->rtm_type == RTN_UNREACHABLE ||
4082 rtm->rtm_type == RTN_BLACKHOLE ||
Nicolas Dichtelb4949ab2012-09-06 05:53:35 +00004083 rtm->rtm_type == RTN_PROHIBIT ||
4084 rtm->rtm_type == RTN_THROW)
Thomas Graf86872cb2006-08-22 00:01:08 -07004085 cfg->fc_flags |= RTF_REJECT;
4086
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00004087 if (rtm->rtm_type == RTN_LOCAL)
4088 cfg->fc_flags |= RTF_LOCAL;
4089
Martin KaFai Lau1f56a01f2015-04-28 13:03:03 -07004090 if (rtm->rtm_flags & RTM_F_CLONED)
4091 cfg->fc_flags |= RTF_CACHE;
4092
David Ahernfc1e64e2018-01-25 16:55:09 -08004093 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4094
Eric W. Biederman15e47302012-09-07 20:12:54 +00004095 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
Thomas Graf86872cb2006-08-22 00:01:08 -07004096 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09004097 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07004098
4099 if (tb[RTA_GATEWAY]) {
Jiri Benc67b61f62015-03-29 16:59:26 +02004100 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
Thomas Graf86872cb2006-08-22 00:01:08 -07004101 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004102 }
Thomas Graf86872cb2006-08-22 00:01:08 -07004103
4104 if (tb[RTA_DST]) {
4105 int plen = (rtm->rtm_dst_len + 7) >> 3;
4106
4107 if (nla_len(tb[RTA_DST]) < plen)
4108 goto errout;
4109
4110 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004111 }
Thomas Graf86872cb2006-08-22 00:01:08 -07004112
4113 if (tb[RTA_SRC]) {
4114 int plen = (rtm->rtm_src_len + 7) >> 3;
4115
4116 if (nla_len(tb[RTA_SRC]) < plen)
4117 goto errout;
4118
4119 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004120 }
Thomas Graf86872cb2006-08-22 00:01:08 -07004121
Daniel Walterc3968a82011-04-13 21:10:57 +00004122 if (tb[RTA_PREFSRC])
Jiri Benc67b61f62015-03-29 16:59:26 +02004123 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
Daniel Walterc3968a82011-04-13 21:10:57 +00004124
Thomas Graf86872cb2006-08-22 00:01:08 -07004125 if (tb[RTA_OIF])
4126 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4127
4128 if (tb[RTA_PRIORITY])
4129 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4130
4131 if (tb[RTA_METRICS]) {
4132 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4133 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004134 }
Thomas Graf86872cb2006-08-22 00:01:08 -07004135
4136 if (tb[RTA_TABLE])
4137 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4138
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004139 if (tb[RTA_MULTIPATH]) {
4140 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4141 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
David Ahern9ed59592017-01-17 14:57:36 -08004142
4143 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
David Ahernc255bd62017-05-27 16:19:27 -06004144 cfg->fc_mp_len, extack);
David Ahern9ed59592017-01-17 14:57:36 -08004145 if (err < 0)
4146 goto errout;
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004147 }
4148
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01004149 if (tb[RTA_PREF]) {
4150 pref = nla_get_u8(tb[RTA_PREF]);
4151 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4152 pref != ICMPV6_ROUTER_PREF_HIGH)
4153 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4154 cfg->fc_flags |= RTF_PREF(pref);
4155 }
4156
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004157 if (tb[RTA_ENCAP])
4158 cfg->fc_encap = tb[RTA_ENCAP];
4159
David Ahern9ed59592017-01-17 14:57:36 -08004160 if (tb[RTA_ENCAP_TYPE]) {
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004161 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4162
David Ahernc255bd62017-05-27 16:19:27 -06004163 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
David Ahern9ed59592017-01-17 14:57:36 -08004164 if (err < 0)
4165 goto errout;
4166 }
4167
Xin Long32bc2012015-12-16 17:50:11 +08004168 if (tb[RTA_EXPIRES]) {
4169 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4170
4171 if (addrconf_finite_timeout(timeout)) {
4172 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4173 cfg->fc_flags |= RTF_EXPIRES;
4174 }
4175 }
4176
Thomas Graf86872cb2006-08-22 00:01:08 -07004177 err = 0;
4178errout:
4179 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004180}
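/* Illustrative mapping (the command and addresses are examples only): a
 * request like "ip -6 route add 2001:db8::/64 via fe80::1 dev eth0
 * metric 1024" arrives as RTM_NEWROUTE, where rtm_dst_len and RTA_DST fill
 * fc_dst_len/fc_dst, RTA_GATEWAY fills fc_gateway and sets RTF_GATEWAY,
 * RTA_OIF fills fc_ifindex and RTA_PRIORITY fills fc_metric.
 */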
4181
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004182struct rt6_nh {
4183 struct rt6_info *rt6_info;
4184 struct fib6_config r_cfg;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004185 struct list_head next;
4186};
4187
4188static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4189{
4190 struct rt6_nh *nh;
4191
4192 list_for_each_entry(nh, rt6_nh_list, next) {
David Ahern7d4d5062017-02-02 12:37:12 -08004193 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004194 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4195 nh->r_cfg.fc_ifindex);
4196 }
4197}
4198
David Ahernd4ead6b2018-04-17 17:33:16 -07004199static int ip6_route_info_append(struct net *net,
4200 struct list_head *rt6_nh_list,
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004201 struct rt6_info *rt, struct fib6_config *r_cfg)
4202{
4203 struct rt6_nh *nh;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004204 int err = -EEXIST;
4205
4206 list_for_each_entry(nh, rt6_nh_list, next) {
4207 /* check if rt6_info already exists */
David Ahernf06b7542017-07-05 14:41:46 -06004208 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004209 return err;
4210 }
4211
4212 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4213 if (!nh)
4214 return -ENOMEM;
4215 nh->rt6_info = rt;
David Ahernd4ead6b2018-04-17 17:33:16 -07004216 err = ip6_convert_metrics(net, rt, r_cfg);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004217 if (err) {
4218 kfree(nh);
4219 return err;
4220 }
4221 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4222 list_add_tail(&nh->next, rt6_nh_list);
4223
4224 return 0;
4225}
4226
David Ahern3b1137f2017-02-02 12:37:10 -08004227static void ip6_route_mpath_notify(struct rt6_info *rt,
4228 struct rt6_info *rt_last,
4229 struct nl_info *info,
4230 __u16 nlflags)
4231{
4232 /* if this is an APPEND route, then rt points to the first route
4233 * inserted and rt_last points to last route inserted. Userspace
4234 * wants a consistent dump of the route which starts at the first
4235 * nexthop. Since sibling routes are always added at the end of
4236 * the list, find the first sibling of the last route appended
4237 */
4238 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4239 rt = list_first_entry(&rt_last->rt6i_siblings,
4240 struct rt6_info,
4241 rt6i_siblings);
4242 }
4243
4244 if (rt)
4245 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4246}
4247
David Ahern333c4302017-05-21 10:12:04 -06004248static int ip6_route_multipath_add(struct fib6_config *cfg,
4249 struct netlink_ext_ack *extack)
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004250{
David Ahern3b1137f2017-02-02 12:37:10 -08004251 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4252 struct nl_info *info = &cfg->fc_nlinfo;
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004253 struct fib6_config r_cfg;
4254 struct rtnexthop *rtnh;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004255 struct rt6_info *rt;
4256 struct rt6_nh *err_nh;
4257 struct rt6_nh *nh, *nh_safe;
David Ahern3b1137f2017-02-02 12:37:10 -08004258 __u16 nlflags;
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004259 int remaining;
4260 int attrlen;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004261 int err = 1;
4262 int nhn = 0;
4263 int replace = (cfg->fc_nlinfo.nlh &&
4264 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4265 LIST_HEAD(rt6_nh_list);
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004266
David Ahern3b1137f2017-02-02 12:37:10 -08004267 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4268 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4269 nlflags |= NLM_F_APPEND;
4270
Michal Kubeček35f1b4e2015-05-18 20:53:55 +02004271 remaining = cfg->fc_mp_len;
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004272 rtnh = (struct rtnexthop *)cfg->fc_mp;
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004273
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004274	/* Parse a multipath entry and build a list (rt6_nh_list) of
4275	 * rt6_info structs, one per nexthop
4276	 */
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004277 while (rtnh_ok(rtnh, remaining)) {
4278 memcpy(&r_cfg, cfg, sizeof(*cfg));
4279 if (rtnh->rtnh_ifindex)
4280 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4281
4282 attrlen = rtnh_attrlen(rtnh);
4283 if (attrlen > 0) {
4284 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4285
4286 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4287 if (nla) {
Jiri Benc67b61f62015-03-29 16:59:26 +02004288 r_cfg.fc_gateway = nla_get_in6_addr(nla);
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004289 r_cfg.fc_flags |= RTF_GATEWAY;
4290 }
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004291 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4292 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4293 if (nla)
4294 r_cfg.fc_encap_type = nla_get_u16(nla);
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004295 }
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004296
David Ahern68e2ffd2018-03-20 10:06:59 -07004297 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
David Ahernacb54e32018-04-17 17:33:22 -07004298 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07004299 if (IS_ERR(rt)) {
4300 err = PTR_ERR(rt);
4301 rt = NULL;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004302 goto cleanup;
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07004303 }
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004304
David Ahern5e670d82018-04-17 17:33:14 -07004305 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
Ido Schimmel398958a2018-01-09 16:40:28 +02004306
David Ahernd4ead6b2018-04-17 17:33:16 -07004307 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4308 rt, &r_cfg);
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004309 if (err) {
Wei Wang587fea72017-06-17 10:42:36 -07004310 dst_release_immediate(&rt->dst);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004311 goto cleanup;
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004312 }
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004313
4314 rtnh = rtnh_next(rtnh, &remaining);
4315 }
4316
David Ahern3b1137f2017-02-02 12:37:10 -08004317 /* for add and replace send one notification with all nexthops.
4318 * Skip the notification in fib6_add_rt2node and send one with
4319 * the full route when done
4320 */
4321 info->skip_notify = 1;
4322
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004323 err_nh = NULL;
4324 list_for_each_entry(nh, &rt6_nh_list, next) {
David Ahern3b1137f2017-02-02 12:37:10 -08004325 rt_last = nh->rt6_info;
David Ahernd4ead6b2018-04-17 17:33:16 -07004326 err = __ip6_ins_rt(nh->rt6_info, info, extack);
David Ahern3b1137f2017-02-02 12:37:10 -08004327 /* save reference to first route for notification */
4328 if (!rt_notif && !err)
4329 rt_notif = nh->rt6_info;
4330
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004331		/* nh->rt6_info is used or freed at this point, reset to NULL */
4332 nh->rt6_info = NULL;
4333 if (err) {
4334 if (replace && nhn)
4335 ip6_print_replace_route_err(&rt6_nh_list);
4336 err_nh = nh;
4337 goto add_errout;
4338 }
4339
Nicolas Dichtel1a724182012-11-01 22:58:22 +00004340		/* Because each nexthop is added as a separate single route, we
Michal Kubeček27596472015-05-18 20:54:00 +02004341		 * remove these flags after the first nexthop: if there is a
4342		 * collision, we have already failed to add the first nexthop:
4343		 * fib6_add_rt2node() has rejected it; when replacing, the old
4344		 * nexthops have been replaced by the first new one, and the rest
4345		 * should be appended to it.
Nicolas Dichtel1a724182012-11-01 22:58:22 +00004346		 */
Michal Kubeček27596472015-05-18 20:54:00 +02004347 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4348 NLM_F_REPLACE);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004349 nhn++;
4350 }
4351
David Ahern3b1137f2017-02-02 12:37:10 -08004352 /* success ... tell user about new route */
4353 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004354 goto cleanup;
4355
4356add_errout:
David Ahern3b1137f2017-02-02 12:37:10 -08004357 /* send notification for routes that were added so that
4358 * the delete notifications sent by ip6_route_del are
4359 * coherent
4360 */
4361 if (rt_notif)
4362 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4363
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004364 /* Delete routes that were already added */
4365 list_for_each_entry(nh, &rt6_nh_list, next) {
4366 if (err_nh == nh)
4367 break;
David Ahern333c4302017-05-21 10:12:04 -06004368 ip6_route_del(&nh->r_cfg, extack);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004369 }
4370
4371cleanup:
4372 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
Wei Wang587fea72017-06-17 10:42:36 -07004373 if (nh->rt6_info)
4374 dst_release_immediate(&nh->rt6_info->dst);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004375 list_del(&nh->next);
4376 kfree(nh);
4377 }
4378
4379 return err;
4380}
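/* Illustrative request (example only, assuming iproute2's usual encoding):
 * "ip -6 route add 2001:db8::/64 nexthop via fe80::1 dev eth0 weight 2
 *  nexthop via fe80::2 dev eth1" carries one struct rtnexthop per nexthop
 * inside RTA_MULTIPATH; each one becomes its own rt6_info above, with
 * nh_weight = rtnh_hops + 1, so "weight 2" travels as rtnh_hops == 1.
 */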
4381
David Ahern333c4302017-05-21 10:12:04 -06004382static int ip6_route_multipath_del(struct fib6_config *cfg,
4383 struct netlink_ext_ack *extack)
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004384{
4385 struct fib6_config r_cfg;
4386 struct rtnexthop *rtnh;
4387 int remaining;
4388 int attrlen;
4389 int err = 1, last_err = 0;
4390
4391 remaining = cfg->fc_mp_len;
4392 rtnh = (struct rtnexthop *)cfg->fc_mp;
4393
4394 /* Parse a Multipath Entry */
4395 while (rtnh_ok(rtnh, remaining)) {
4396 memcpy(&r_cfg, cfg, sizeof(*cfg));
4397 if (rtnh->rtnh_ifindex)
4398 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4399
4400 attrlen = rtnh_attrlen(rtnh);
4401 if (attrlen > 0) {
4402 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4403
4404 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4405 if (nla) {
4406 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4407 r_cfg.fc_flags |= RTF_GATEWAY;
4408 }
4409 }
David Ahern333c4302017-05-21 10:12:04 -06004410 err = ip6_route_del(&r_cfg, extack);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004411 if (err)
4412 last_err = err;
4413
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004414 rtnh = rtnh_next(rtnh, &remaining);
4415 }
4416
4417 return last_err;
4418}
4419
David Ahernc21ef3e2017-04-16 09:48:24 -07004420static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4421 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004422{
Thomas Graf86872cb2006-08-22 00:01:08 -07004423 struct fib6_config cfg;
4424 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004425
David Ahern333c4302017-05-21 10:12:04 -06004426 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
Thomas Graf86872cb2006-08-22 00:01:08 -07004427 if (err < 0)
4428 return err;
4429
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004430 if (cfg.fc_mp)
David Ahern333c4302017-05-21 10:12:04 -06004431 return ip6_route_multipath_del(&cfg, extack);
David Ahern0ae81332017-02-02 12:37:08 -08004432 else {
4433 cfg.fc_delete_all_nh = 1;
David Ahern333c4302017-05-21 10:12:04 -06004434 return ip6_route_del(&cfg, extack);
David Ahern0ae81332017-02-02 12:37:08 -08004435 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004436}
4437
David Ahernc21ef3e2017-04-16 09:48:24 -07004438static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4439 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004440{
Thomas Graf86872cb2006-08-22 00:01:08 -07004441 struct fib6_config cfg;
4442 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004443
David Ahern333c4302017-05-21 10:12:04 -06004444 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
Thomas Graf86872cb2006-08-22 00:01:08 -07004445 if (err < 0)
4446 return err;
4447
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004448 if (cfg.fc_mp)
David Ahern333c4302017-05-21 10:12:04 -06004449 return ip6_route_multipath_add(&cfg, extack);
Nicolas Dichtel51ebd312012-10-22 03:42:09 +00004450 else
David Ahernacb54e32018-04-17 17:33:22 -07004451 return ip6_route_add(&cfg, GFP_KERNEL, extack);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004452}
4453
David Ahernbeb1afac52017-02-02 12:37:09 -08004454static size_t rt6_nlmsg_size(struct rt6_info *rt)
Thomas Graf339bf982006-11-10 14:10:15 -08004455{
David Ahernbeb1afac52017-02-02 12:37:09 -08004456 int nexthop_len = 0;
4457
4458 if (rt->rt6i_nsiblings) {
4459 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4460 + NLA_ALIGN(sizeof(struct rtnexthop))
4461 + nla_total_size(16) /* RTA_GATEWAY */
David Ahern5e670d82018-04-17 17:33:14 -07004462 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
David Ahernbeb1afac52017-02-02 12:37:09 -08004463
4464 nexthop_len *= rt->rt6i_nsiblings;
4465 }
4466
Thomas Graf339bf982006-11-10 14:10:15 -08004467 return NLMSG_ALIGN(sizeof(struct rtmsg))
4468 + nla_total_size(16) /* RTA_SRC */
4469 + nla_total_size(16) /* RTA_DST */
4470 + nla_total_size(16) /* RTA_GATEWAY */
4471 + nla_total_size(16) /* RTA_PREFSRC */
4472 + nla_total_size(4) /* RTA_TABLE */
4473 + nla_total_size(4) /* RTA_IIF */
4474 + nla_total_size(4) /* RTA_OIF */
4475 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08004476 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Daniel Borkmannea697632015-01-05 23:57:47 +01004477 + nla_total_size(sizeof(struct rta_cacheinfo))
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01004478 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004479 + nla_total_size(1) /* RTA_PREF */
David Ahern5e670d82018-04-17 17:33:14 -07004480 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
David Ahernbeb1afac52017-02-02 12:37:09 -08004481 + nexthop_len;
4482}
4483
4484static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
David Ahern5be083c2017-03-06 15:57:31 -08004485 unsigned int *flags, bool skip_oif)
David Ahernbeb1afac52017-02-02 12:37:09 -08004486{
David Ahern5e670d82018-04-17 17:33:14 -07004487 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
Ido Schimmelf9d882e2018-01-07 12:45:10 +02004488 *flags |= RTNH_F_DEAD;
4489
David Ahern5e670d82018-04-17 17:33:14 -07004490 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
David Ahernbeb1afac52017-02-02 12:37:09 -08004491 *flags |= RTNH_F_LINKDOWN;
4492 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4493 *flags |= RTNH_F_DEAD;
4494 }
4495
4496 if (rt->rt6i_flags & RTF_GATEWAY) {
David Ahern5e670d82018-04-17 17:33:14 -07004497 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
David Ahernbeb1afac52017-02-02 12:37:09 -08004498 goto nla_put_failure;
4499 }
4500
David Ahern5e670d82018-04-17 17:33:14 -07004501 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4502 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
Ido Schimmel61e4d012017-08-03 13:28:20 +02004503 *flags |= RTNH_F_OFFLOAD;
4504
David Ahern5be083c2017-03-06 15:57:31 -08004505	/* not needed for multipath encoding because it has an rtnexthop struct */
David Ahern5e670d82018-04-17 17:33:14 -07004506 if (!skip_oif && rt->fib6_nh.nh_dev &&
4507 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
David Ahernbeb1afac52017-02-02 12:37:09 -08004508 goto nla_put_failure;
4509
David Ahern5e670d82018-04-17 17:33:14 -07004510 if (rt->fib6_nh.nh_lwtstate &&
4511 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
David Ahernbeb1afac52017-02-02 12:37:09 -08004512 goto nla_put_failure;
4513
4514 return 0;
4515
4516nla_put_failure:
4517 return -EMSGSIZE;
4518}
4519
David Ahern5be083c2017-03-06 15:57:31 -08004520/* add multipath next hop */
David Ahernbeb1afac52017-02-02 12:37:09 -08004521static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4522{
David Ahern5e670d82018-04-17 17:33:14 -07004523 const struct net_device *dev = rt->fib6_nh.nh_dev;
David Ahernbeb1afac52017-02-02 12:37:09 -08004524 struct rtnexthop *rtnh;
4525 unsigned int flags = 0;
4526
4527 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4528 if (!rtnh)
4529 goto nla_put_failure;
4530
David Ahern5e670d82018-04-17 17:33:14 -07004531 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4532 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
David Ahernbeb1afac52017-02-02 12:37:09 -08004533
David Ahern5be083c2017-03-06 15:57:31 -08004534 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
David Ahernbeb1afac52017-02-02 12:37:09 -08004535 goto nla_put_failure;
4536
4537 rtnh->rtnh_flags = flags;
4538
4539 /* length of rtnetlink header + attributes */
4540 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4541
4542 return 0;
4543
4544nla_put_failure:
4545 return -EMSGSIZE;
Thomas Graf339bf982006-11-10 14:10:15 -08004546}
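/* For a multipath route, rt6_fill_node() below nests one such rtnexthop per
 * sibling inside RTA_MULTIPATH; the rtnh_len computed above covers the
 * rtnexthop header plus the per-nexthop attributes (gateway, lwtunnel
 * encap) written by rt6_nexthop_info().
 */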
4547
David Ahernd4ead6b2018-04-17 17:33:16 -07004548static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4549 struct rt6_info *rt, struct dst_entry *dst,
4550 struct in6_addr *dest, struct in6_addr *src,
Eric W. Biederman15e47302012-09-07 20:12:54 +00004551 int iif, int type, u32 portid, u32 seq,
David Ahernf8cfe2c2017-01-17 15:51:08 -08004552 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004553{
4554 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07004555 struct nlmsghdr *nlh;
David Ahernd4ead6b2018-04-17 17:33:16 -07004556 long expires = 0;
4557 u32 *pmetrics;
Patrick McHardy9e762a42006-08-10 23:09:48 -07004558 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004559
Eric W. Biederman15e47302012-09-07 20:12:54 +00004560 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
David S. Miller38308472011-12-03 18:02:47 -05004561 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08004562 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07004563
4564 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004565 rtm->rtm_family = AF_INET6;
4566 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4567 rtm->rtm_src_len = rt->rt6i_src.plen;
4568 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07004569 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07004570 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07004571 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07004572 table = RT6_TABLE_UNSPEC;
4573 rtm->rtm_table = table;
David S. Millerc78679e2012-04-01 20:27:33 -04004574 if (nla_put_u32(skb, RTA_TABLE, table))
4575 goto nla_put_failure;
David Aherne8478e82018-04-17 17:33:13 -07004576
4577 rtm->rtm_type = rt->fib6_type;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004578 rtm->rtm_flags = 0;
4579 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4580 rtm->rtm_protocol = rt->rt6i_protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004581
David S. Miller38308472011-12-03 18:02:47 -05004582 if (rt->rt6i_flags & RTF_CACHE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004583 rtm->rtm_flags |= RTM_F_CLONED;
4584
David Ahernd4ead6b2018-04-17 17:33:16 -07004585 if (dest) {
4586 if (nla_put_in6_addr(skb, RTA_DST, dest))
David S. Millerc78679e2012-04-01 20:27:33 -04004587 goto nla_put_failure;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09004588 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004589 } else if (rtm->rtm_dst_len)
Jiri Benc930345e2015-03-29 16:59:25 +02004590 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
David S. Millerc78679e2012-04-01 20:27:33 -04004591 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004592#ifdef CONFIG_IPV6_SUBTREES
4593 if (src) {
Jiri Benc930345e2015-03-29 16:59:25 +02004594 if (nla_put_in6_addr(skb, RTA_SRC, src))
David S. Millerc78679e2012-04-01 20:27:33 -04004595 goto nla_put_failure;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09004596 rtm->rtm_src_len = 128;
David S. Millerc78679e2012-04-01 20:27:33 -04004597 } else if (rtm->rtm_src_len &&
Jiri Benc930345e2015-03-29 16:59:25 +02004598 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
David S. Millerc78679e2012-04-01 20:27:33 -04004599 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004600#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09004601 if (iif) {
4602#ifdef CONFIG_IPV6_MROUTE
4603 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
David Ahernfd61c6b2017-01-17 15:51:07 -08004604 int err = ip6mr_get_route(net, skb, rtm, portid);
Nikolay Aleksandrov2cf75072016-09-25 23:08:31 +02004605
David Ahernfd61c6b2017-01-17 15:51:07 -08004606 if (err == 0)
4607 return 0;
4608 if (err < 0)
4609 goto nla_put_failure;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09004610 } else
4611#endif
David S. Millerc78679e2012-04-01 20:27:33 -04004612 if (nla_put_u32(skb, RTA_IIF, iif))
4613 goto nla_put_failure;
David Ahernd4ead6b2018-04-17 17:33:16 -07004614 } else if (dest) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004615 struct in6_addr saddr_buf;
David Ahernd4ead6b2018-04-17 17:33:16 -07004616 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
Jiri Benc930345e2015-03-29 16:59:25 +02004617 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
David S. Millerc78679e2012-04-01 20:27:33 -04004618 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004619 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07004620
Daniel Walterc3968a82011-04-13 21:10:57 +00004621 if (rt->rt6i_prefsrc.plen) {
4622 struct in6_addr saddr_buf;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00004623 saddr_buf = rt->rt6i_prefsrc.addr;
Jiri Benc930345e2015-03-29 16:59:25 +02004624 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
David S. Millerc78679e2012-04-01 20:27:33 -04004625 goto nla_put_failure;
Daniel Walterc3968a82011-04-13 21:10:57 +00004626 }
4627
David Ahernd4ead6b2018-04-17 17:33:16 -07004628 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4629 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07004630 goto nla_put_failure;
4631
David S. Millerc78679e2012-04-01 20:27:33 -04004632 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4633 goto nla_put_failure;
Li Wei82539472012-07-29 16:01:30 +00004634
David Ahernbeb1afac52017-02-02 12:37:09 -08004635 /* For multipath routes, walk the siblings list and add
4636 * each as a nexthop within RTA_MULTIPATH.
4637 */
4638 if (rt->rt6i_nsiblings) {
4639 struct rt6_info *sibling, *next_sibling;
4640 struct nlattr *mp;
4641
4642 mp = nla_nest_start(skb, RTA_MULTIPATH);
4643 if (!mp)
4644 goto nla_put_failure;
4645
4646 if (rt6_add_nexthop(skb, rt) < 0)
4647 goto nla_put_failure;
4648
4649 list_for_each_entry_safe(sibling, next_sibling,
4650 &rt->rt6i_siblings, rt6i_siblings) {
4651 if (rt6_add_nexthop(skb, sibling) < 0)
4652 goto nla_put_failure;
4653 }
4654
4655 nla_nest_end(skb, mp);
4656 } else {
David Ahern5be083c2017-03-06 15:57:31 -08004657 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
David Ahernbeb1afac52017-02-02 12:37:09 -08004658 goto nla_put_failure;
4659 }
4660
David Ahern14895682018-04-17 17:33:17 -07004661 if (rt->rt6i_flags & RTF_EXPIRES) {
4662 expires = dst ? dst->expires : rt->expires;
4663 expires -= jiffies;
4664 }
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07004665
David Ahernd4ead6b2018-04-17 17:33:16 -07004666 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08004667 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004668
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01004669 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4670 goto nla_put_failure;
4671
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004672
Johannes Berg053c0952015-01-16 22:09:00 +01004673 nlmsg_end(skb, nlh);
4674 return 0;
Thomas Graf2d7202b2006-08-22 00:01:27 -07004675
4676nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08004677 nlmsg_cancel(skb, nlh);
4678 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004679}
4680
Patrick McHardy1b43af52006-08-10 23:11:17 -07004681int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004682{
4683 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
David Ahern1f17e2f2017-01-26 13:54:08 -08004684 struct net *net = arg->net;
4685
David Ahern421842e2018-04-17 17:33:18 -07004686 if (rt == net->ipv6.fib6_null_entry)
David Ahern1f17e2f2017-01-26 13:54:08 -08004687 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004688
Thomas Graf2d7202b2006-08-22 00:01:27 -07004689 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4690 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
David Ahernf8cfe2c2017-01-17 15:51:08 -08004691
4692 /* user wants prefix routes only */
4693 if (rtm->rtm_flags & RTM_F_PREFIX &&
4694 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4695 /* success since this is not a prefix route */
4696 return 1;
4697 }
4698 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004699
David Ahernd4ead6b2018-04-17 17:33:16 -07004700 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4701 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4702 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004703}
4704
David Ahernc21ef3e2017-04-16 09:48:24 -07004705static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4706 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004707{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09004708 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07004709 struct nlattr *tb[RTA_MAX+1];
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004710 int err, iif = 0, oif = 0;
4711 struct dst_entry *dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004712 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07004713 struct sk_buff *skb;
4714 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05004715 struct flowi6 fl6;
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004716 bool fibmatch;
Thomas Grafab364a62006-08-22 00:01:47 -07004717
Johannes Bergfceb6432017-04-12 14:34:07 +02004718 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
David Ahernc21ef3e2017-04-16 09:48:24 -07004719 extack);
Thomas Grafab364a62006-08-22 00:01:47 -07004720 if (err < 0)
4721 goto errout;
4722
4723 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05004724 memset(&fl6, 0, sizeof(fl6));
Hannes Frederic Sowa38b70972016-06-11 20:08:19 +02004725 rtm = nlmsg_data(nlh);
4726 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004727 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
Thomas Grafab364a62006-08-22 00:01:47 -07004728
4729 if (tb[RTA_SRC]) {
4730 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4731 goto errout;
4732
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00004733 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
Thomas Grafab364a62006-08-22 00:01:47 -07004734 }
4735
4736 if (tb[RTA_DST]) {
4737 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4738 goto errout;
4739
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00004740 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
Thomas Grafab364a62006-08-22 00:01:47 -07004741 }
4742
4743 if (tb[RTA_IIF])
4744 iif = nla_get_u32(tb[RTA_IIF]);
4745
4746 if (tb[RTA_OIF])
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004747 oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07004748
Lorenzo Colitti2e47b292014-05-15 16:38:41 -07004749 if (tb[RTA_MARK])
4750 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4751
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09004752 if (tb[RTA_UID])
4753 fl6.flowi6_uid = make_kuid(current_user_ns(),
4754 nla_get_u32(tb[RTA_UID]));
4755 else
4756 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4757
Thomas Grafab364a62006-08-22 00:01:47 -07004758 if (iif) {
4759 struct net_device *dev;
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004760 int flags = 0;
4761
Florian Westphal121622d2017-08-15 16:34:42 +02004762 rcu_read_lock();
4763
4764 dev = dev_get_by_index_rcu(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07004765 if (!dev) {
Florian Westphal121622d2017-08-15 16:34:42 +02004766 rcu_read_unlock();
Thomas Grafab364a62006-08-22 00:01:47 -07004767 err = -ENODEV;
4768 goto errout;
4769 }
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004770
4771 fl6.flowi6_iif = iif;
4772
4773 if (!ipv6_addr_any(&fl6.saddr))
4774 flags |= RT6_LOOKUP_F_HAS_SADDR;
4775
David Ahernb75cc8f2018-03-02 08:32:17 -08004776 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
Florian Westphal121622d2017-08-15 16:34:42 +02004777
4778 rcu_read_unlock();
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004779 } else {
4780 fl6.flowi6_oif = oif;
4781
Ido Schimmel58acfd72017-12-20 12:28:25 +02004782 dst = ip6_route_output(net, NULL, &fl6);
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004783 }
4784
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004785
4786 rt = container_of(dst, struct rt6_info, dst);
4787 if (rt->dst.error) {
4788 err = rt->dst.error;
4789 ip6_rt_put(rt);
4790 goto errout;
Thomas Grafab364a62006-08-22 00:01:47 -07004791 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004792
WANG Cong9d6acb32017-03-01 20:48:39 -08004793 if (rt == net->ipv6.ip6_null_entry) {
4794 err = rt->dst.error;
4795 ip6_rt_put(rt);
4796 goto errout;
4797 }
4798
David S. Millerfba961a2017-12-22 11:16:31 -05004799 if (fibmatch && rt->from) {
4800 struct rt6_info *ort = rt->from;
Ido Schimmel58acfd72017-12-20 12:28:25 +02004801
4802 dst_hold(&ort->dst);
4803 ip6_rt_put(rt);
4804 rt = ort;
4805 }
4806
Linus Torvalds1da177e2005-04-16 15:20:36 -07004807 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
David S. Miller38308472011-12-03 18:02:47 -05004808 if (!skb) {
Amerigo Wang94e187c2012-10-29 00:13:19 +00004809 ip6_rt_put(rt);
Thomas Grafab364a62006-08-22 00:01:47 -07004810 err = -ENOBUFS;
4811 goto errout;
4812 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004813
Changli Gaod8d1f302010-06-10 23:31:35 -07004814 skb_dst_set(skb, &rt->dst);
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004815 if (fibmatch)
David Ahernd4ead6b2018-04-17 17:33:16 -07004816 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004817 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4818 nlh->nlmsg_seq, 0);
4819 else
David Ahernd4ead6b2018-04-17 17:33:16 -07004820 err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
4821 iif, RTM_NEWROUTE,
4822 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4823 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004824 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07004825 kfree_skb(skb);
4826 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004827 }
4828
Eric W. Biederman15e47302012-09-07 20:12:54 +00004829 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
Thomas Grafab364a62006-08-22 00:01:47 -07004830errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07004831 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004832}
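/* Illustrative use (the commands are examples only): "ip -6 route get
 * 2001:db8::1" issues such an RTM_GETROUTE request and is answered with the
 * resolved route, while "ip -6 route get fibmatch 2001:db8::1" sets
 * RTM_F_FIB_MATCH so the reply describes the matching FIB entry (rt->from)
 * rather than a cached clone.
 */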
4833
Roopa Prabhu37a1d362015-09-13 10:18:33 -07004834void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4835 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004836{
4837 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08004838 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08004839 u32 seq;
4840 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004841
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08004842 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05004843 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07004844
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004845 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
David S. Miller38308472011-12-03 18:02:47 -05004846 if (!skb)
Thomas Graf21713eb2006-08-15 00:35:24 -07004847 goto errout;
4848
David Ahernd4ead6b2018-04-17 17:33:16 -07004849 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4850 event, info->portid, seq, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -08004851 if (err < 0) {
4852 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4853 WARN_ON(err == -EMSGSIZE);
4854 kfree_skb(skb);
4855 goto errout;
4856 }
Eric W. Biederman15e47302012-09-07 20:12:54 +00004857 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08004858 info->nlh, gfp_any());
4859 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07004860errout:
4861 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08004862 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004863}
4864
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004865static int ip6_route_dev_notify(struct notifier_block *this,
Jiri Pirko351638e2013-05-28 01:30:21 +00004866 unsigned long event, void *ptr)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004867{
Jiri Pirko351638e2013-05-28 01:30:21 +00004868 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09004869 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004870
WANG Cong242d3a42017-05-08 10:12:13 -07004871 if (!(dev->flags & IFF_LOOPBACK))
4872 return NOTIFY_OK;
4873
4874 if (event == NETDEV_REGISTER) {
David Ahern421842e2018-04-17 17:33:18 -07004875 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
4876 net->ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07004877 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004878 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4879#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07004880 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004881 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07004882 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004883 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
4884#endif
WANG Cong76da0702017-06-20 11:42:27 -07004885 } else if (event == NETDEV_UNREGISTER &&
4886 dev->reg_state != NETREG_UNREGISTERED) {
4887 /* NETDEV_UNREGISTER could be fired for multiple times by
4888 * netdev_wait_allrefs(). Make sure we only call this once.
4889 */
David Ahern421842e2018-04-17 17:33:18 -07004890 in6_dev_put_clear(&net->ipv6.fib6_null_entry->rt6i_idev);
Eric Dumazet12d94a82017-08-15 04:09:51 -07004891 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
WANG Cong242d3a42017-05-08 10:12:13 -07004892#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Eric Dumazet12d94a82017-08-15 04:09:51 -07004893 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4894 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
WANG Cong242d3a42017-05-08 10:12:13 -07004895#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004896 }
4897
4898 return NOTIFY_OK;
4899}
4900
Linus Torvalds1da177e2005-04-16 15:20:36 -07004901/*
4902 * /proc
4903 */
4904
4905#ifdef CONFIG_PROC_FS
4906
Alexey Dobriyan33120b32007-11-06 05:27:11 -08004907static const struct file_operations ipv6_route_proc_fops = {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08004908 .open = ipv6_route_open,
4909 .read = seq_read,
4910 .llseek = seq_lseek,
Hannes Frederic Sowa8d2ca1d2013-09-21 16:55:59 +02004911 .release = seq_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08004912};
4913
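/* /proc/net/rt6_stats: seven space-separated hex fields, in order:
 * fib nodes, route nodes, allocated rt6_info entries, route entries,
 * cached routes, current dst entries and discarded routes.
 */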
Linus Torvalds1da177e2005-04-16 15:20:36 -07004914static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4915{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004916 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004917 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004918 net->ipv6.rt6_stats->fib_nodes,
4919 net->ipv6.rt6_stats->fib_route_nodes,
Wei Wang81eb8442017-10-06 12:06:11 -07004920 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004921 net->ipv6.rt6_stats->fib_rt_entries,
4922 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00004923 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004924 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004925
4926 return 0;
4927}
4928
4929static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4930{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07004931 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004932}
4933
Arjan van de Ven9a321442007-02-12 00:55:35 -08004934static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004935 .open = rt6_stats_seq_open,
4936 .read = seq_read,
4937 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07004938 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004939};
4940#endif /* CONFIG_PROC_FS */
4941
4942#ifdef CONFIG_SYSCTL
4943
Linus Torvalds1da177e2005-04-16 15:20:36 -07004944static
Joe Perchesfe2c6332013-06-11 23:04:25 -07004945int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004946 void __user *buffer, size_t *lenp, loff_t *ppos)
4947{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004948 struct net *net;
4949 int delay;
4950 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004951 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004952
4953 net = (struct net *)ctl->extra1;
4954 delay = net->ipv6.sysctl.flush_delay;
4955 proc_dointvec(ctl, write, buffer, lenp, ppos);
Michal Kubeček2ac3ac82013-08-01 10:04:14 +02004956 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004957 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004958}
4959
Joe Perchesfe2c6332013-06-11 23:04:25 -07004960struct ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09004961 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004962 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08004963 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004964 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07004965 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004966 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07004967 },
4968 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004969 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08004970 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004971 .maxlen = sizeof(int),
4972 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004973 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004974 },
4975 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004976 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08004977 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004978 .maxlen = sizeof(int),
4979 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004980 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004981 },
4982 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004983 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08004984 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004985 .maxlen = sizeof(int),
4986 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004987 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004988 },
4989 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004990 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08004991 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004992 .maxlen = sizeof(int),
4993 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004994 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004995 },
4996 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004997 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08004998 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004999 .maxlen = sizeof(int),
5000 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08005001 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07005002 },
5003 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005004 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08005005 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07005006 .maxlen = sizeof(int),
5007 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07005008 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07005009 },
5010 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005011 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08005012 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07005013 .maxlen = sizeof(int),
5014 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08005015 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07005016 },
5017 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005018 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08005019 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07005020 .maxlen = sizeof(int),
5021 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07005022 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07005023 },
5024 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07005025 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08005026 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07005027 .maxlen = sizeof(int),
5028 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08005029 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07005030 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08005031 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07005032};
5033
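/* Per-netns copy of the template above; the table[N].data assignments
 * below depend on the entry order matching ipv6_route_table_template.
 */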
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00005034struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08005035{
5036 struct ctl_table *table;
5037
5038 table = kmemdup(ipv6_route_table_template,
5039 sizeof(ipv6_route_table_template),
5040 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09005041
5042 if (table) {
5043 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00005044 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00005045 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09005046 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5047 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5048 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5049 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5050 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5051 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5052 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08005053 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
Eric W. Biederman464dc802012-11-16 03:02:59 +00005054
5055 /* Don't export sysctls to unprivileged users */
5056 if (net->user_ns != &init_user_ns)
5057 table[0].procname = NULL;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09005058 }
5059
Daniel Lezcano760f2d02008-01-10 02:53:43 -08005060 return table;
5061}
Linus Torvalds1da177e2005-04-16 15:20:36 -07005062#endif
5063
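/* Per-netns init: clone the dst_ops template, allocate the special
 * null (and, with CONFIG_IPV6_MULTIPLE_TABLES, prohibit/blackhole)
 * route entries and seed the per-namespace GC and MTU defaults.
 */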
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00005064static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08005065{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07005066 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005067
Alexey Dobriyan86393e52009-08-29 01:34:49 +00005068 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5069 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08005070
Eric Dumazetfc66f952010-10-08 06:37:34 +00005071 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5072 goto out_ip6_dst_ops;
5073
David Ahern421842e2018-04-17 17:33:18 -07005074 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5075 sizeof(*net->ipv6.fib6_null_entry),
5076 GFP_KERNEL);
5077 if (!net->ipv6.fib6_null_entry)
5078 goto out_ip6_dst_entries;
5079
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005080 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5081 sizeof(*net->ipv6.ip6_null_entry),
5082 GFP_KERNEL);
5083 if (!net->ipv6.ip6_null_entry)
David Ahern421842e2018-04-17 17:33:18 -07005084 goto out_fib6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07005085 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08005086 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5087 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005088
5089#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Vincent Bernatfeca7d82017-08-08 20:23:49 +02005090 net->ipv6.fib6_has_custom_rules = false;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005091 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5092 sizeof(*net->ipv6.ip6_prohibit_entry),
5093 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07005094 if (!net->ipv6.ip6_prohibit_entry)
5095 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07005096 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08005097 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5098 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005099
5100 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5101 sizeof(*net->ipv6.ip6_blk_hole_entry),
5102 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07005103 if (!net->ipv6.ip6_blk_hole_entry)
5104 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07005105 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08005106 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5107 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005108#endif
5109
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07005110 net->ipv6.sysctl.flush_delay = 0;
5111 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5112 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5113 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5114 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5115 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5116 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5117 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5118
Benjamin Thery6891a342008-03-04 13:49:47 -08005119 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5120
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005121 ret = 0;
5122out:
5123 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08005124
Peter Zijlstra68fffc62008-10-07 14:12:10 -07005125#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5126out_ip6_prohibit_entry:
5127 kfree(net->ipv6.ip6_prohibit_entry);
5128out_ip6_null_entry:
5129 kfree(net->ipv6.ip6_null_entry);
5130#endif
David Ahern421842e2018-04-17 17:33:18 -07005131out_fib6_null_entry:
5132 kfree(net->ipv6.fib6_null_entry);
Eric Dumazetfc66f952010-10-08 06:37:34 +00005133out_ip6_dst_entries:
5134 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08005135out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08005136 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08005137}
5138
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00005139static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08005140{
David Ahern421842e2018-04-17 17:33:18 -07005141 kfree(net->ipv6.fib6_null_entry);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005142 kfree(net->ipv6.ip6_null_entry);
5143#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5144 kfree(net->ipv6.ip6_prohibit_entry);
5145 kfree(net->ipv6.ip6_blk_hole_entry);
5146#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00005147 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08005148}
5149
Thomas Grafd1896342012-06-18 12:08:33 +00005150static int __net_init ip6_route_net_init_late(struct net *net)
5151{
5152#ifdef CONFIG_PROC_FS
Gao fengd4beaa62013-02-18 01:34:54 +00005153 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
Joe Perchesd6444062018-03-23 15:54:38 -07005154 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
Thomas Grafd1896342012-06-18 12:08:33 +00005155#endif
5156 return 0;
5157}
5158
5159static void __net_exit ip6_route_net_exit_late(struct net *net)
5160{
5161#ifdef CONFIG_PROC_FS
Gao fengece31ff2013-02-18 01:34:56 +00005162 remove_proc_entry("ipv6_route", net->proc_net);
5163 remove_proc_entry("rt6_stats", net->proc_net);
Thomas Grafd1896342012-06-18 12:08:33 +00005164#endif
5165}
5166
Daniel Lezcanocdb18762008-03-04 13:45:33 -08005167static struct pernet_operations ip6_route_net_ops = {
5168 .init = ip6_route_net_init,
5169 .exit = ip6_route_net_exit,
5170};
5171
David S. Millerc3426b42012-06-09 16:27:05 -07005172static int __net_init ipv6_inetpeer_init(struct net *net)
5173{
5174 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5175
5176 if (!bp)
5177 return -ENOMEM;
5178 inet_peer_base_init(bp);
5179 net->ipv6.peers = bp;
5180 return 0;
5181}
5182
5183static void __net_exit ipv6_inetpeer_exit(struct net *net)
5184{
5185 struct inet_peer_base *bp = net->ipv6.peers;
5186
5187 net->ipv6.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07005188 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07005189 kfree(bp);
5190}
5191
David S. Miller2b823f72012-06-09 19:00:16 -07005192static struct pernet_operations ipv6_inetpeer_ops = {
David S. Millerc3426b42012-06-09 16:27:05 -07005193 .init = ipv6_inetpeer_init,
5194 .exit = ipv6_inetpeer_exit,
5195};
5196
Thomas Grafd1896342012-06-18 12:08:33 +00005197static struct pernet_operations ip6_route_net_late_ops = {
5198 .init = ip6_route_net_init_late,
5199 .exit = ip6_route_net_exit_late,
5200};
5201
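/* Registered just below ADDRCONF_NOTIFY_PRIORITY so this notifier runs
 * after addrconf has processed the same device event.
 */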
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005202static struct notifier_block ip6_route_dev_notifier = {
5203 .notifier_call = ip6_route_dev_notify,
WANG Cong242d3a42017-05-08 10:12:13 -07005204 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005205};
5206
WANG Cong2f460932017-05-03 22:07:31 -07005207void __init ip6_route_init_special_entries(void)
5208{
 5209	/* Registration of the loopback device happens before this code runs,
 5210	 * so the loopback reference in rt6_info has not been taken; take it
 5211	 * manually for init_net. */
David Ahern421842e2018-04-17 17:33:18 -07005212 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
5213 init_net.ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
WANG Cong2f460932017-05-03 22:07:31 -07005214 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5215 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5216 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5217 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5218 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5219 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5220 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5221 #endif
5222}
5223
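/* Module init: dst kmem cache, per-netns state, fib6 core, xfrm6,
 * policy rules, the RTM_{NEW,DEL,GET}ROUTE handlers, the device
 * notifier and the per-cpu uncached lists; unwound in reverse on error.
 */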
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005224int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07005225{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005226 int ret;
Martin KaFai Lau8d0b94a2015-05-22 20:56:04 -07005227 int cpu;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005228
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08005229 ret = -ENOMEM;
5230 ip6_dst_ops_template.kmem_cachep =
5231 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
5232 SLAB_HWCACHE_ALIGN, NULL);
5233 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08005234 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07005235
Eric Dumazetfc66f952010-10-08 06:37:34 +00005236 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005237 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08005238 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08005239
David S. Millerc3426b42012-06-09 16:27:05 -07005240 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5241 if (ret)
David S. Millere8803b62012-06-16 01:12:19 -07005242 goto out_dst_entries;
Thomas Graf2a0c4512012-06-14 23:00:17 +00005243
David S. Miller7e52b332012-06-15 15:51:55 -07005244 ret = register_pernet_subsys(&ip6_route_net_ops);
5245 if (ret)
5246 goto out_register_inetpeer;
David S. Millerc3426b42012-06-09 16:27:05 -07005247
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07005248 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5249
David S. Millere8803b62012-06-16 01:12:19 -07005250 ret = fib6_init();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005251 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005252 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005253
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005254 ret = xfrm6_init();
5255 if (ret)
David S. Millere8803b62012-06-16 01:12:19 -07005256 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08005257
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005258 ret = fib6_rules_init();
5259 if (ret)
5260 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08005261
Thomas Grafd1896342012-06-18 12:08:33 +00005262 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5263 if (ret)
5264 goto fib6_rules_init;
5265
Florian Westphal16feebc2017-12-02 21:44:08 +01005266 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5267 inet6_rtm_newroute, NULL, 0);
5268 if (ret < 0)
5269 goto out_register_late_subsys;
5270
5271 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5272 inet6_rtm_delroute, NULL, 0);
5273 if (ret < 0)
5274 goto out_register_late_subsys;
5275
5276 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5277 inet6_rtm_getroute, NULL,
5278 RTNL_FLAG_DOIT_UNLOCKED);
5279 if (ret < 0)
Thomas Grafd1896342012-06-18 12:08:33 +00005280 goto out_register_late_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005281
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005282 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08005283 if (ret)
Thomas Grafd1896342012-06-18 12:08:33 +00005284 goto out_register_late_subsys;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005285
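	/* Set up each possible CPU's rt6_uncached_list head and lock. */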
Martin KaFai Lau8d0b94a2015-05-22 20:56:04 -07005286 for_each_possible_cpu(cpu) {
5287 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5288
5289 INIT_LIST_HEAD(&ul->head);
5290 spin_lock_init(&ul->lock);
5291 }
5292
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005293out:
5294 return ret;
5295
Thomas Grafd1896342012-06-18 12:08:33 +00005296out_register_late_subsys:
Florian Westphal16feebc2017-12-02 21:44:08 +01005297 rtnl_unregister_all(PF_INET6);
Thomas Grafd1896342012-06-18 12:08:33 +00005298 unregister_pernet_subsys(&ip6_route_net_late_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005299fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005300 fib6_rules_cleanup();
5301xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005302 xfrm6_fini();
Thomas Graf2a0c4512012-06-14 23:00:17 +00005303out_fib6_init:
5304 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005305out_register_subsys:
5306 unregister_pernet_subsys(&ip6_route_net_ops);
David S. Miller7e52b332012-06-15 15:51:55 -07005307out_register_inetpeer:
5308 unregister_pernet_subsys(&ipv6_inetpeer_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00005309out_dst_entries:
5310 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005311out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08005312 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08005313 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07005314}
5315
5316void ip6_route_cleanup(void)
5317{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005318 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Grafd1896342012-06-18 12:08:33 +00005319 unregister_pernet_subsys(&ip6_route_net_late_ops);
Thomas Graf101367c2006-08-04 03:39:02 -07005320 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005321 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07005322 fib6_gc_cleanup();
David S. Millerc3426b42012-06-09 16:27:05 -07005323 unregister_pernet_subsys(&ipv6_inetpeer_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08005324 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00005325 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08005326 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07005327}