/*
 * Linux INET6 implementation
 * FIB front-end.
 *
 * Authors:
 *	Pedro Roque	<roque@di.fc.ul.pt>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

/* Changes:
 *
 * YOSHIFUJI Hideaki @USAGI
 *	reworked default router selection.
 *	- respect outgoing interface
 *	- select from (probably) reachable routers (i.e.
 *	  routers in REACHABLE, STALE, DELAY or PROBE states).
 *	- always select the same router if it is (probably)
 *	  reachable. otherwise, round-robin the list.
 * Ville Nuorvala
 *	Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
#include <net/l3mdev.h>
#include <trace/events/fib6.h>

#include <linux/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};

static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ip6_default_advmss(const struct dst_entry *dst);
static unsigned int ip6_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
			   struct net_device *dev, int how);
static int ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
			    struct sk_buff *skb);
static void rt6_dst_from_metrics_check(struct rt6_info *rt);
static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
static size_t rt6_nlmsg_size(struct rt6_info *rt);
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 unsigned int flags);
static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
					   struct in6_addr *daddr,
					   struct in6_addr *saddr);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif

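/* Per-cpu list of routes that live outside the fib6 tree. When a device
 * goes away, rt6_uncached_list_flush_dev() below walks every cpu's list
 * and re-points dst.dev/rt6i_idev of the affected entries at the loopback
 * device so those routes remain usable.
 */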
struct uncached_list {
	spinlock_t lock;
	struct list_head head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);

static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}

static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;
		struct net *net = dev_net(rt->dst.dev);

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}

static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(&rt->from->dst);
}

static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	struct rt6_info *rt = (struct rt6_info *)dst;

	if (rt->rt6i_flags & RTF_PCPU)
		return rt6_pcpu_cow_metrics(rt);
	else if (rt->rt6i_flags & RTF_CACHE)
		return NULL;
	else
		return dst_cow_metrics_generic(dst, old);
}

static inline const void *choose_neigh_daddr(struct rt6_info *rt,
					     struct sk_buff *skb,
					     const void *daddr)
{
	struct in6_addr *p = &rt->rt6i_gateway;

	if (!ipv6_addr_any(p))
		return (const void *) p;
	else if (skb)
		return &ipv6_hdr(skb)->daddr;
	return daddr;
}

static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
					  struct sk_buff *skb,
					  const void *daddr)
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	struct neighbour *n;

	daddr = choose_neigh_daddr(rt, skb, daddr);
	n = __ipv6_neigh_lookup(dst->dev, daddr);
	if (n)
		return n;
	return neigh_create(&nd_tbl, daddr, dst->dev);
}

static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	struct net_device *dev = dst->dev;
	struct rt6_info *rt = (struct rt6_info *)dst;

	daddr = choose_neigh_daddr(rt, NULL, daddr);
	if (!daddr)
		return;
	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
		return;
	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
		return;
	__ipv6_confirm_neigh(dev, daddr);
}

static struct dst_ops ip6_dst_ops_template = {
	.family			= AF_INET6,
	.gc			= ip6_dst_gc,
	.gc_thresh		= 1024,
	.check			= ip6_dst_check,
	.default_advmss		= ip6_default_advmss,
	.mtu			= ip6_mtu,
	.cow_metrics		= ipv6_cow_metrics,
	.destroy		= ip6_dst_destroy,
	.ifdown			= ip6_dst_ifdown,
	.negative_advice	= ip6_negative_advice,
	.link_failure		= ip6_link_failure,
	.update_pmtu		= ip6_rt_update_pmtu,
	.redirect		= rt6_do_redirect,
	.local_out		= __ip6_local_out,
	.neigh_lookup		= ip6_neigh_lookup,
	.confirm_neigh		= ip6_confirm_neigh,
};

static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
{
	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);

	return mtu ? : dst->dev->mtu;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}

static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}

static struct dst_ops ip6_dst_blackhole_ops = {
	.family			= AF_INET6,
	.destroy		= ip6_dst_destroy,
	.check			= ip6_dst_check,
	.mtu			= ip6_blackhole_mtu,
	.default_advmss		= ip6_default_advmss,
	.update_pmtu		= ip6_rt_blackhole_update_pmtu,
	.redirect		= ip6_rt_blackhole_redirect,
	.cow_metrics		= dst_cow_metrics_generic,
	.neigh_lookup		= ip6_neigh_lookup,
};

static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};

static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif

static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}

/* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					1, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt) {
		rt6_info_init(rt);
		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
	}

	return rt;
}

struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (!rt->rt6i_pcpu) {
			dst_release_immediate(&rt->dst);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);

static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct rt6_exception_bucket *bucket;
	struct rt6_info *from = rt->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
	if (bucket) {
		rt->rt6i_exception_bucket = NULL;
		kfree(bucket);
	}

	rt->from = NULL;
	dst_release(&from->dst);
}

static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

static bool __rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES)
		return time_after(jiffies, rt->dst.expires);
	else
		return false;
}

static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->from) {
		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
			rt6_check_expired(rt->from);
	}
	return false;
}

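/* Hash-threshold multipath selection (rt6_multipath_select() below): the
 * flow hash in fl6->mp_hash is compared against each next hop's
 * rt6i_nh_upper_bound. The first sibling whose upper bound covers the hash
 * is chosen, unless rt6_score_route() rejects it, in which case the
 * original match is kept.
 */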
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;

	/* We might have already computed the hash for ICMPv6 errors. In such
	 * case it will always be non-zero. Otherwise now is the time to do it.
	 */
	if (!fl6->mp_hash)
		fl6->mp_hash = rt6_multipath_hash(fl6, NULL);

	if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
		return match;

	list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
				 rt6i_siblings) {
		if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
			continue;
		if (rt6_score_route(sibling, oif, strict) < 0)
			break;
		match = sibling;
		break;
	}

	return match;
}

/*
 * Route lookup. rcu_read_lock() should be held.
 */

static inline struct rt6_info *rt6_device_match(struct net *net,
						struct rt6_info *rt,
						const struct in6_addr *saddr,
						int oif,
						int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
		return rt;

	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
		struct net_device *dev = sprt->dst.dev;

		if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
			continue;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}

	return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}

static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 * Default Router Selection (RFC 2461 6.3.6)
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}

static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}

static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;

	if (rt->rt6i_nh_flags & RTNH_F_DEAD)
		goto out;

	if (idev->cnf.ignore_routes_with_linkdown &&
	    rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

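/* find_rr_leaf() scans the routes sharing the current metric in round-robin
 * order: first from rr_head to the end of the metric group, then from the
 * node's leaf back up to rr_head. Routes with a different metric (collected
 * in 'cont') are only considered if nothing in the group matched.
 */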
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *leaf,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	for (rt = leaf; rt && rt != rr_head;
	     rt = rcu_dereference(rt->rt6_next)) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}

static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
				   int oif, int strict)
{
	struct rt6_info *leaf = rcu_dereference(fn->leaf);
	struct rt6_info *match, *rt0;
	bool do_rr = false;
	int key_plen;

	if (!leaf || leaf == net->ipv6.ip6_null_entry)
		return net->ipv6.ip6_null_entry;

	rt0 = rcu_dereference(fn->rr_ptr);
	if (!rt0)
		rt0 = leaf;

	/* Double check to make sure fn is not an intermediate node
	 * and fn->leaf does not point to its child's leaf
	 * (This might happen if all routes under fn are deleted from
	 * the tree and fib6_repair_tree() is called on the node.)
	 */
	key_plen = rt0->rt6i_dst.plen;
#ifdef CONFIG_IPV6_SUBTREES
	if (rt0->rt6i_src.plen)
		key_plen = rt0->rt6i_src.plen;
#endif
	if (fn->fn_bit != key_plen)
		return net->ipv6.ip6_null_entry;

	match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rcu_dereference(rt0->rt6_next);

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = leaf;

		if (next != rt0) {
			spin_lock_bh(&leaf->rt6i_table->tb6_lock);
			/* make sure next is not being deleted from the tree */
			if (next->rt6i_node)
				rcu_assign_pointer(fn->rr_ptr, next);
			spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
		}
	}

	return match ? match : net->ipv6.ip6_null_entry;
}

static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
}

#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif

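/* fib6_backtrack() walks back up the fib6 tree when a lookup dead-ends:
 * move to the parent node, descending into its source-address subtree when
 * one exists, until a node carrying route info (RTN_RTINFO) is found or
 * the table root is reached.
 */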
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn, *sn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = rcu_dereference(fn->parent);
		sn = FIB6_SUBTREE(pn);
		if (sn && sn != fn)
			fn = fib6_lookup(sn, NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}

static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
			  bool null_fallback)
{
	struct rt6_info *rt = *prt;

	if (dst_hold_safe(&rt->dst))
		return true;
	if (null_fallback) {
		rt = net->ipv6.ip6_null_entry;
		dst_hold(&rt->dst);
	} else {
		rt = NULL;
	}
	*prt = rt;
	return false;
}

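/* Flow of ip6_pol_route_lookup(): look up the fib6 node for the flow,
 * filter the leaf routes by device/source address, optionally pick a
 * multipath sibling, backtrack towards the root if only the null entry
 * matched, then prefer a cached exception route and take a reference
 * before dropping rcu_read_lock().
 */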
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct rt6_info *rt, *rt_cache;
	struct fib6_node *fn;

	rcu_read_lock();
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = rcu_dereference(fn->leaf);
	if (!rt) {
		rt = net->ipv6.ip6_null_entry;
	} else {
		rt = rt6_device_match(net, rt, &fl6->saddr,
				      fl6->flowi6_oif, flags);
		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
			rt = rt6_multipath_select(rt, fl6,
						  fl6->flowi6_oif, flags);
	}
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	/* Search through exception table */
	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
	if (rt_cache)
		rt = rt_cache;

	if (ip6_hold_safe(net, &rt, true))
		dst_use_noref(&rt->dst, jiffies);

	rcu_read_unlock();

	trace_fib6_table_lookup(net, rt, table, fl6);

	return rt;

}

struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				   int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);

struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi6 fl6 = {
		.flowi6_oif = oif,
		.daddr = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}
EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is released.
 * Caller must hold dst before calling it.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc,
			struct netlink_ext_ack *extack)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	spin_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
	spin_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	/* Hold dst to account for the reference from the fib6 tree */
	dst_hold(&rt->dst);
	return __ip6_ins_rt(rt, &info, &mxc, NULL);
}

/* called with rcu_read_lock held */
static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
{
	struct net_device *dev = rt->dst.dev;

	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
		/* for copies of local routes, dst->dev needs to be the
		 * device if it is a master device, the master device if
		 * device is enslaved, and the loopback as the default
		 */
		if (netif_is_l3_slave(dev) &&
		    !rt6_need_strict(&rt->rt6i_dst.addr))
			dev = l3mdev_master_dev_rcu(dev);
		else if (!netif_is_l3_master(dev))
			dev = dev_net(dev)->loopback_dev;
		/* last case is netif_is_l3_master(dev) is true in which
		 * case we want dev returned to be dev
		 */
	}

	return dev;
}

static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct net_device *dev;
	struct rt6_info *rt;

	/*
	 * Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = ort->from;

	rcu_read_lock();
	dev = ip6_rt_get_dev_rcu(ort);
	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
	rcu_read_unlock();
	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}

static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct net_device *dev;
	struct rt6_info *pcpu_rt;

	rcu_read_lock();
	dev = ip6_rt_get_dev_rcu(rt);
	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
	rcu_read_unlock();
	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}

/* It should be called with rcu_read_lock() acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
		rt6_dst_from_metrics_check(pcpu_rt);

	return pcpu_rt;
}

static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	dst_hold(&pcpu_rt->dst);
	p = this_cpu_ptr(rt->rt6i_pcpu);
	prev = cmpxchg(p, NULL, pcpu_rt);
	BUG_ON(prev);

	rt6_dst_from_metrics_check(pcpu_rt);
	return pcpu_rt;
}

/* exception hash table implementation
 */
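/* Each origin route owns an array of FIB6_EXCEPTION_BUCKET_SIZE buckets
 * (rt6i_exception_bucket), indexed by rt6_exception_hash() over the
 * destination (and, with subtrees, source) address. Writers serialize on
 * rt6_exception_lock; readers walk the hlists under rcu_read_lock().
 */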
1139static DEFINE_SPINLOCK(rt6_exception_lock);
1140
1141/* Remove rt6_ex from hash table and free the memory
1142 * Caller must hold rt6_exception_lock
1143 */
1144static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1145 struct rt6_exception *rt6_ex)
1146{
Colin Ian Kingb2427e62017-10-10 18:01:16 +01001147 struct net *net;
Wei Wang81eb8442017-10-06 12:06:11 -07001148
Wei Wang35732d02017-10-06 12:05:57 -07001149 if (!bucket || !rt6_ex)
1150 return;
Colin Ian Kingb2427e62017-10-10 18:01:16 +01001151
1152 net = dev_net(rt6_ex->rt6i->dst.dev);
Wei Wang35732d02017-10-06 12:05:57 -07001153 rt6_ex->rt6i->rt6i_node = NULL;
1154 hlist_del_rcu(&rt6_ex->hlist);
1155 rt6_release(rt6_ex->rt6i);
1156 kfree_rcu(rt6_ex, rcu);
1157 WARN_ON_ONCE(!bucket->depth);
1158 bucket->depth--;
Wei Wang81eb8442017-10-06 12:06:11 -07001159 net->ipv6.rt6_stats->fib_rt_cache--;
Wei Wang35732d02017-10-06 12:05:57 -07001160}
1161
1162/* Remove oldest rt6_ex in bucket and free the memory
1163 * Caller must hold rt6_exception_lock
1164 */
1165static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1166{
1167 struct rt6_exception *rt6_ex, *oldest = NULL;
1168
1169 if (!bucket)
1170 return;
1171
1172 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1173 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1174 oldest = rt6_ex;
1175 }
1176 rt6_remove_exception(bucket, oldest);
1177}
1178
1179static u32 rt6_exception_hash(const struct in6_addr *dst,
1180 const struct in6_addr *src)
1181{
1182 static u32 seed __read_mostly;
1183 u32 val;
1184
1185 net_get_random_once(&seed, sizeof(seed));
1186 val = jhash(dst, sizeof(*dst), seed);
1187
1188#ifdef CONFIG_IPV6_SUBTREES
1189 if (src)
1190 val = jhash(src, sizeof(*src), val);
1191#endif
1192 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1193}
1194
1195/* Helper function to find the cached rt in the hash table
1196 * and update bucket pointer to point to the bucket for this
1197 * (daddr, saddr) pair
1198 * Caller must hold rt6_exception_lock
1199 */
1200static struct rt6_exception *
1201__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1202 const struct in6_addr *daddr,
1203 const struct in6_addr *saddr)
1204{
1205 struct rt6_exception *rt6_ex;
1206 u32 hval;
1207
1208 if (!(*bucket) || !daddr)
1209 return NULL;
1210
1211 hval = rt6_exception_hash(daddr, saddr);
1212 *bucket += hval;
1213
1214 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1215 struct rt6_info *rt6 = rt6_ex->rt6i;
1216 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1217
1218#ifdef CONFIG_IPV6_SUBTREES
1219 if (matched && saddr)
1220 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1221#endif
1222 if (matched)
1223 return rt6_ex;
1224 }
1225 return NULL;
1226}
1227
1228/* Helper function to find the cached rt in the hash table
1229 * and update bucket pointer to point to the bucket for this
1230 * (daddr, saddr) pair
1231 * Caller must hold rcu_read_lock()
1232 */
1233static struct rt6_exception *
1234__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1235 const struct in6_addr *daddr,
1236 const struct in6_addr *saddr)
1237{
1238 struct rt6_exception *rt6_ex;
1239 u32 hval;
1240
1241 WARN_ON_ONCE(!rcu_read_lock_held());
1242
1243 if (!(*bucket) || !daddr)
1244 return NULL;
1245
1246 hval = rt6_exception_hash(daddr, saddr);
1247 *bucket += hval;
1248
1249 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1250 struct rt6_info *rt6 = rt6_ex->rt6i;
1251 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1252
1253#ifdef CONFIG_IPV6_SUBTREES
1254 if (matched && saddr)
1255 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1256#endif
1257 if (matched)
1258 return rt6_ex;
1259 }
1260 return NULL;
1261}
1262
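/* Insert nrt as a cached exception of its parent route ort.
 * The bucket array is allocated on first use, an existing entry for the
 * same (daddr[, saddr]) key is replaced, and the oldest entry is evicted
 * once a chain grows beyond FIB6_MAX_DEPTH.  On success the parent's
 * sernum is updated to invalidate cached dsts and fib6 GC is kicked.
 */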
1263static int rt6_insert_exception(struct rt6_info *nrt,
1264 struct rt6_info *ort)
1265{
Wei Wang81eb8442017-10-06 12:06:11 -07001266 struct net *net = dev_net(ort->dst.dev);
Wei Wang35732d02017-10-06 12:05:57 -07001267 struct rt6_exception_bucket *bucket;
1268 struct in6_addr *src_key = NULL;
1269 struct rt6_exception *rt6_ex;
1270 int err = 0;
1271
1272 /* ort can't be a cache or pcpu route */
1273 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
David Miller3a2232e2017-11-28 15:40:40 -05001274 ort = ort->from;
Wei Wang35732d02017-10-06 12:05:57 -07001275 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1276
1277 spin_lock_bh(&rt6_exception_lock);
1278
1279 if (ort->exception_bucket_flushed) {
1280 err = -EINVAL;
1281 goto out;
1282 }
1283
1284 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1285 lockdep_is_held(&rt6_exception_lock));
1286 if (!bucket) {
1287 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1288 GFP_ATOMIC);
1289 if (!bucket) {
1290 err = -ENOMEM;
1291 goto out;
1292 }
1293 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1294 }
1295
1296#ifdef CONFIG_IPV6_SUBTREES
1297 /* rt6i_src.plen != 0 indicates ort is in subtree
1298 * and exception table is indexed by a hash of
1299 * both rt6i_dst and rt6i_src.
1300 * Otherwise, the exception table is indexed by
1301 * a hash of only rt6i_dst.
1302 */
1303 if (ort->rt6i_src.plen)
1304 src_key = &nrt->rt6i_src.addr;
1305#endif
Wei Wang60006a42017-10-06 12:05:58 -07001306
1307 /* Update rt6i_prefsrc as it could be changed
1308 * in rt6_remove_prefsrc()
1309 */
1310 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
Wei Wangf5bbe7e2017-10-06 12:05:59 -07001311 /* rt6_mtu_change() might lower mtu on ort.
1312 * Only insert this exception route if its mtu
1313 * is less than ort's mtu value.
1314 */
1315 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1316 err = -EINVAL;
1317 goto out;
1318 }
Wei Wang60006a42017-10-06 12:05:58 -07001319
Wei Wang35732d02017-10-06 12:05:57 -07001320 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1321 src_key);
1322 if (rt6_ex)
1323 rt6_remove_exception(bucket, rt6_ex);
1324
1325 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1326 if (!rt6_ex) {
1327 err = -ENOMEM;
1328 goto out;
1329 }
1330 rt6_ex->rt6i = nrt;
1331 rt6_ex->stamp = jiffies;
1332 atomic_inc(&nrt->rt6i_ref);
1333 nrt->rt6i_node = ort->rt6i_node;
1334 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1335 bucket->depth++;
Wei Wang81eb8442017-10-06 12:06:11 -07001336 net->ipv6.rt6_stats->fib_rt_cache++;
Wei Wang35732d02017-10-06 12:05:57 -07001337
1338 if (bucket->depth > FIB6_MAX_DEPTH)
1339 rt6_exception_remove_oldest(bucket);
1340
1341out:
1342 spin_unlock_bh(&rt6_exception_lock);
1343
1344 /* Update fn->fn_sernum to invalidate all cached dst */
Paolo Abenib886d5f2017-10-19 16:07:10 +02001345 if (!err) {
Ido Schimmel922c2ac2018-01-07 12:45:14 +02001346 spin_lock_bh(&ort->rt6i_table->tb6_lock);
Wei Wang35732d02017-10-06 12:05:57 -07001347 fib6_update_sernum(ort);
Ido Schimmel922c2ac2018-01-07 12:45:14 +02001348 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
Paolo Abenib886d5f2017-10-19 16:07:10 +02001349 fib6_force_start_gc(net);
1350 }
Wei Wang35732d02017-10-06 12:05:57 -07001351
1352 return err;
1353}
1354
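/* Remove all cached exceptions of rt and mark its bucket list as
 * flushed so rt6_insert_exception() cannot recreate it.
 */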
1355void rt6_flush_exceptions(struct rt6_info *rt)
1356{
1357 struct rt6_exception_bucket *bucket;
1358 struct rt6_exception *rt6_ex;
1359 struct hlist_node *tmp;
1360 int i;
1361
1362 spin_lock_bh(&rt6_exception_lock);
	1363	/* Prevent rt6_insert_exception() from recreating the bucket list */
1364 rt->exception_bucket_flushed = 1;
1365
1366 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1367 lockdep_is_held(&rt6_exception_lock));
1368 if (!bucket)
1369 goto out;
1370
1371 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1372 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1373 rt6_remove_exception(bucket, rt6_ex);
1374 WARN_ON_ONCE(bucket->depth);
1375 bucket++;
1376 }
1377
1378out:
1379 spin_unlock_bh(&rt6_exception_lock);
1380}
1381
1382/* Find cached rt in the hash table inside passed in rt
1383 * Caller has to hold rcu_read_lock()
1384 */
1385static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1386 struct in6_addr *daddr,
1387 struct in6_addr *saddr)
1388{
1389 struct rt6_exception_bucket *bucket;
1390 struct in6_addr *src_key = NULL;
1391 struct rt6_exception *rt6_ex;
1392 struct rt6_info *res = NULL;
1393
1394 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1395
1396#ifdef CONFIG_IPV6_SUBTREES
1397 /* rt6i_src.plen != 0 indicates rt is in subtree
1398 * and exception table is indexed by a hash of
1399 * both rt6i_dst and rt6i_src.
1400 * Otherwise, the exception table is indexed by
1401 * a hash of only rt6i_dst.
1402 */
1403 if (rt->rt6i_src.plen)
1404 src_key = saddr;
1405#endif
1406 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1407
1408 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1409 res = rt6_ex->rt6i;
1410
1411 return res;
1412}
1413
1414/* Remove the passed in cached rt from the hash table that contains it */
1415int rt6_remove_exception_rt(struct rt6_info *rt)
1416{
Wei Wang35732d02017-10-06 12:05:57 -07001417 struct rt6_exception_bucket *bucket;
David Miller3a2232e2017-11-28 15:40:40 -05001418 struct rt6_info *from = rt->from;
Wei Wang35732d02017-10-06 12:05:57 -07001419 struct in6_addr *src_key = NULL;
1420 struct rt6_exception *rt6_ex;
1421 int err;
1422
1423 if (!from ||
Colin Ian King442d7132017-10-10 19:10:30 +01001424 !(rt->rt6i_flags & RTF_CACHE))
Wei Wang35732d02017-10-06 12:05:57 -07001425 return -EINVAL;
1426
1427 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1428 return -ENOENT;
1429
1430 spin_lock_bh(&rt6_exception_lock);
1431 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1432 lockdep_is_held(&rt6_exception_lock));
1433#ifdef CONFIG_IPV6_SUBTREES
1434 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1435 * and exception table is indexed by a hash of
1436 * both rt6i_dst and rt6i_src.
1437 * Otherwise, the exception table is indexed by
1438 * a hash of only rt6i_dst.
1439 */
1440 if (from->rt6i_src.plen)
1441 src_key = &rt->rt6i_src.addr;
1442#endif
1443 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1444 &rt->rt6i_dst.addr,
1445 src_key);
1446 if (rt6_ex) {
1447 rt6_remove_exception(bucket, rt6_ex);
1448 err = 0;
1449 } else {
1450 err = -ENOENT;
1451 }
1452
1453 spin_unlock_bh(&rt6_exception_lock);
1454 return err;
1455}
1456
1457/* Find rt6_ex which contains the passed in rt cache and
1458 * refresh its stamp
1459 */
1460static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1461{
Wei Wang35732d02017-10-06 12:05:57 -07001462 struct rt6_exception_bucket *bucket;
David Miller3a2232e2017-11-28 15:40:40 -05001463 struct rt6_info *from = rt->from;
Wei Wang35732d02017-10-06 12:05:57 -07001464 struct in6_addr *src_key = NULL;
1465 struct rt6_exception *rt6_ex;
1466
1467 if (!from ||
Colin Ian King442d7132017-10-10 19:10:30 +01001468 !(rt->rt6i_flags & RTF_CACHE))
Wei Wang35732d02017-10-06 12:05:57 -07001469 return;
1470
1471 rcu_read_lock();
1472 bucket = rcu_dereference(from->rt6i_exception_bucket);
1473
1474#ifdef CONFIG_IPV6_SUBTREES
1475 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1476 * and exception table is indexed by a hash of
1477 * both rt6i_dst and rt6i_src.
1478 * Otherwise, the exception table is indexed by
1479 * a hash of only rt6i_dst.
1480 */
1481 if (from->rt6i_src.plen)
1482 src_key = &rt->rt6i_src.addr;
1483#endif
1484 rt6_ex = __rt6_find_exception_rcu(&bucket,
1485 &rt->rt6i_dst.addr,
1486 src_key);
1487 if (rt6_ex)
1488 rt6_ex->stamp = jiffies;
1489
1490 rcu_read_unlock();
1491}
1492
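/* Clear rt6i_prefsrc on all cached exceptions of rt
 * Caller must hold rt6_exception_lock
 */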
Wei Wang60006a42017-10-06 12:05:58 -07001493static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1494{
1495 struct rt6_exception_bucket *bucket;
1496 struct rt6_exception *rt6_ex;
1497 int i;
1498
1499 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1500 lockdep_is_held(&rt6_exception_lock));
1501
1502 if (bucket) {
1503 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1504 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1505 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1506 }
1507 bucket++;
1508 }
1509 }
1510}
1511
Stefano Brivioe9fa1492018-03-06 11:10:19 +01001512static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1513 struct rt6_info *rt, int mtu)
1514{
1515 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1516 * lowest MTU in the path: always allow updating the route PMTU to
1517 * reflect PMTU decreases.
1518 *
1519 * If the new MTU is higher, and the route PMTU is equal to the local
1520 * MTU, this means the old MTU is the lowest in the path, so allow
1521 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1522 * handle this.
1523 */
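	/* For instance, with a route PMTU of 1500: lowering the link MTU to
	 * 1400 is always reflected (first check below).  Raising the link
	 * MTU is only reflected when the route PMTU equals the local MTU,
	 * i.e. the local link was the limiting hop; a PMTU of 1280 learned
	 * from a remote hop is kept even if the local MTU grows.
	 */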
1524
1525 if (dst_mtu(&rt->dst) >= mtu)
1526 return true;
1527
1528 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1529 return true;
1530
1531 return false;
1532}
1533
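/* Apply an MTU change to every cached exception of rt that carries its
 * own PMTU, subject to rt6_mtu_change_route_allowed()
 * Caller must hold rt6_exception_lock
 */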
1534static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1535 struct rt6_info *rt, int mtu)
Wei Wangf5bbe7e2017-10-06 12:05:59 -07001536{
1537 struct rt6_exception_bucket *bucket;
1538 struct rt6_exception *rt6_ex;
1539 int i;
1540
1541 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1542 lockdep_is_held(&rt6_exception_lock));
1543
Stefano Brivioe9fa1492018-03-06 11:10:19 +01001544 if (!bucket)
1545 return;
1546
1547 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1548 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1549 struct rt6_info *entry = rt6_ex->rt6i;
1550
1551 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
	1552			 * route), the metrics of its rt->from have already
1553 * been updated.
1554 */
1555 if (entry->rt6i_pmtu &&
1556 rt6_mtu_change_route_allowed(idev, entry, mtu))
1557 entry->rt6i_pmtu = mtu;
Wei Wangf5bbe7e2017-10-06 12:05:59 -07001558 }
Stefano Brivioe9fa1492018-03-06 11:10:19 +01001559 bucket++;
Wei Wangf5bbe7e2017-10-06 12:05:59 -07001560 }
1561}
1562
Wei Wangb16cb452017-10-06 12:06:00 -07001563#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1564
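/* Remove every cached exception of rt whose gateway matches the given
 * address
 */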
1565static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1566 struct in6_addr *gateway)
1567{
1568 struct rt6_exception_bucket *bucket;
1569 struct rt6_exception *rt6_ex;
1570 struct hlist_node *tmp;
1571 int i;
1572
1573 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1574 return;
1575
1576 spin_lock_bh(&rt6_exception_lock);
1577 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1578 lockdep_is_held(&rt6_exception_lock));
1579
1580 if (bucket) {
1581 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1582 hlist_for_each_entry_safe(rt6_ex, tmp,
1583 &bucket->chain, hlist) {
1584 struct rt6_info *entry = rt6_ex->rt6i;
1585
1586 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1587 RTF_CACHE_GATEWAY &&
1588 ipv6_addr_equal(gateway,
1589 &entry->rt6i_gateway)) {
1590 rt6_remove_exception(bucket, rt6_ex);
1591 }
1592 }
1593 bucket++;
1594 }
1595 }
1596
1597 spin_unlock_bh(&rt6_exception_lock);
1598}
1599
Wei Wangc757faa2017-10-06 12:06:01 -07001600static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1601 struct rt6_exception *rt6_ex,
1602 struct fib6_gc_args *gc_args,
1603 unsigned long now)
1604{
1605 struct rt6_info *rt = rt6_ex->rt6i;
1606
Paolo Abeni1859bac2017-10-19 16:07:11 +02001607	/* we are pruning and obsoleting aged-out and non-gateway exceptions
	1608	 * even if others still have references to them, so that on the next
	1609	 * dst_check() such references can be dropped.
	1610	 * EXPIRES exceptions - e.g. pmtu-generated ones - are pruned when
	1611	 * expired, independently of their aging, as per RFC 8201 section 4
1612 */
Wei Wang31afeb42018-01-26 11:40:17 -08001613 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1614 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1615 RT6_TRACE("aging clone %p\n", rt);
1616 rt6_remove_exception(bucket, rt6_ex);
1617 return;
1618 }
1619 } else if (time_after(jiffies, rt->dst.expires)) {
1620 RT6_TRACE("purging expired route %p\n", rt);
Wei Wangc757faa2017-10-06 12:06:01 -07001621 rt6_remove_exception(bucket, rt6_ex);
1622 return;
Wei Wang31afeb42018-01-26 11:40:17 -08001623 }
1624
1625 if (rt->rt6i_flags & RTF_GATEWAY) {
Wei Wangc757faa2017-10-06 12:06:01 -07001626 struct neighbour *neigh;
1627 __u8 neigh_flags = 0;
1628
1629 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
1630 if (neigh) {
1631 neigh_flags = neigh->flags;
1632 neigh_release(neigh);
1633 }
1634 if (!(neigh_flags & NTF_ROUTER)) {
1635 RT6_TRACE("purging route %p via non-router but gateway\n",
1636 rt);
1637 rt6_remove_exception(bucket, rt6_ex);
1638 return;
1639 }
1640 }
Wei Wang31afeb42018-01-26 11:40:17 -08001641
Wei Wangc757faa2017-10-06 12:06:01 -07001642 gc_args->more++;
1643}
1644
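/* Run rt6_age_examine_exception() on every cached exception of rt,
 * pruning entries that are aged out, expired, or whose gateway is no
 * longer known to be a router
 */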
1645void rt6_age_exceptions(struct rt6_info *rt,
1646 struct fib6_gc_args *gc_args,
1647 unsigned long now)
1648{
1649 struct rt6_exception_bucket *bucket;
1650 struct rt6_exception *rt6_ex;
1651 struct hlist_node *tmp;
1652 int i;
1653
1654 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1655 return;
1656
1657 spin_lock_bh(&rt6_exception_lock);
1658 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1659 lockdep_is_held(&rt6_exception_lock));
1660
1661 if (bucket) {
1662 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1663 hlist_for_each_entry_safe(rt6_ex, tmp,
1664 &bucket->chain, hlist) {
1665 rt6_age_examine_exception(bucket, rt6_ex,
1666 gc_args, now);
1667 }
1668 bucket++;
1669 }
1670 }
1671 spin_unlock_bh(&rt6_exception_lock);
1672}
1673
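/* Core fib6 policy lookup: find the matching node, select a route
 * (backtracking and, if needed, retrying without RT6_LOOKUP_F_REACHABLE),
 * then prefer a matching entry from the exception table.  The result is
 * returned as the RTF_CACHE entry itself, as an uncached clone for the
 * FLOWI_FLAG_KNOWN_NH case, or as a per-cpu copy of the fib6 route.
 */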
David Ahern9ff74382016-06-13 13:44:19 -07001674struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1675 int oif, struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676{
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001677 struct fib6_node *fn, *saved_fn;
Wei Wang2b760fc2017-10-06 12:06:03 -07001678 struct rt6_info *rt, *rt_cache;
Thomas Grafc71099a2006-08-04 23:20:06 -07001679 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -07001681 strict |= flags & RT6_LOOKUP_F_IFACE;
David Ahernd5d32e42016-10-24 12:27:23 -07001682 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001683 if (net->ipv6.devconf_all->forwarding == 0)
1684 strict |= RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685
Wei Wang66f5d6c2017-10-06 12:06:10 -07001686 rcu_read_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001687
David S. Miller4c9483b2011-03-12 16:22:43 -05001688 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001689 saved_fn = fn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001690
David Ahernca254492015-10-12 11:47:10 -07001691 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1692 oif = 0;
1693
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001694redo_rt6_select:
Wei Wang8d1040e2017-10-06 12:06:08 -07001695 rt = rt6_select(net, fn, oif, strict);
Nicolas Dichtel52bd4c02013-06-28 17:35:48 +02001696 if (rt->rt6i_nsiblings)
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001697 rt = rt6_multipath_select(rt, fl6, oif, strict);
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001698 if (rt == net->ipv6.ip6_null_entry) {
1699 fn = fib6_backtrack(fn, &fl6->saddr);
1700 if (fn)
1701 goto redo_rt6_select;
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001702 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1703 /* also consider unreachable route */
1704 strict &= ~RT6_LOOKUP_F_REACHABLE;
1705 fn = saved_fn;
1706 goto redo_rt6_select;
Martin KaFai Lau367efcb2014-10-20 13:42:45 -07001707 }
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07001708 }
1709
Wei Wang2b760fc2017-10-06 12:06:03 -07001710	/* Search through the exception table */
1711 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1712 if (rt_cache)
1713 rt = rt_cache;
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -08001714
Wei Wangd3843fe2017-10-06 12:06:06 -07001715 if (rt == net->ipv6.ip6_null_entry) {
Wei Wang66f5d6c2017-10-06 12:06:10 -07001716 rcu_read_unlock();
Wei Wangd3843fe2017-10-06 12:06:06 -07001717 dst_hold(&rt->dst);
Paolo Abenib65f1642017-10-19 09:31:43 +02001718 trace_fib6_table_lookup(net, rt, table, fl6);
Wei Wangd3843fe2017-10-06 12:06:06 -07001719 return rt;
1720 } else if (rt->rt6i_flags & RTF_CACHE) {
1721 if (ip6_hold_safe(net, &rt, true)) {
1722 dst_use_noref(&rt->dst, jiffies);
1723 rt6_dst_from_metrics_check(rt);
1724 }
Wei Wang66f5d6c2017-10-06 12:06:10 -07001725 rcu_read_unlock();
Paolo Abenib65f1642017-10-19 09:31:43 +02001726 trace_fib6_table_lookup(net, rt, table, fl6);
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001727 return rt;
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001728 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1729 !(rt->rt6i_flags & RTF_GATEWAY))) {
	1730		/* Create an RTF_CACHE clone which will not be
	1731		 * owned by the fib6 tree.  It is for the special case where
	1732		 * the daddr in the skb during the neighbor look-up is different
	1733		 * from the fl6->daddr used to look up the route here.
1734 */
Thomas Grafc71099a2006-08-04 23:20:06 -07001735
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001736 struct rt6_info *uncached_rt;
1737
Wei Wangd3843fe2017-10-06 12:06:06 -07001738 if (ip6_hold_safe(net, &rt, true)) {
1739 dst_use_noref(&rt->dst, jiffies);
1740 } else {
Wei Wang66f5d6c2017-10-06 12:06:10 -07001741 rcu_read_unlock();
Wei Wangd3843fe2017-10-06 12:06:06 -07001742 uncached_rt = rt;
1743 goto uncached_rt_out;
1744 }
Wei Wang66f5d6c2017-10-06 12:06:10 -07001745 rcu_read_unlock();
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001746
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001747 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1748 dst_release(&rt->dst);
1749
Wei Wang1cfb71e2017-06-17 10:42:33 -07001750 if (uncached_rt) {
1751 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1752 * No need for another dst_hold()
1753 */
Martin KaFai Lau8d0b94a2015-05-22 20:56:04 -07001754 rt6_uncached_list_add(uncached_rt);
Wei Wang81eb8442017-10-06 12:06:11 -07001755 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
Wei Wang1cfb71e2017-06-17 10:42:33 -07001756 } else {
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001757 uncached_rt = net->ipv6.ip6_null_entry;
Wei Wang1cfb71e2017-06-17 10:42:33 -07001758 dst_hold(&uncached_rt->dst);
1759 }
David Ahernb8115802015-11-19 12:24:22 -08001760
Wei Wangd3843fe2017-10-06 12:06:06 -07001761uncached_rt_out:
Paolo Abenib65f1642017-10-19 09:31:43 +02001762 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001763 return uncached_rt;
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001764
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001765 } else {
1766 /* Get a percpu copy */
1767
1768 struct rt6_info *pcpu_rt;
1769
Wei Wangd3843fe2017-10-06 12:06:06 -07001770 dst_use_noref(&rt->dst, jiffies);
Eric Dumazet951f7882017-10-08 21:07:18 -07001771 local_bh_disable();
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001772 pcpu_rt = rt6_get_pcpu_route(rt);
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001773
Eric Dumazet951f7882017-10-08 21:07:18 -07001774 if (!pcpu_rt) {
Wei Wanga94b9362017-10-06 12:06:04 -07001775 /* atomic_inc_not_zero() is needed when using rcu */
1776 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
Eric Dumazet951f7882017-10-08 21:07:18 -07001777 /* No dst_hold() on rt is needed because grabbing
Wei Wanga94b9362017-10-06 12:06:04 -07001778 * rt->rt6i_ref makes sure rt can't be released.
1779 */
Wei Wanga94b9362017-10-06 12:06:04 -07001780 pcpu_rt = rt6_make_pcpu_route(rt);
1781 rt6_release(rt);
1782 } else {
1783 /* rt is already removed from tree */
Wei Wanga94b9362017-10-06 12:06:04 -07001784 pcpu_rt = net->ipv6.ip6_null_entry;
1785 dst_hold(&pcpu_rt->dst);
1786 }
Martin KaFai Lau9c7370a2015-08-14 11:05:54 -07001787 }
Eric Dumazet951f7882017-10-08 21:07:18 -07001788 local_bh_enable();
1789 rcu_read_unlock();
Paolo Abenib65f1642017-10-19 09:31:43 +02001790 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
Martin KaFai Laud52d3992015-05-22 20:56:06 -07001791 return pcpu_rt;
1792 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001793}
David Ahern9ff74382016-06-13 13:44:19 -07001794EXPORT_SYMBOL_GPL(ip6_pol_route);
Thomas Grafc71099a2006-08-04 23:20:06 -07001795
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001796static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001797 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -07001798{
David S. Miller4c9483b2011-03-12 16:22:43 -05001799 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -07001800}
1801
Mahesh Bandeward409b842016-09-16 12:59:08 -07001802struct dst_entry *ip6_route_input_lookup(struct net *net,
1803 struct net_device *dev,
1804 struct flowi6 *fl6, int flags)
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001805{
1806 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1807 flags |= RT6_LOOKUP_F_IFACE;
1808
1809 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1810}
Mahesh Bandeward409b842016-09-16 12:59:08 -07001811EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001812
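/* Extract the L3 fields used for multipath hashing.  For ICMPv6 error
 * messages the embedded (offending) header is used instead of the outer
 * one, so errors are hashed onto the same path as the flow that
 * triggered them.
 */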
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02001813static void ip6_multipath_l3_keys(const struct sk_buff *skb,
1814 struct flow_keys *keys)
1815{
1816 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1817 const struct ipv6hdr *key_iph = outer_iph;
1818 const struct ipv6hdr *inner_iph;
1819 const struct icmp6hdr *icmph;
1820 struct ipv6hdr _inner_iph;
1821
1822 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1823 goto out;
1824
1825 icmph = icmp6_hdr(skb);
1826 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1827 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1828 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1829 icmph->icmp6_type != ICMPV6_PARAMPROB)
1830 goto out;
1831
1832 inner_iph = skb_header_pointer(skb,
1833 skb_transport_offset(skb) + sizeof(*icmph),
1834 sizeof(_inner_iph), &_inner_iph);
1835 if (!inner_iph)
1836 goto out;
1837
1838 key_iph = inner_iph;
1839out:
1840 memset(keys, 0, sizeof(*keys));
1841 keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1842 keys->addrs.v6addrs.src = key_iph->saddr;
1843 keys->addrs.v6addrs.dst = key_iph->daddr;
1844 keys->tags.flow_label = ip6_flowinfo(key_iph);
1845 keys->basic.ip_proto = key_iph->nexthdr;
1846}
1847
1848/* if skb is set it will be used and fl6 can be NULL */
1849u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
1850{
1851 struct flow_keys hash_keys;
1852
1853 if (skb) {
1854 ip6_multipath_l3_keys(skb, &hash_keys);
Ido Schimmel7696c062018-01-09 16:40:26 +02001855 return flow_hash_from_keys(&hash_keys) >> 1;
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02001856 }
1857
Ido Schimmel7696c062018-01-09 16:40:26 +02001858 return get_hash_from_flowi6(fl6) >> 1;
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02001859}
1860
Thomas Grafc71099a2006-08-04 23:20:06 -07001861void ip6_route_input(struct sk_buff *skb)
1862{
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001863 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001864 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -07001865 int flags = RT6_LOOKUP_F_HAS_SADDR;
Jiri Benc904af042015-08-20 13:56:31 +02001866 struct ip_tunnel_info *tun_info;
David S. Miller4c9483b2011-03-12 16:22:43 -05001867 struct flowi6 fl6 = {
David Aherne0d56fd2016-09-10 12:09:57 -07001868 .flowi6_iif = skb->dev->ifindex,
David S. Miller4c9483b2011-03-12 16:22:43 -05001869 .daddr = iph->daddr,
1870 .saddr = iph->saddr,
YOSHIFUJI Hideaki / 吉藤英明6502ca52013-01-13 05:01:51 +00001871 .flowlabel = ip6_flowinfo(iph),
David S. Miller4c9483b2011-03-12 16:22:43 -05001872 .flowi6_mark = skb->mark,
1873 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -07001874 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001875
Jiri Benc904af042015-08-20 13:56:31 +02001876 tun_info = skb_tunnel_info(skb);
Jiri Benc46fa0622015-08-28 20:48:19 +02001877 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
Jiri Benc904af042015-08-20 13:56:31 +02001878 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
Jakub Sitnicki23aebda2017-08-23 09:58:29 +02001879 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
1880 fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
Jiri Benc06e9d042015-08-20 13:56:26 +02001881 skb_dst_drop(skb);
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00001882 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
Thomas Grafc71099a2006-08-04 23:20:06 -07001883}
1884
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001885static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001886 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -07001887{
David S. Miller4c9483b2011-03-12 16:22:43 -05001888 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -07001889}
1890
Paolo Abeni6f21c962016-01-29 12:30:19 +01001891struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1892 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -07001893{
David Ahernd46a9d62015-10-21 08:42:22 -07001894 bool any_src;
Thomas Grafc71099a2006-08-04 23:20:06 -07001895
David Ahern4c1feac2016-09-10 12:09:56 -07001896 if (rt6_need_strict(&fl6->daddr)) {
1897 struct dst_entry *dst;
1898
1899 dst = l3mdev_link_scope_lookup(net, fl6);
1900 if (dst)
1901 return dst;
1902 }
David Ahernca254492015-10-12 11:47:10 -07001903
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00001904 fl6->flowi6_iif = LOOPBACK_IFINDEX;
David McCullough4dc27d1c2012-06-25 15:42:26 +00001905
David Ahernd46a9d62015-10-21 08:42:22 -07001906 any_src = ipv6_addr_any(&fl6->saddr);
David Ahern741a11d2015-09-28 10:12:13 -07001907 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
David Ahernd46a9d62015-10-21 08:42:22 -07001908 (fl6->flowi6_oif && any_src))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -07001909 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -07001910
David Ahernd46a9d62015-10-21 08:42:22 -07001911 if (!any_src)
Thomas Grafadaa70b2006-10-13 15:01:03 -07001912 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +00001913 else if (sk)
1914 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -07001915
David S. Miller4c9483b2011-03-12 16:22:43 -05001916 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001917}
Paolo Abeni6f21c962016-01-29 12:30:19 +01001918EXPORT_SYMBOL_GPL(ip6_route_output_flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919
David S. Miller2774c132011-03-01 14:59:04 -08001920struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07001921{
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001922 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
Wei Wang1dbe32522017-06-17 10:42:26 -07001923 struct net_device *loopback_dev = net->loopback_dev;
David S. Miller14e50e52007-05-24 18:17:54 -07001924 struct dst_entry *new = NULL;
1925
Wei Wang1dbe32522017-06-17 10:42:26 -07001926 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
Steffen Klassert62cf27e2017-10-09 08:39:43 +02001927 DST_OBSOLETE_DEAD, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07001928 if (rt) {
Martin KaFai Lau0a1f5962015-10-15 16:39:58 -07001929 rt6_info_init(rt);
Wei Wang81eb8442017-10-06 12:06:11 -07001930 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
Martin KaFai Lau0a1f5962015-10-15 16:39:58 -07001931
Changli Gaod8d1f302010-06-10 23:31:35 -07001932 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07001933 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08001934 new->input = dst_discard;
Eric W. Biedermanede20592015-10-07 16:48:47 -05001935 new->output = dst_discard_out;
David S. Miller14e50e52007-05-24 18:17:54 -07001936
Martin KaFai Lau0a1f5962015-10-15 16:39:58 -07001937 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -07001938
Wei Wang1dbe32522017-06-17 10:42:26 -07001939 rt->rt6i_idev = in6_dev_get(loopback_dev);
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00001940 rt->rt6i_gateway = ort->rt6i_gateway;
Martin KaFai Lau0a1f5962015-10-15 16:39:58 -07001941 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
David S. Miller14e50e52007-05-24 18:17:54 -07001942 rt->rt6i_metric = 0;
1943
1944 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1945#ifdef CONFIG_IPV6_SUBTREES
1946 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1947#endif
David S. Miller14e50e52007-05-24 18:17:54 -07001948 }
1949
David S. Miller69ead7a2011-03-01 14:45:33 -08001950 dst_release(dst_orig);
1951 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07001952}
David S. Miller14e50e52007-05-24 18:17:54 -07001953
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954/*
1955 * Destination cache support functions
1956 */
1957
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07001958static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1959{
David Miller3a2232e2017-11-28 15:40:40 -05001960 if (rt->from &&
1961 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
1962 dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07001963}
1964
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001965static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1966{
Steffen Klassert36143642017-08-25 09:05:42 +02001967 u32 rt_cookie = 0;
Wei Wangc5cff852017-08-21 09:47:10 -07001968
1969 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001970 return NULL;
1971
1972 if (rt6_check_expired(rt))
1973 return NULL;
1974
1975 return &rt->dst;
1976}
1977
1978static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1979{
Martin KaFai Lau5973fb12015-11-11 11:51:07 -08001980 if (!__rt6_check_expired(rt) &&
1981 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
David Miller3a2232e2017-11-28 15:40:40 -05001982 rt6_check(rt->from, cookie))
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07001983 return &rt->dst;
1984 else
1985 return NULL;
1986}
1987
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1989{
1990 struct rt6_info *rt;
1991
1992 rt = (struct rt6_info *) dst;
1993
Nicolas Dichtel6f3118b2012-09-10 22:09:46 +00001994 /* All IPV6 dsts are created with ->obsolete set to the value
1995 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1996 * into this function always.
1997 */
Hannes Frederic Sowae3bc10b2013-10-24 07:48:24 +02001998
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07001999 rt6_dst_from_metrics_check(rt);
2000
Martin KaFai Lau02bcf4e2015-11-11 11:51:08 -08002001 if (rt->rt6i_flags & RTF_PCPU ||
David Miller3a2232e2017-11-28 15:40:40 -05002002 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
Martin KaFai Lau3da59bd2015-05-22 20:56:03 -07002003 return rt6_dst_from_check(rt, cookie);
2004 else
2005 return rt6_check(rt, cookie);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006}
2007
2008static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2009{
2010 struct rt6_info *rt = (struct rt6_info *) dst;
2011
2012 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00002013 if (rt->rt6i_flags & RTF_CACHE) {
2014 if (rt6_check_expired(rt)) {
2015 ip6_del_rt(rt);
2016 dst = NULL;
2017 }
2018 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00002020 dst = NULL;
2021 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +00002023 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024}
2025
2026static void ip6_link_failure(struct sk_buff *skb)
2027{
2028 struct rt6_info *rt;
2029
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002030 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002031
Eric Dumazetadf30902009-06-02 05:19:30 +00002032 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033 if (rt) {
Hannes Frederic Sowa1eb4f752013-07-10 23:00:57 +02002034 if (rt->rt6i_flags & RTF_CACHE) {
Wei Wangad65a2f2017-06-17 10:42:35 -07002035 if (dst_hold_safe(&rt->dst))
2036 ip6_del_rt(rt);
Wei Wangc5cff852017-08-21 09:47:10 -07002037 } else {
2038 struct fib6_node *fn;
2039
2040 rcu_read_lock();
2041 fn = rcu_dereference(rt->rt6i_node);
2042 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2043 fn->fn_sernum = -1;
2044 rcu_read_unlock();
Hannes Frederic Sowa1eb4f752013-07-10 23:00:57 +02002045 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002046 }
2047}
2048
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002049static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2050{
2051 struct net *net = dev_net(rt->dst.dev);
2052
2053 rt->rt6i_flags |= RTF_MODIFIED;
2054 rt->rt6i_pmtu = mtu;
2055 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2056}
2057
Martin KaFai Lau0d3f6d22015-11-11 11:51:06 -08002058static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2059{
2060 return !(rt->rt6i_flags & RTF_CACHE) &&
Wei Wang4e587ea2017-08-25 15:03:10 -07002061 (rt->rt6i_flags & RTF_PCPU ||
2062 rcu_access_pointer(rt->rt6i_node));
Martin KaFai Lau0d3f6d22015-11-11 11:51:06 -08002063}
2064
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002065static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2066 const struct ipv6hdr *iph, u32 mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067{
Julian Anastasov0dec8792017-02-06 23:14:16 +02002068 const struct in6_addr *daddr, *saddr;
Ian Morris67ba4152014-08-24 21:53:10 +01002069 struct rt6_info *rt6 = (struct rt6_info *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002071 if (rt6->rt6i_flags & RTF_LOCAL)
2072 return;
2073
Xin Long19bda362016-10-28 18:18:01 +08002074 if (dst_metric_locked(dst, RTAX_MTU))
2075 return;
2076
Julian Anastasov0dec8792017-02-06 23:14:16 +02002077 if (iph) {
2078 daddr = &iph->daddr;
2079 saddr = &iph->saddr;
2080 } else if (sk) {
2081 daddr = &sk->sk_v6_daddr;
2082 saddr = &inet6_sk(sk)->saddr;
2083 } else {
2084 daddr = NULL;
2085 saddr = NULL;
2086 }
2087 dst_confirm_neigh(dst, daddr);
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002088 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2089 if (mtu >= dst_mtu(dst))
2090 return;
David S. Miller81aded22012-06-15 14:54:11 -07002091
Martin KaFai Lau0d3f6d22015-11-11 11:51:06 -08002092 if (!rt6_cache_allowed_for_pmtu(rt6)) {
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002093 rt6_do_update_pmtu(rt6, mtu);
Wei Wang2b760fc2017-10-06 12:06:03 -07002094 /* update rt6_ex->stamp for cache */
2095 if (rt6->rt6i_flags & RTF_CACHE)
2096 rt6_update_exception_stamp_rt(rt6);
Julian Anastasov0dec8792017-02-06 23:14:16 +02002097 } else if (daddr) {
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002098 struct rt6_info *nrt6;
Hagen Paul Pfeifer9d289712015-01-15 22:34:25 +01002099
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002100 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2101 if (nrt6) {
2102 rt6_do_update_pmtu(nrt6, mtu);
Wei Wang2b760fc2017-10-06 12:06:03 -07002103 if (rt6_insert_exception(nrt6, rt6))
2104 dst_release_immediate(&nrt6->dst);
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002105 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106 }
2107}
2108
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002109static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2110 struct sk_buff *skb, u32 mtu)
2111{
2112 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2113}
2114
David S. Miller42ae66c2012-06-15 20:01:57 -07002115void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002116 int oif, u32 mark, kuid_t uid)
David S. Miller81aded22012-06-15 14:54:11 -07002117{
2118 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2119 struct dst_entry *dst;
2120 struct flowi6 fl6;
2121
2122 memset(&fl6, 0, sizeof(fl6));
2123 fl6.flowi6_oif = oif;
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07002124 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
David S. Miller81aded22012-06-15 14:54:11 -07002125 fl6.daddr = iph->daddr;
2126 fl6.saddr = iph->saddr;
YOSHIFUJI Hideaki / 吉藤英明6502ca52013-01-13 05:01:51 +00002127 fl6.flowlabel = ip6_flowinfo(iph);
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002128 fl6.flowi6_uid = uid;
David S. Miller81aded22012-06-15 14:54:11 -07002129
2130 dst = ip6_route_output(net, NULL, &fl6);
2131 if (!dst->error)
Martin KaFai Lau45e4fd22015-05-22 20:56:00 -07002132 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
David S. Miller81aded22012-06-15 14:54:11 -07002133 dst_release(dst);
2134}
2135EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2136
2137void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2138{
Martin KaFai Lau33c162a2016-04-11 15:29:36 -07002139 struct dst_entry *dst;
2140
David S. Miller81aded22012-06-15 14:54:11 -07002141 ip6_update_pmtu(skb, sock_net(sk), mtu,
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002142 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
Martin KaFai Lau33c162a2016-04-11 15:29:36 -07002143
2144 dst = __sk_dst_get(sk);
2145 if (!dst || !dst->obsolete ||
2146 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2147 return;
2148
2149 bh_lock_sock(sk);
2150 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2151 ip6_datagram_dst_update(sk, false);
2152 bh_unlock_sock(sk);
David S. Miller81aded22012-06-15 14:54:11 -07002153}
2154EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2155
Duan Jiongb55b76b2013-09-04 19:44:21 +08002156/* Handle redirects */
2157struct ip6rd_flowi {
2158 struct flowi6 fl6;
2159 struct in6_addr gateway;
2160};
2161
2162static struct rt6_info *__ip6_route_redirect(struct net *net,
2163 struct fib6_table *table,
2164 struct flowi6 *fl6,
2165 int flags)
2166{
2167 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
Wei Wang2b760fc2017-10-06 12:06:03 -07002168 struct rt6_info *rt, *rt_cache;
Duan Jiongb55b76b2013-09-04 19:44:21 +08002169 struct fib6_node *fn;
2170
2171 /* Get the "current" route for this destination and
Alexander Alemayhu67c408c2017-01-07 23:53:00 +01002172	 * check if the redirect has come from the appropriate router.
Duan Jiongb55b76b2013-09-04 19:44:21 +08002173 *
2174 * RFC 4861 specifies that redirects should only be
2175 * accepted if they come from the nexthop to the target.
2176 * Due to the way the routes are chosen, this notion
2177 * is a bit fuzzy and one might need to check all possible
2178 * routes.
2179 */
2180
Wei Wang66f5d6c2017-10-06 12:06:10 -07002181 rcu_read_lock();
Duan Jiongb55b76b2013-09-04 19:44:21 +08002182 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2183restart:
Wei Wang66f5d6c2017-10-06 12:06:10 -07002184 for_each_fib6_node_rt_rcu(fn) {
Ido Schimmel8067bb82018-01-07 12:45:09 +02002185 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
2186 continue;
Duan Jiongb55b76b2013-09-04 19:44:21 +08002187 if (rt6_check_expired(rt))
2188 continue;
2189 if (rt->dst.error)
2190 break;
2191 if (!(rt->rt6i_flags & RTF_GATEWAY))
2192 continue;
2193 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2194 continue;
Wei Wang2b760fc2017-10-06 12:06:03 -07002195 /* rt_cache's gateway might be different from its 'parent'
2196 * in the case of an ip redirect.
2197 * So we keep searching in the exception table if the gateway
2198 * is different.
2199 */
2200 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2201 rt_cache = rt6_find_cached_rt(rt,
2202 &fl6->daddr,
2203 &fl6->saddr);
2204 if (rt_cache &&
2205 ipv6_addr_equal(&rdfl->gateway,
2206 &rt_cache->rt6i_gateway)) {
2207 rt = rt_cache;
2208 break;
2209 }
Duan Jiongb55b76b2013-09-04 19:44:21 +08002210 continue;
Wei Wang2b760fc2017-10-06 12:06:03 -07002211 }
Duan Jiongb55b76b2013-09-04 19:44:21 +08002212 break;
2213 }
2214
2215 if (!rt)
2216 rt = net->ipv6.ip6_null_entry;
2217 else if (rt->dst.error) {
2218 rt = net->ipv6.ip6_null_entry;
Martin KaFai Laub0a1ba52015-01-20 19:16:02 -08002219 goto out;
2220 }
2221
2222 if (rt == net->ipv6.ip6_null_entry) {
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07002223 fn = fib6_backtrack(fn, &fl6->saddr);
2224 if (fn)
2225 goto restart;
Duan Jiongb55b76b2013-09-04 19:44:21 +08002226 }
Martin KaFai Laua3c00e42014-10-20 13:42:43 -07002227
Martin KaFai Laub0a1ba52015-01-20 19:16:02 -08002228out:
Wei Wangd3843fe2017-10-06 12:06:06 -07002229 ip6_hold_safe(net, &rt, true);
Duan Jiongb55b76b2013-09-04 19:44:21 +08002230
Wei Wang66f5d6c2017-10-06 12:06:10 -07002231 rcu_read_unlock();
Duan Jiongb55b76b2013-09-04 19:44:21 +08002232
Paolo Abenib65f1642017-10-19 09:31:43 +02002233 trace_fib6_table_lookup(net, rt, table, fl6);
Duan Jiongb55b76b2013-09-04 19:44:21 +08002234 return rt;
2235};
2236
2237static struct dst_entry *ip6_route_redirect(struct net *net,
2238 const struct flowi6 *fl6,
2239 const struct in6_addr *gateway)
2240{
2241 int flags = RT6_LOOKUP_F_HAS_SADDR;
2242 struct ip6rd_flowi rdfl;
2243
2244 rdfl.fl6 = *fl6;
2245 rdfl.gateway = *gateway;
2246
2247 return fib6_rule_lookup(net, &rdfl.fl6,
2248 flags, __ip6_route_redirect);
2249}
2250
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002251void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2252 kuid_t uid)
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002253{
2254 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2255 struct dst_entry *dst;
2256 struct flowi6 fl6;
2257
2258 memset(&fl6, 0, sizeof(fl6));
Julian Anastasove374c612014-04-28 10:51:56 +03002259 fl6.flowi6_iif = LOOPBACK_IFINDEX;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002260 fl6.flowi6_oif = oif;
2261 fl6.flowi6_mark = mark;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002262 fl6.daddr = iph->daddr;
2263 fl6.saddr = iph->saddr;
YOSHIFUJI Hideaki / 吉藤英明6502ca52013-01-13 05:01:51 +00002264 fl6.flowlabel = ip6_flowinfo(iph);
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002265 fl6.flowi6_uid = uid;
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002266
Duan Jiongb55b76b2013-09-04 19:44:21 +08002267 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
2268 rt6_do_redirect(dst, NULL, skb);
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002269 dst_release(dst);
2270}
2271EXPORT_SYMBOL_GPL(ip6_redirect);
2272
Duan Jiongc92a59e2013-08-22 12:07:35 +08002273void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2274 u32 mark)
2275{
2276 const struct ipv6hdr *iph = ipv6_hdr(skb);
2277 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2278 struct dst_entry *dst;
2279 struct flowi6 fl6;
2280
2281 memset(&fl6, 0, sizeof(fl6));
Julian Anastasove374c612014-04-28 10:51:56 +03002282 fl6.flowi6_iif = LOOPBACK_IFINDEX;
Duan Jiongc92a59e2013-08-22 12:07:35 +08002283 fl6.flowi6_oif = oif;
2284 fl6.flowi6_mark = mark;
Duan Jiongc92a59e2013-08-22 12:07:35 +08002285 fl6.daddr = msg->dest;
2286 fl6.saddr = iph->daddr;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002287 fl6.flowi6_uid = sock_net_uid(net, NULL);
Duan Jiongc92a59e2013-08-22 12:07:35 +08002288
Duan Jiongb55b76b2013-09-04 19:44:21 +08002289 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
2290 rt6_do_redirect(dst, NULL, skb);
Duan Jiongc92a59e2013-08-22 12:07:35 +08002291 dst_release(dst);
2292}
2293
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002294void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2295{
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09002296 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2297 sk->sk_uid);
David S. Miller3a5ad2e2012-07-12 00:08:07 -07002298}
2299EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2300
David S. Miller0dbaee32010-12-13 12:52:14 -08002301static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002302{
David S. Miller0dbaee32010-12-13 12:52:14 -08002303 struct net_device *dev = dst->dev;
2304 unsigned int mtu = dst_mtu(dst);
2305 struct net *net = dev_net(dev);
2306
Linus Torvalds1da177e2005-04-16 15:20:36 -07002307 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2308
Daniel Lezcano55786892008-03-04 13:47:47 -08002309 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2310 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311
2312 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002313 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2314 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2315 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316 * rely only on pmtu discovery"
2317 */
2318 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2319 mtu = IPV6_MAXPLEN;
2320 return mtu;
2321}
2322
Steffen Klassertebb762f2011-11-23 02:12:51 +00002323static unsigned int ip6_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08002324{
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07002325 const struct rt6_info *rt = (const struct rt6_info *)dst;
2326 unsigned int mtu = rt->rt6i_pmtu;
David S. Millerd33e4552010-12-14 13:01:14 -08002327 struct inet6_dev *idev;
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002328
2329 if (mtu)
Eric Dumazet30f78d82014-04-10 21:23:36 -07002330 goto out;
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002331
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07002332 mtu = dst_metric_raw(dst, RTAX_MTU);
2333 if (mtu)
2334 goto out;
2335
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002336 mtu = IPV6_MIN_MTU;
David S. Millerd33e4552010-12-14 13:01:14 -08002337
2338 rcu_read_lock();
2339 idev = __in6_dev_get(dst->dev);
2340 if (idev)
2341 mtu = idev->cnf.mtu6;
2342 rcu_read_unlock();
2343
Eric Dumazet30f78d82014-04-10 21:23:36 -07002344out:
Roopa Prabhu14972cb2016-08-24 20:10:43 -07002345 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2346
2347 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08002348}
2349
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08002350struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
David S. Miller87a11572011-12-06 17:04:13 -05002351 struct flowi6 *fl6)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352{
David S. Miller87a11572011-12-06 17:04:13 -05002353 struct dst_entry *dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002354 struct rt6_info *rt;
2355 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002356 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357
David S. Miller38308472011-12-03 18:02:47 -05002358 if (unlikely(!idev))
Eric Dumazet122bdf62012-03-14 21:13:11 +00002359 return ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002360
Martin KaFai Lauad706862015-08-14 11:05:52 -07002361 rt = ip6_dst_alloc(net, dev, 0);
David S. Miller38308472011-12-03 18:02:47 -05002362 if (unlikely(!rt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363 in6_dev_put(idev);
David S. Miller87a11572011-12-06 17:04:13 -05002364 dst = ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365 goto out;
2366 }
2367
Yan, Zheng8e2ec632011-09-05 21:34:30 +00002368 rt->dst.flags |= DST_HOST;
Brendan McGrath588753f2017-12-13 22:14:57 +11002369 rt->dst.input = ip6_input;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00002370 rt->dst.output = ip6_output;
Julian Anastasov550bab42013-10-20 15:43:04 +03002371 rt->rt6i_gateway = fl6->daddr;
David S. Miller87a11572011-12-06 17:04:13 -05002372 rt->rt6i_dst.addr = fl6->daddr;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00002373 rt->rt6i_dst.plen = 128;
2374 rt->rt6i_idev = idev;
Li RongQing14edd872012-10-24 14:01:18 +08002375 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376
Ido Schimmel4c981e22018-01-07 12:45:04 +02002377 /* Add this dst into uncached_list so that rt6_disable_ip() can
Wei Wang587fea72017-06-17 10:42:36 -07002378 * do proper release of the net_device
2379 */
2380 rt6_uncached_list_add(rt);
Wei Wang81eb8442017-10-06 12:06:11 -07002381 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002382
David S. Miller87a11572011-12-06 17:04:13 -05002383 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2384
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385out:
David S. Miller87a11572011-12-06 17:04:13 -05002386 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002387}
2388
Daniel Lezcano569d3642008-01-18 03:56:57 -08002389static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002390{
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002391 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08002392 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2393 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2394 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2395 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2396 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00002397 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002398
Eric Dumazetfc66f952010-10-08 06:37:34 +00002399 entries = dst_entries_get_fast(ops);
Michal Kubeček49a18d82013-08-01 10:04:24 +02002400 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00002401 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402 goto out;
2403
Benjamin Thery6891a342008-03-04 13:49:47 -08002404 net->ipv6.ip6_rt_gc_expire++;
Li RongQing14956642014-05-19 17:30:28 +08002405 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002406 entries = dst_entries_get_slow(ops);
2407 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08002408 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08002410 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00002411 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002412}
2413
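/* Convert the RTAX_* attributes in cfg->fc_mx into the metrics array of
 * mxc.  RTAX_CC_ALGO names are resolved via tcp_ca_get_key_by_name()
 * (setting DST_FEATURE_ECN_CA for ECN-capable algorithms) and
 * RTAX_HOPLIMIT is clamped to 255.  Returns 0 on success; on failure
 * the temporary array is freed and -EINVAL or -ENOMEM is returned.
 */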
Florian Westphale715b6d2015-01-05 23:57:44 +01002414static int ip6_convert_metrics(struct mx6_config *mxc,
2415 const struct fib6_config *cfg)
2416{
Stephen Hemminger6670e152017-11-14 08:25:49 -08002417 struct net *net = cfg->fc_nlinfo.nl_net;
Daniel Borkmannc3a8d942015-08-31 15:58:47 +02002418 bool ecn_ca = false;
Florian Westphale715b6d2015-01-05 23:57:44 +01002419 struct nlattr *nla;
2420 int remaining;
2421 u32 *mp;
2422
Ian Morris63159f22015-03-29 14:00:04 +01002423 if (!cfg->fc_mx)
Florian Westphale715b6d2015-01-05 23:57:44 +01002424 return 0;
2425
2426 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2427 if (unlikely(!mp))
2428 return -ENOMEM;
2429
2430 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2431 int type = nla_type(nla);
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002432 u32 val;
Florian Westphale715b6d2015-01-05 23:57:44 +01002433
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002434 if (!type)
2435 continue;
2436 if (unlikely(type > RTAX_MAX))
2437 goto err;
Daniel Borkmannea697632015-01-05 23:57:47 +01002438
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002439 if (type == RTAX_CC_ALGO) {
2440 char tmp[TCP_CA_NAME_MAX];
2441
2442 nla_strlcpy(tmp, nla, sizeof(tmp));
Stephen Hemminger6670e152017-11-14 08:25:49 -08002443 val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002444 if (val == TCP_CA_UNSPEC)
Florian Westphale715b6d2015-01-05 23:57:44 +01002445 goto err;
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002446 } else {
2447 val = nla_get_u32(nla);
Florian Westphale715b6d2015-01-05 23:57:44 +01002448 }
Paolo Abeni626abd52016-05-13 18:33:41 +02002449 if (type == RTAX_HOPLIMIT && val > 255)
2450 val = 255;
Daniel Borkmannb8d3e412015-08-31 15:58:46 +02002451 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2452 goto err;
Daniel Borkmann1bb14802015-08-31 15:58:45 +02002453
2454 mp[type - 1] = val;
2455 __set_bit(type - 1, mxc->mx_valid);
Florian Westphale715b6d2015-01-05 23:57:44 +01002456 }
2457
Daniel Borkmannc3a8d942015-08-31 15:58:47 +02002458 if (ecn_ca) {
2459 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2460 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2461 }
Florian Westphale715b6d2015-01-05 23:57:44 +01002462
Daniel Borkmannc3a8d942015-08-31 15:58:47 +02002463 mxc->mx = mp;
Florian Westphale715b6d2015-01-05 23:57:44 +01002464 return 0;
2465 err:
2466 kfree(mp);
2467 return -EINVAL;
2468}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002469
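/* Look up a nexthop gateway in the given table (ignoring link state);
 * returns NULL when the table does not exist or only the null entry is
 * found, so the caller can fall back to a full lookup.
 */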
David Ahern8c145862016-04-24 21:26:04 -07002470static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2471 struct fib6_config *cfg,
David Ahernf4797b32018-01-25 16:55:08 -08002472 const struct in6_addr *gw_addr,
2473 u32 tbid, int flags)
David Ahern8c145862016-04-24 21:26:04 -07002474{
2475 struct flowi6 fl6 = {
2476 .flowi6_oif = cfg->fc_ifindex,
2477 .daddr = *gw_addr,
2478 .saddr = cfg->fc_prefsrc,
2479 };
2480 struct fib6_table *table;
2481 struct rt6_info *rt;
David Ahern8c145862016-04-24 21:26:04 -07002482
David Ahernf4797b32018-01-25 16:55:08 -08002483 table = fib6_get_table(net, tbid);
David Ahern8c145862016-04-24 21:26:04 -07002484 if (!table)
2485 return NULL;
2486
2487 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2488 flags |= RT6_LOOKUP_F_HAS_SADDR;
2489
David Ahernf4797b32018-01-25 16:55:08 -08002490 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
David Ahern8c145862016-04-24 21:26:04 -07002491 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
2492
2493 /* if table lookup failed, fall back to full lookup */
2494 if (rt == net->ipv6.ip6_null_entry) {
2495 ip6_rt_put(rt);
2496 rt = NULL;
2497 }
2498
2499 return rt;
2500}
2501
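/* Validate an RTNH_F_ONLINK nexthop: the gateway must not resolve, in
 * the egress device's table, to a local/anycast/reject route or to a
 * route via a different device.
 */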
David Ahernfc1e64e2018-01-25 16:55:09 -08002502static int ip6_route_check_nh_onlink(struct net *net,
2503 struct fib6_config *cfg,
2504 struct net_device *dev,
2505 struct netlink_ext_ack *extack)
2506{
David Ahern44750f82018-02-06 13:17:06 -08002507 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
David Ahernfc1e64e2018-01-25 16:55:09 -08002508 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2509 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2510 struct rt6_info *grt;
2511 int err;
2512
2513 err = 0;
2514 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2515 if (grt) {
David Ahern58e354c2018-02-06 12:14:12 -08002516 if (!grt->dst.error &&
2517 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
David Ahern44750f82018-02-06 13:17:06 -08002518 NL_SET_ERR_MSG(extack,
2519 "Nexthop has invalid gateway or device mismatch");
David Ahernfc1e64e2018-01-25 16:55:09 -08002520 err = -EINVAL;
2521 }
2522
2523 ip6_rt_put(grt);
2524 }
2525
2526 return err;
2527}
2528
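/* Resolve the output device (and idev) for a gateway nexthop: try the
 * table given in the config first, then a full rt6_lookup(), and
 * succeed only if the resolved route is not itself via a gateway and
 * agrees with any device the caller already supplied.
 */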
David Ahern1edce992018-01-25 16:55:07 -08002529static int ip6_route_check_nh(struct net *net,
2530 struct fib6_config *cfg,
2531 struct net_device **_dev,
2532 struct inet6_dev **idev)
2533{
2534 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2535 struct net_device *dev = _dev ? *_dev : NULL;
2536 struct rt6_info *grt = NULL;
2537 int err = -EHOSTUNREACH;
2538
2539 if (cfg->fc_table) {
David Ahernf4797b32018-01-25 16:55:08 -08002540 int flags = RT6_LOOKUP_F_IFACE;
2541
2542 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2543 cfg->fc_table, flags);
David Ahern1edce992018-01-25 16:55:07 -08002544 if (grt) {
2545 if (grt->rt6i_flags & RTF_GATEWAY ||
2546 (dev && dev != grt->dst.dev)) {
2547 ip6_rt_put(grt);
2548 grt = NULL;
2549 }
2550 }
2551 }
2552
2553 if (!grt)
2554 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
2555
2556 if (!grt)
2557 goto out;
2558
2559 if (dev) {
2560 if (dev != grt->dst.dev) {
2561 ip6_rt_put(grt);
2562 goto out;
2563 }
2564 } else {
2565 *_dev = dev = grt->dst.dev;
2566 *idev = grt->rt6i_idev;
2567 dev_hold(dev);
2568 in6_dev_hold(grt->rt6i_idev);
2569 }
2570
2571 if (!(grt->rt6i_flags & RTF_GATEWAY))
2572 err = 0;
2573
2574 ip6_rt_put(grt);
2575
2576out:
2577 return err;
2578}
2579
David Ahern333c4302017-05-21 10:12:04 -06002580static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2581 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002582{
Daniel Lezcano55786892008-03-04 13:47:47 -08002583 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002584 struct rt6_info *rt = NULL;
2585 struct net_device *dev = NULL;
2586 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07002587 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002588 int addr_type;
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002589 int err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002590
David Ahern557c44b2017-04-19 14:19:43 -07002591	/* RTF_PCPU is an internal flag; cannot be set by userspace */
David Ahernd5d531c2017-05-21 10:12:05 -06002592 if (cfg->fc_flags & RTF_PCPU) {
2593 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
David Ahern557c44b2017-04-19 14:19:43 -07002594 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002595 }
David Ahern557c44b2017-04-19 14:19:43 -07002596
Wei Wang2ea23522017-10-27 17:30:12 -07002597	/* RTF_CACHE is an internal flag; cannot be set by userspace */
2598 if (cfg->fc_flags & RTF_CACHE) {
2599 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2600 goto out;
2601 }
2602
David Ahernd5d531c2017-05-21 10:12:05 -06002603 if (cfg->fc_dst_len > 128) {
2604 NL_SET_ERR_MSG(extack, "Invalid prefix length");
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002605 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002606 }
2607 if (cfg->fc_src_len > 128) {
2608 NL_SET_ERR_MSG(extack, "Invalid source address length");
2609 goto out;
2610 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611#ifndef CONFIG_IPV6_SUBTREES
David Ahernd5d531c2017-05-21 10:12:05 -06002612 if (cfg->fc_src_len) {
2613 NL_SET_ERR_MSG(extack,
2614 "Specifying source address requires IPV6_SUBTREES to be enabled");
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002615 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002616 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002617#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07002618 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002619 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08002620 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002621 if (!dev)
2622 goto out;
2623 idev = in6_dev_get(dev);
2624 if (!idev)
2625 goto out;
2626 }
2627
Thomas Graf86872cb2006-08-22 00:01:08 -07002628 if (cfg->fc_metric == 0)
2629 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630
David Ahernfc1e64e2018-01-25 16:55:09 -08002631 if (cfg->fc_flags & RTNH_F_ONLINK) {
2632 if (!dev) {
2633 NL_SET_ERR_MSG(extack,
2634 "Nexthop device required for onlink");
2635 err = -ENODEV;
2636 goto out;
2637 }
2638
2639 if (!(dev->flags & IFF_UP)) {
2640 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2641 err = -ENETDOWN;
2642 goto out;
2643 }
2644 }
2645
Matti Vaittinend71314b2011-11-14 00:14:49 +00002646 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05002647 if (cfg->fc_nlinfo.nlh &&
2648 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
Matti Vaittinend71314b2011-11-14 00:14:49 +00002649 table = fib6_get_table(net, cfg->fc_table);
David S. Miller38308472011-12-03 18:02:47 -05002650 if (!table) {
Joe Perchesf3213832012-05-15 14:11:53 +00002651 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
Matti Vaittinend71314b2011-11-14 00:14:49 +00002652 table = fib6_new_table(net, cfg->fc_table);
2653 }
2654 } else {
2655 table = fib6_new_table(net, cfg->fc_table);
2656 }
David S. Miller38308472011-12-03 18:02:47 -05002657
2658 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07002659 goto out;
Thomas Grafc71099a2006-08-04 23:20:06 -07002660
Martin KaFai Lauad706862015-08-14 11:05:52 -07002661 rt = ip6_dst_alloc(net, NULL,
2662 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002663
David S. Miller38308472011-12-03 18:02:47 -05002664 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002665 err = -ENOMEM;
2666 goto out;
2667 }
2668
Gao feng1716a962012-04-06 00:13:10 +00002669 if (cfg->fc_flags & RTF_EXPIRES)
2670 rt6_set_expires(rt, jiffies +
2671 clock_t_to_jiffies(cfg->fc_expires));
2672 else
2673 rt6_clean_expires(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002674
Thomas Graf86872cb2006-08-22 00:01:08 -07002675 if (cfg->fc_protocol == RTPROT_UNSPEC)
2676 cfg->fc_protocol = RTPROT_BOOT;
2677 rt->rt6i_protocol = cfg->fc_protocol;
2678
2679 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002680
2681 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07002682 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002683 else if (cfg->fc_flags & RTF_LOCAL)
2684 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002685 else
Changli Gaod8d1f302010-06-10 23:31:35 -07002686 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002687
Changli Gaod8d1f302010-06-10 23:31:35 -07002688 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002689
Roopa Prabhu19e42e42015-07-21 10:43:48 +02002690 if (cfg->fc_encap) {
2691 struct lwtunnel_state *lwtstate;
2692
David Ahern30357d72017-01-30 12:07:37 -08002693 err = lwtunnel_build_state(cfg->fc_encap_type,
Tom Herbert127eb7c2015-08-24 09:45:41 -07002694 cfg->fc_encap, AF_INET6, cfg,
David Ahern9ae28722017-05-27 16:19:28 -06002695 &lwtstate, extack);
Roopa Prabhu19e42e42015-07-21 10:43:48 +02002696 if (err)
2697 goto out;
Jiri Benc61adedf2015-08-20 13:56:25 +02002698 rt->dst.lwtstate = lwtstate_get(lwtstate);
2699 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
2700 rt->dst.lwtstate->orig_output = rt->dst.output;
2701 rt->dst.output = lwtunnel_output;
Tom Herbert25368622015-08-17 13:42:24 -07002702 }
Jiri Benc61adedf2015-08-20 13:56:25 +02002703 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
2704 rt->dst.lwtstate->orig_input = rt->dst.input;
2705 rt->dst.input = lwtunnel_input;
Tom Herbert25368622015-08-17 13:42:24 -07002706 }
Roopa Prabhu19e42e42015-07-21 10:43:48 +02002707 }
2708
Thomas Graf86872cb2006-08-22 00:01:08 -07002709 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2710 rt->rt6i_dst.plen = cfg->fc_dst_len;
Martin KaFai Lauafc4eef2015-04-28 13:03:07 -07002711 if (rt->rt6i_dst.plen == 128)
Michal Kubečeke5fd3872014-03-27 13:04:08 +01002712 rt->dst.flags |= DST_HOST;
Michal Kubečeke5fd3872014-03-27 13:04:08 +01002713
Linus Torvalds1da177e2005-04-16 15:20:36 -07002714#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07002715 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2716 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002717#endif
2718
Thomas Graf86872cb2006-08-22 00:01:08 -07002719 rt->rt6i_metric = cfg->fc_metric;
Ido Schimmel398958a2018-01-09 16:40:28 +02002720 rt->rt6i_nh_weight = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002721
2722	/* We cannot add true routes via loopback here;
2723	   they would result in kernel looping. Promote them to reject routes.
2724 */
Thomas Graf86872cb2006-08-22 00:01:08 -07002725 if ((cfg->fc_flags & RTF_REJECT) ||
David S. Miller38308472011-12-03 18:02:47 -05002726 (dev && (dev->flags & IFF_LOOPBACK) &&
2727 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2728 !(cfg->fc_flags & RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002729 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08002730 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002731 if (dev) {
2732 dev_put(dev);
2733 in6_dev_put(idev);
2734 }
Daniel Lezcano55786892008-03-04 13:47:47 -08002735 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002736 dev_hold(dev);
2737 idev = in6_dev_get(dev);
2738 if (!idev) {
2739 err = -ENODEV;
2740 goto out;
2741 }
2742 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002743 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00002744 switch (cfg->fc_type) {
2745 case RTN_BLACKHOLE:
2746 rt->dst.error = -EINVAL;
Eric W. Biedermanede20592015-10-07 16:48:47 -05002747 rt->dst.output = dst_discard_out;
Kamala R7150aed2013-12-02 19:55:21 +05302748 rt->dst.input = dst_discard;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00002749 break;
2750 case RTN_PROHIBIT:
2751 rt->dst.error = -EACCES;
Kamala R7150aed2013-12-02 19:55:21 +05302752 rt->dst.output = ip6_pkt_prohibit_out;
2753 rt->dst.input = ip6_pkt_prohibit;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00002754 break;
Nicolas Dichtelb4949ab2012-09-06 05:53:35 +00002755 case RTN_THROW:
Nikola Forró0315e382015-09-17 16:01:32 +02002756 case RTN_UNREACHABLE:
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00002757 default:
Kamala R7150aed2013-12-02 19:55:21 +05302758 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
Nikola Forró0315e382015-09-17 16:01:32 +02002759 : (cfg->fc_type == RTN_UNREACHABLE)
2760 ? -EHOSTUNREACH : -ENETUNREACH;
Kamala R7150aed2013-12-02 19:55:21 +05302761 rt->dst.output = ip6_pkt_discard_out;
2762 rt->dst.input = ip6_pkt_discard;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00002763 break;
2764 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002765 goto install_route;
2766 }
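	/*
	 * Illustrative summary (prefixes below are placeholders, not from this
	 * code): the switch above is what makes e.g.
	 *
	 *	ip -6 route add blackhole   2001:db8:dead::/48
	 *	ip -6 route add prohibit    2001:db8:beef::/48
	 *	ip -6 route add unreachable 2001:db8:feed::/48
	 *
	 * silently drop (-EINVAL), answer with ICMPv6 adm-prohibited (-EACCES)
	 * and answer with ICMPv6 no-route (-EHOSTUNREACH) respectively;
	 * RTN_THROW maps to -EAGAIN and any other type to -ENETUNREACH.
	 */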
2767
Thomas Graf86872cb2006-08-22 00:01:08 -07002768 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002769 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002770 int gwa_type;
2771
Thomas Graf86872cb2006-08-22 00:01:08 -07002772 gw_addr = &cfg->fc_gateway;
Florian Westphal330567b2015-08-07 10:54:28 +02002773 gwa_type = ipv6_addr_type(gw_addr);
Florian Westphal48ed7b22015-05-21 00:25:41 +02002774
2775		/* if gw_addr is local we will fail to detect this if the
2776		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2777		 * will return the already-added prefix route via the interface
2778		 * that the prefix route was assigned to, which might be non-loopback.
2779 */
2780 err = -EINVAL;
Florian Westphal330567b2015-08-07 10:54:28 +02002781 if (ipv6_chk_addr_and_flags(net, gw_addr,
2782 gwa_type & IPV6_ADDR_LINKLOCAL ?
David Ahernd5d531c2017-05-21 10:12:05 -06002783 dev : NULL, 0, 0)) {
2784 NL_SET_ERR_MSG(extack, "Invalid gateway address");
Florian Westphal48ed7b22015-05-21 00:25:41 +02002785 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002786 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002787 rt->rt6i_gateway = *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002788
2789 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002790			/* IPv6 strictly inhibits using non-link-local
2791			   addresses as the nexthop address.
2792			   Otherwise, the router will not be able to send redirects.
2793			   That is generally good, but in some (rare!) circumstances
2794			   (SIT, PtP, NBMA NOARP links) it is handy to allow
2795			   some exceptions. --ANK
Erik Nordmark96d58222016-12-03 20:57:09 -08002796			   We allow IPv4-mapped nexthops to support RFC 4798-style
2797			   addressing.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002798 */
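			/*
			 * Illustrative examples (placeholder addresses, not from
			 * this code): fe80::1 is link-local unicast and skips this
			 * branch entirely; ::ffff:192.0.2.1 is an IPv4-mapped
			 * nexthop (RFC 4798) and passes the check below, as does a
			 * global unicast gateway, which is then validated by
			 * ip6_route_check_nh()/ip6_route_check_nh_onlink(); a
			 * multicast gateway such as ff02::1 is rejected here.
			 */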
Erik Nordmark96d58222016-12-03 20:57:09 -08002799 if (!(gwa_type & (IPV6_ADDR_UNICAST |
David Ahernd5d531c2017-05-21 10:12:05 -06002800 IPV6_ADDR_MAPPED))) {
2801 NL_SET_ERR_MSG(extack,
2802 "Invalid gateway address");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002803 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002804 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002805
David Ahernfc1e64e2018-01-25 16:55:09 -08002806 if (cfg->fc_flags & RTNH_F_ONLINK) {
2807 err = ip6_route_check_nh_onlink(net, cfg, dev,
2808 extack);
2809 } else {
2810 err = ip6_route_check_nh(net, cfg, &dev, &idev);
2811 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002812 if (err)
2813 goto out;
2814 }
2815 err = -EINVAL;
David Ahernd5d531c2017-05-21 10:12:05 -06002816 if (!dev) {
2817 NL_SET_ERR_MSG(extack, "Egress device not specified");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002818 goto out;
David Ahernd5d531c2017-05-21 10:12:05 -06002819 } else if (dev->flags & IFF_LOOPBACK) {
2820 NL_SET_ERR_MSG(extack,
2821 "Egress device can not be loopback device for this route");
2822 goto out;
2823 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002824 }
2825
2826 err = -ENODEV;
David S. Miller38308472011-12-03 18:02:47 -05002827 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002828 goto out;
2829
David Ahern955ec4c2018-01-24 19:45:29 -08002830 if (!(dev->flags & IFF_UP)) {
2831 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2832 err = -ENETDOWN;
2833 goto out;
2834 }
2835
Daniel Walterc3968a82011-04-13 21:10:57 +00002836 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2837 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
David Ahernd5d531c2017-05-21 10:12:05 -06002838 NL_SET_ERR_MSG(extack, "Invalid source address");
Daniel Walterc3968a82011-04-13 21:10:57 +00002839 err = -EINVAL;
2840 goto out;
2841 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00002842 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
Daniel Walterc3968a82011-04-13 21:10:57 +00002843 rt->rt6i_prefsrc.plen = 128;
2844 } else
2845 rt->rt6i_prefsrc.plen = 0;
2846
Thomas Graf86872cb2006-08-22 00:01:08 -07002847 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002848
2849install_route:
Ido Schimmel5609b802018-01-07 12:45:06 +02002850 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
2851 !netif_carrier_ok(dev))
2852 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
David Ahernfc1e64e2018-01-25 16:55:09 -08002853 rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
Changli Gaod8d1f302010-06-10 23:31:35 -07002854 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002855 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07002856 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08002857
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002858 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08002859
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002860 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002861out:
2862 if (dev)
2863 dev_put(dev);
2864 if (idev)
2865 in6_dev_put(idev);
Wei Wang587fea72017-06-17 10:42:36 -07002866 if (rt)
2867 dst_release_immediate(&rt->dst);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002868
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002869 return ERR_PTR(err);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002870}
2871
David Ahern333c4302017-05-21 10:12:04 -06002872int ip6_route_add(struct fib6_config *cfg,
2873 struct netlink_ext_ack *extack)
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002874{
2875 struct mx6_config mxc = { .mx = NULL, };
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002876 struct rt6_info *rt;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002877 int err;
2878
David Ahern333c4302017-05-21 10:12:04 -06002879 rt = ip6_route_info_create(cfg, extack);
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002880 if (IS_ERR(rt)) {
2881 err = PTR_ERR(rt);
2882 rt = NULL;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002883 goto out;
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07002884 }
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002885
2886 err = ip6_convert_metrics(&mxc, cfg);
2887 if (err)
2888 goto out;
2889
David Ahern333c4302017-05-21 10:12:04 -06002890 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002891
2892 kfree(mxc.mx);
2893
2894 return err;
2895out:
Wei Wang587fea72017-06-17 10:42:36 -07002896 if (rt)
2897 dst_release_immediate(&rt->dst);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07002898
Linus Torvalds1da177e2005-04-16 15:20:36 -07002899 return err;
2900}
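/*
 * Minimal in-kernel usage sketch (untested illustration, mirroring the
 * pattern of rt6_add_route_info()/rt6_add_dflt_router() below; "dev",
 * "net", "prefix" and "gw" are assumed to exist in the caller). Adding
 * 2001:db8::/64 via a gateway would roughly look like:
 *
 *	struct fib6_config cfg = {
 *		.fc_table	= RT6_TABLE_MAIN,
 *		.fc_metric	= IP6_RT_PRIO_USER,
 *		.fc_ifindex	= dev->ifindex,
 *		.fc_dst_len	= 64,
 *		.fc_flags	= RTF_UP | RTF_GATEWAY,
 *		.fc_nlinfo.nl_net = net,
 *	};
 *	cfg.fc_dst = prefix;		// struct in6_addr for 2001:db8::
 *	cfg.fc_gateway = gw;		// struct in6_addr of the nexthop
 *	err = ip6_route_add(&cfg, NULL);
 */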
2901
Thomas Graf86872cb2006-08-22 00:01:08 -07002902static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002903{
2904 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07002905 struct fib6_table *table;
David S. Millerd1918542011-12-28 20:19:20 -05002906 struct net *net = dev_net(rt->dst.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002907
Wei Wanga4c2fd72017-06-17 10:42:42 -07002908 if (rt == net->ipv6.ip6_null_entry) {
Gao feng6825a262012-09-19 19:25:34 +00002909 err = -ENOENT;
2910 goto out;
2911 }
Patrick McHardy6c813a72006-08-06 22:22:47 -07002912
Thomas Grafc71099a2006-08-04 23:20:06 -07002913 table = rt->rt6i_table;
Wei Wang66f5d6c2017-10-06 12:06:10 -07002914 spin_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -07002915 err = fib6_del(rt, info);
Wei Wang66f5d6c2017-10-06 12:06:10 -07002916 spin_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002917
Gao feng6825a262012-09-19 19:25:34 +00002918out:
Amerigo Wang94e187c2012-10-29 00:13:19 +00002919 ip6_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002920 return err;
2921}
2922
Thomas Grafe0a1ad732006-08-22 00:00:21 -07002923int ip6_del_rt(struct rt6_info *rt)
2924{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08002925 struct nl_info info = {
David S. Millerd1918542011-12-28 20:19:20 -05002926 .nl_net = dev_net(rt->dst.dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08002927 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002928 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07002929}
2930
David Ahern0ae81332017-02-02 12:37:08 -08002931static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2932{
2933 struct nl_info *info = &cfg->fc_nlinfo;
WANG Conge3330032017-02-27 16:07:43 -08002934 struct net *net = info->nl_net;
David Ahern16a16cd2017-02-02 12:37:11 -08002935 struct sk_buff *skb = NULL;
David Ahern0ae81332017-02-02 12:37:08 -08002936 struct fib6_table *table;
WANG Conge3330032017-02-27 16:07:43 -08002937 int err = -ENOENT;
David Ahern0ae81332017-02-02 12:37:08 -08002938
WANG Conge3330032017-02-27 16:07:43 -08002939 if (rt == net->ipv6.ip6_null_entry)
2940 goto out_put;
David Ahern0ae81332017-02-02 12:37:08 -08002941 table = rt->rt6i_table;
Wei Wang66f5d6c2017-10-06 12:06:10 -07002942 spin_lock_bh(&table->tb6_lock);
David Ahern0ae81332017-02-02 12:37:08 -08002943
2944 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2945 struct rt6_info *sibling, *next_sibling;
2946
David Ahern16a16cd2017-02-02 12:37:11 -08002947 /* prefer to send a single notification with all hops */
2948 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2949 if (skb) {
2950 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2951
WANG Conge3330032017-02-27 16:07:43 -08002952 if (rt6_fill_node(net, skb, rt,
David Ahern16a16cd2017-02-02 12:37:11 -08002953 NULL, NULL, 0, RTM_DELROUTE,
2954 info->portid, seq, 0) < 0) {
2955 kfree_skb(skb);
2956 skb = NULL;
2957 } else
2958 info->skip_notify = 1;
2959 }
2960
David Ahern0ae81332017-02-02 12:37:08 -08002961 list_for_each_entry_safe(sibling, next_sibling,
2962 &rt->rt6i_siblings,
2963 rt6i_siblings) {
2964 err = fib6_del(sibling, info);
2965 if (err)
WANG Conge3330032017-02-27 16:07:43 -08002966 goto out_unlock;
David Ahern0ae81332017-02-02 12:37:08 -08002967 }
2968 }
2969
2970 err = fib6_del(rt, info);
WANG Conge3330032017-02-27 16:07:43 -08002971out_unlock:
Wei Wang66f5d6c2017-10-06 12:06:10 -07002972 spin_unlock_bh(&table->tb6_lock);
WANG Conge3330032017-02-27 16:07:43 -08002973out_put:
David Ahern0ae81332017-02-02 12:37:08 -08002974 ip6_rt_put(rt);
David Ahern16a16cd2017-02-02 12:37:11 -08002975
2976 if (skb) {
WANG Conge3330032017-02-27 16:07:43 -08002977 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
David Ahern16a16cd2017-02-02 12:37:11 -08002978 info->nlh, gfp_any());
2979 }
David Ahern0ae81332017-02-02 12:37:08 -08002980 return err;
2981}
2982
David Ahern333c4302017-05-21 10:12:04 -06002983static int ip6_route_del(struct fib6_config *cfg,
2984 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002985{
Wei Wang2b760fc2017-10-06 12:06:03 -07002986 struct rt6_info *rt, *rt_cache;
Thomas Grafc71099a2006-08-04 23:20:06 -07002987 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002988 struct fib6_node *fn;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002989 int err = -ESRCH;
2990
Daniel Lezcano55786892008-03-04 13:47:47 -08002991 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
David Ahernd5d531c2017-05-21 10:12:05 -06002992 if (!table) {
2993 NL_SET_ERR_MSG(extack, "FIB table does not exist");
Thomas Grafc71099a2006-08-04 23:20:06 -07002994 return err;
David Ahernd5d531c2017-05-21 10:12:05 -06002995 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002996
Wei Wang66f5d6c2017-10-06 12:06:10 -07002997 rcu_read_lock();
Thomas Grafc71099a2006-08-04 23:20:06 -07002998
2999 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07003000 &cfg->fc_dst, cfg->fc_dst_len,
Wei Wang38fbeee2017-10-06 12:06:02 -07003001 &cfg->fc_src, cfg->fc_src_len,
Wei Wang2b760fc2017-10-06 12:06:03 -07003002 !(cfg->fc_flags & RTF_CACHE));
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09003003
Linus Torvalds1da177e2005-04-16 15:20:36 -07003004 if (fn) {
Wei Wang66f5d6c2017-10-06 12:06:10 -07003005 for_each_fib6_node_rt_rcu(fn) {
Wei Wang2b760fc2017-10-06 12:06:03 -07003006 if (cfg->fc_flags & RTF_CACHE) {
3007 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3008 &cfg->fc_src);
3009 if (!rt_cache)
3010 continue;
3011 rt = rt_cache;
3012 }
Thomas Graf86872cb2006-08-22 00:01:08 -07003013 if (cfg->fc_ifindex &&
David S. Millerd1918542011-12-28 20:19:20 -05003014 (!rt->dst.dev ||
3015 rt->dst.dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003016 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07003017 if (cfg->fc_flags & RTF_GATEWAY &&
3018 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003019 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07003020 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003021 continue;
Mantas Mc2ed1882016-12-16 10:30:59 +02003022 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3023 continue;
Wei Wangd3843fe2017-10-06 12:06:06 -07003024 if (!dst_hold_safe(&rt->dst))
3025 break;
Wei Wang66f5d6c2017-10-06 12:06:10 -07003026 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003027
David Ahern0ae81332017-02-02 12:37:08 -08003028			/* if a gateway was specified, only delete the one hop */
3029 if (cfg->fc_flags & RTF_GATEWAY)
3030 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3031
3032 return __ip6_del_rt_siblings(rt, cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003033 }
3034 }
Wei Wang66f5d6c2017-10-06 12:06:10 -07003035 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003036
3037 return err;
3038}
3039
David S. Miller6700c272012-07-17 03:29:28 -07003040static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07003041{
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07003042 struct netevent_redirect netevent;
David S. Millere8599ff2012-07-11 23:43:53 -07003043 struct rt6_info *rt, *nrt = NULL;
David S. Millere8599ff2012-07-11 23:43:53 -07003044 struct ndisc_options ndopts;
3045 struct inet6_dev *in6_dev;
3046 struct neighbour *neigh;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003047 struct rd_msg *msg;
David S. Miller6e157b62012-07-12 00:05:02 -07003048 int optlen, on_link;
3049 u8 *lladdr;
David S. Millere8599ff2012-07-11 23:43:53 -07003050
Simon Horman29a3cad2013-05-28 20:34:26 +00003051 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003052 optlen -= sizeof(*msg);
David S. Millere8599ff2012-07-11 23:43:53 -07003053
3054 if (optlen < 0) {
David S. Miller6e157b62012-07-12 00:05:02 -07003055 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
David S. Millere8599ff2012-07-11 23:43:53 -07003056 return;
3057 }
3058
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003059 msg = (struct rd_msg *)icmp6_hdr(skb);
David S. Millere8599ff2012-07-11 23:43:53 -07003060
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003061 if (ipv6_addr_is_multicast(&msg->dest)) {
David S. Miller6e157b62012-07-12 00:05:02 -07003062 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
David S. Millere8599ff2012-07-11 23:43:53 -07003063 return;
3064 }
3065
David S. Miller6e157b62012-07-12 00:05:02 -07003066 on_link = 0;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003067 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
David S. Millere8599ff2012-07-11 23:43:53 -07003068 on_link = 1;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003069 } else if (ipv6_addr_type(&msg->target) !=
David S. Millere8599ff2012-07-11 23:43:53 -07003070 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
David S. Miller6e157b62012-07-12 00:05:02 -07003071 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
David S. Millere8599ff2012-07-11 23:43:53 -07003072 return;
3073 }
3074
3075 in6_dev = __in6_dev_get(skb->dev);
3076 if (!in6_dev)
3077 return;
3078 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3079 return;
3080
3081 /* RFC2461 8.1:
3082 * The IP source address of the Redirect MUST be the same as the current
3083 * first-hop router for the specified ICMP Destination Address.
3084 */
3085
Alexander Aringf997c552016-06-15 21:20:23 +02003086 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
David S. Millere8599ff2012-07-11 23:43:53 -07003087 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3088 return;
3089 }
David S. Miller6e157b62012-07-12 00:05:02 -07003090
3091 lladdr = NULL;
David S. Millere8599ff2012-07-11 23:43:53 -07003092 if (ndopts.nd_opts_tgt_lladdr) {
3093 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3094 skb->dev);
3095 if (!lladdr) {
3096 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3097 return;
3098 }
3099 }
3100
David S. Miller6e157b62012-07-12 00:05:02 -07003101 rt = (struct rt6_info *) dst;
Matthias Schifferec13ad12015-11-02 01:24:38 +01003102 if (rt->rt6i_flags & RTF_REJECT) {
David S. Miller6e157b62012-07-12 00:05:02 -07003103 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
3104 return;
3105 }
3106
3107 /* Redirect received -> path was valid.
3108	 * Redirects are sent only in response to data packets,
3109	 * so this nexthop is apparently reachable. --ANK
3110 */
Julian Anastasov0dec8792017-02-06 23:14:16 +02003111 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
David S. Miller6e157b62012-07-12 00:05:02 -07003112
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003113 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
David S. Millere8599ff2012-07-11 23:43:53 -07003114 if (!neigh)
3115 return;
3116
Linus Torvalds1da177e2005-04-16 15:20:36 -07003117 /*
3118 * We have finally decided to accept it.
3119 */
3120
Alexander Aringf997c552016-06-15 21:20:23 +02003121 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07003122 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3123 NEIGH_UPDATE_F_OVERRIDE|
3124 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
Alexander Aringf997c552016-06-15 21:20:23 +02003125 NEIGH_UPDATE_F_ISROUTER)),
3126 NDISC_REDIRECT, &ndopts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003127
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07003128 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
David S. Miller38308472011-12-03 18:02:47 -05003129 if (!nrt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003130 goto out;
3131
3132 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3133 if (on_link)
3134 nrt->rt6i_flags &= ~RTF_GATEWAY;
3135
Xin Longb91d5322017-08-03 14:13:46 +08003136 nrt->rt6i_protocol = RTPROT_REDIRECT;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003137 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003138
Wei Wang2b760fc2017-10-06 12:06:03 -07003139 /* No need to remove rt from the exception table if rt is
3140 * a cached route because rt6_insert_exception() will
3141	 * take care of it
3142 */
3143 if (rt6_insert_exception(nrt, rt)) {
3144 dst_release_immediate(&nrt->dst);
3145 goto out;
3146 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003147
Changli Gaod8d1f302010-06-10 23:31:35 -07003148 netevent.old = &rt->dst;
3149 netevent.new = &nrt->dst;
YOSHIFUJI Hideaki / 吉藤英明71bcdba2013-01-05 16:34:51 +00003150 netevent.daddr = &msg->dest;
YOSHIFUJI Hideaki / 吉藤英明60592832013-01-14 09:28:27 +00003151 netevent.neigh = neigh;
Tom Tucker8d717402006-07-30 20:43:36 -07003152 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3153
Linus Torvalds1da177e2005-04-16 15:20:36 -07003154out:
David S. Millere8599ff2012-07-11 23:43:53 -07003155 neigh_release(neigh);
David S. Miller6e157b62012-07-12 00:05:02 -07003156}
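/*
 * Illustrative effect (placeholder addresses, not from this code): when the
 * current gateway redirects us for destination 2001:db8::5 to the router
 * fe80::2 on the same link, the code above clones the route into an
 * exception entry flagged RTF_UP|RTF_DYNAMIC|RTF_CACHE (plus RTF_GATEWAY
 * unless it is an on-link redirect) with rt6i_gateway set to the new
 * nexthop, so later lookups for that destination use fe80::2.
 */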
3157
Linus Torvalds1da177e2005-04-16 15:20:36 -07003158/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07003159 * Misc support functions
3160 */
3161
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07003162static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
3163{
David Miller3a2232e2017-11-28 15:40:40 -05003164 BUG_ON(from->from);
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07003165
3166 rt->rt6i_flags &= ~RTF_EXPIRES;
3167 dst_hold(&from->dst);
David Miller3a2232e2017-11-28 15:40:40 -05003168 rt->from = from;
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07003169 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
3170}
3171
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07003172static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003173{
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07003174 rt->dst.input = ort->dst.input;
3175 rt->dst.output = ort->dst.output;
3176 rt->rt6i_dst = ort->rt6i_dst;
3177 rt->dst.error = ort->dst.error;
3178 rt->rt6i_idev = ort->rt6i_idev;
3179 if (rt->rt6i_idev)
3180 in6_dev_hold(rt->rt6i_idev);
3181 rt->dst.lastuse = jiffies;
3182 rt->rt6i_gateway = ort->rt6i_gateway;
3183 rt->rt6i_flags = ort->rt6i_flags;
3184 rt6_set_from(rt, ort);
3185 rt->rt6i_metric = ort->rt6i_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003186#ifdef CONFIG_IPV6_SUBTREES
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07003187 rt->rt6i_src = ort->rt6i_src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003188#endif
Martin KaFai Lau83a09ab2015-05-22 20:56:05 -07003189 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
3190 rt->rt6i_table = ort->rt6i_table;
Jiri Benc61adedf2015-08-20 13:56:25 +02003191 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003192}
3193
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003194#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003195static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003196 const struct in6_addr *prefix, int prefixlen,
David Ahern830218c2016-10-24 10:52:35 -07003197 const struct in6_addr *gwaddr,
3198 struct net_device *dev)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003199{
David Ahern830218c2016-10-24 10:52:35 -07003200 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3201 int ifindex = dev->ifindex;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003202 struct fib6_node *fn;
3203 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07003204 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003205
David Ahern830218c2016-10-24 10:52:35 -07003206 table = fib6_get_table(net, tb_id);
David S. Miller38308472011-12-03 18:02:47 -05003207 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07003208 return NULL;
3209
Wei Wang66f5d6c2017-10-06 12:06:10 -07003210 rcu_read_lock();
Wei Wang38fbeee2017-10-06 12:06:02 -07003211 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003212 if (!fn)
3213 goto out;
3214
Wei Wang66f5d6c2017-10-06 12:06:10 -07003215 for_each_fib6_node_rt_rcu(fn) {
David S. Millerd1918542011-12-28 20:19:20 -05003216 if (rt->dst.dev->ifindex != ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003217 continue;
3218 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3219 continue;
3220 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3221 continue;
Wei Wangd3843fe2017-10-06 12:06:06 -07003222 ip6_hold_safe(NULL, &rt, false);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003223 break;
3224 }
3225out:
Wei Wang66f5d6c2017-10-06 12:06:10 -07003226 rcu_read_unlock();
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003227 return rt;
3228}
3229
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003230static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003231 const struct in6_addr *prefix, int prefixlen,
David Ahern830218c2016-10-24 10:52:35 -07003232 const struct in6_addr *gwaddr,
3233 struct net_device *dev,
Eric Dumazet95c96172012-04-15 05:58:06 +00003234 unsigned int pref)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003235{
Thomas Graf86872cb2006-08-22 00:01:08 -07003236 struct fib6_config cfg = {
Rami Rosen238fc7e2008-02-09 23:43:11 -08003237 .fc_metric = IP6_RT_PRIO_USER,
David Ahern830218c2016-10-24 10:52:35 -07003238 .fc_ifindex = dev->ifindex,
Thomas Graf86872cb2006-08-22 00:01:08 -07003239 .fc_dst_len = prefixlen,
3240 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3241 RTF_UP | RTF_PREF(pref),
Xin Longb91d5322017-08-03 14:13:46 +08003242 .fc_protocol = RTPROT_RA,
Eric W. Biederman15e47302012-09-07 20:12:54 +00003243 .fc_nlinfo.portid = 0,
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08003244 .fc_nlinfo.nlh = NULL,
3245 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07003246 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003247
David Ahern830218c2016-10-24 10:52:35 -07003248	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003249 cfg.fc_dst = *prefix;
3250 cfg.fc_gateway = *gwaddr;
Thomas Graf86872cb2006-08-22 00:01:08 -07003251
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08003252 /* We should treat it as a default route if prefix length is 0. */
3253 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07003254 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003255
David Ahern333c4302017-05-21 10:12:04 -06003256 ip6_route_add(&cfg, NULL);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003257
David Ahern830218c2016-10-24 10:52:35 -07003258 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08003259}
3260#endif
3261
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003262struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09003263{
David Ahern830218c2016-10-24 10:52:35 -07003264 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003265 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07003266 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003267
David Ahern830218c2016-10-24 10:52:35 -07003268 table = fib6_get_table(dev_net(dev), tb_id);
David S. Miller38308472011-12-03 18:02:47 -05003269 if (!table)
Thomas Grafc71099a2006-08-04 23:20:06 -07003270 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003271
Wei Wang66f5d6c2017-10-06 12:06:10 -07003272 rcu_read_lock();
3273 for_each_fib6_node_rt_rcu(&table->tb6_root) {
David S. Millerd1918542011-12-28 20:19:20 -05003274 if (dev == rt->dst.dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08003275 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07003276 ipv6_addr_equal(&rt->rt6i_gateway, addr))
3277 break;
3278 }
3279 if (rt)
Wei Wangd3843fe2017-10-06 12:06:06 -07003280 ip6_hold_safe(NULL, &rt, false);
Wei Wang66f5d6c2017-10-06 12:06:10 -07003281 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003282 return rt;
3283}
3284
Eric Dumazetb71d1d42011-04-22 04:53:02 +00003285struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08003286 struct net_device *dev,
3287 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003288{
Thomas Graf86872cb2006-08-22 00:01:08 -07003289 struct fib6_config cfg = {
David Ahernca254492015-10-12 11:47:10 -07003290 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08003291 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07003292 .fc_ifindex = dev->ifindex,
3293 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3294 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Xin Longb91d5322017-08-03 14:13:46 +08003295 .fc_protocol = RTPROT_RA,
Eric W. Biederman15e47302012-09-07 20:12:54 +00003296 .fc_nlinfo.portid = 0,
Daniel Lezcano55786892008-03-04 13:47:47 -08003297 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09003298 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07003299 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07003300
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003301 cfg.fc_gateway = *gwaddr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003302
David Ahern333c4302017-05-21 10:12:04 -06003303 if (!ip6_route_add(&cfg, NULL)) {
David Ahern830218c2016-10-24 10:52:35 -07003304 struct fib6_table *table;
3305
3306 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3307 if (table)
3308 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3309 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003310
Linus Torvalds1da177e2005-04-16 15:20:36 -07003311 return rt6_get_dflt_router(gwaddr, dev);
3312}
3313
David Ahern830218c2016-10-24 10:52:35 -07003314static void __rt6_purge_dflt_routers(struct fib6_table *table)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003315{
3316 struct rt6_info *rt;
3317
3318restart:
Wei Wang66f5d6c2017-10-06 12:06:10 -07003319 rcu_read_lock();
3320 for_each_fib6_node_rt_rcu(&table->tb6_root) {
Lorenzo Colitti3e8b0ac2013-03-03 20:46:46 +00003321 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3322 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
Wei Wangd3843fe2017-10-06 12:06:06 -07003323 if (dst_hold_safe(&rt->dst)) {
Wei Wang66f5d6c2017-10-06 12:06:10 -07003324 rcu_read_unlock();
Wei Wangd3843fe2017-10-06 12:06:06 -07003325 ip6_del_rt(rt);
3326 } else {
Wei Wang66f5d6c2017-10-06 12:06:10 -07003327 rcu_read_unlock();
Wei Wangd3843fe2017-10-06 12:06:06 -07003328 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003329 goto restart;
3330 }
3331 }
Wei Wang66f5d6c2017-10-06 12:06:10 -07003332 rcu_read_unlock();
David Ahern830218c2016-10-24 10:52:35 -07003333
3334 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3335}
3336
3337void rt6_purge_dflt_routers(struct net *net)
3338{
3339 struct fib6_table *table;
3340 struct hlist_head *head;
3341 unsigned int h;
3342
3343 rcu_read_lock();
3344
3345 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3346 head = &net->ipv6.fib_table_hash[h];
3347 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3348 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3349 __rt6_purge_dflt_routers(table);
3350 }
3351 }
3352
3353 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003354}
3355
Daniel Lezcano55786892008-03-04 13:47:47 -08003356static void rtmsg_to_fib6_config(struct net *net,
3357 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07003358 struct fib6_config *cfg)
3359{
3360 memset(cfg, 0, sizeof(*cfg));
3361
David Ahernca254492015-10-12 11:47:10 -07003362 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3363 : RT6_TABLE_MAIN;
Thomas Graf86872cb2006-08-22 00:01:08 -07003364 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3365 cfg->fc_metric = rtmsg->rtmsg_metric;
3366 cfg->fc_expires = rtmsg->rtmsg_info;
3367 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3368 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3369 cfg->fc_flags = rtmsg->rtmsg_flags;
3370
Daniel Lezcano55786892008-03-04 13:47:47 -08003371 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08003372
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003373 cfg->fc_dst = rtmsg->rtmsg_dst;
3374 cfg->fc_src = rtmsg->rtmsg_src;
3375 cfg->fc_gateway = rtmsg->rtmsg_gateway;
Thomas Graf86872cb2006-08-22 00:01:08 -07003376}
3377
Daniel Lezcano55786892008-03-04 13:47:47 -08003378int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003379{
Thomas Graf86872cb2006-08-22 00:01:08 -07003380 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003381 struct in6_rtmsg rtmsg;
3382 int err;
3383
Ian Morris67ba4152014-08-24 21:53:10 +01003384 switch (cmd) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003385 case SIOCADDRT: /* Add a route */
3386 case SIOCDELRT: /* Delete a route */
Eric W. Biedermanaf31f412012-11-16 03:03:06 +00003387 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003388 return -EPERM;
3389 err = copy_from_user(&rtmsg, arg,
3390 sizeof(struct in6_rtmsg));
3391 if (err)
3392 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07003393
Daniel Lezcano55786892008-03-04 13:47:47 -08003394 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07003395
Linus Torvalds1da177e2005-04-16 15:20:36 -07003396 rtnl_lock();
3397 switch (cmd) {
3398 case SIOCADDRT:
David Ahern333c4302017-05-21 10:12:04 -06003399 err = ip6_route_add(&cfg, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003400 break;
3401 case SIOCDELRT:
David Ahern333c4302017-05-21 10:12:04 -06003402 err = ip6_route_del(&cfg, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003403 break;
3404 default:
3405 err = -EINVAL;
3406 }
3407 rtnl_unlock();
3408
3409 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07003410 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003411
3412 return -EINVAL;
3413}
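/*
 * Userspace sketch (untested illustration; field names follow
 * include/uapi/linux/ipv6_route.h, prefix and device name are
 * placeholders). The legacy SIOCADDRT path above is what e.g. net-tools'
 * "route -A inet6 add" uses:
 *
 *	struct in6_rtmsg rtmsg = { 0 };
 *	inet_pton(AF_INET6, "2001:db8::", &rtmsg.rtmsg_dst);
 *	rtmsg.rtmsg_dst_len = 64;
 *	rtmsg.rtmsg_ifindex = if_nametoindex("eth0");
 *	rtmsg.rtmsg_metric  = 1;
 *	rtmsg.rtmsg_flags   = RTF_UP;
 *	ioctl(socket(AF_INET6, SOCK_DGRAM, 0), SIOCADDRT, &rtmsg);
 */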
3414
3415/*
3416 * Drop the packet on the floor
3417 */
3418
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07003419static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003420{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003421 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00003422 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003423 switch (ipstats_mib_noroutes) {
3424 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07003425 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00003426 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07003427 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3428 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003429 break;
3430 }
3431 /* FALLTHROUGH */
3432 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07003433 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3434 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003435 break;
3436 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00003437 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003438 kfree_skb(skb);
3439 return 0;
3440}
3441
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003442static int ip6_pkt_discard(struct sk_buff *skb)
3443{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003444 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003445}
3446
Eric W. Biedermanede20592015-10-07 16:48:47 -05003447static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003448{
Eric Dumazetadf30902009-06-02 05:19:30 +00003449 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003450 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003451}
3452
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003453static int ip6_pkt_prohibit(struct sk_buff *skb)
3454{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003455 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003456}
3457
Eric W. Biedermanede20592015-10-07 16:48:47 -05003458static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003459{
Eric Dumazetadf30902009-06-02 05:19:30 +00003460 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07003461 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07003462}
3463
Linus Torvalds1da177e2005-04-16 15:20:36 -07003464/*
3465 * Allocate a dst for local (unicast / anycast) address.
3466 */
3467
3468struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3469 const struct in6_addr *addr,
David S. Miller8f031512011-12-06 16:48:14 -05003470 bool anycast)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003471{
David Ahernca254492015-10-12 11:47:10 -07003472 u32 tb_id;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09003473 struct net *net = dev_net(idev->dev);
David Ahern4832c302017-08-17 12:17:20 -07003474 struct net_device *dev = idev->dev;
David Ahern5f02ce242016-09-10 12:09:54 -07003475 struct rt6_info *rt;
3476
David Ahern5f02ce242016-09-10 12:09:54 -07003477 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
Hannes Frederic Sowaa3300ef2013-12-07 03:33:45 +01003478 if (!rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003479 return ERR_PTR(-ENOMEM);
3480
Linus Torvalds1da177e2005-04-16 15:20:36 -07003481 in6_dev_hold(idev);
3482
David S. Miller11d53b42011-06-24 15:23:34 -07003483 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07003484 rt->dst.input = ip6_input;
3485 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003486 rt->rt6i_idev = idev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003487
David Ahern94b5e0f2017-02-02 08:52:21 -08003488 rt->rt6i_protocol = RTPROT_KERNEL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003489 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09003490 if (anycast)
3491 rt->rt6i_flags |= RTF_ANYCAST;
3492 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07003493 rt->rt6i_flags |= RTF_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003494
Julian Anastasov550bab42013-10-20 15:43:04 +03003495 rt->rt6i_gateway = *addr;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00003496 rt->rt6i_dst.addr = *addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003497 rt->rt6i_dst.plen = 128;
David Ahernca254492015-10-12 11:47:10 -07003498 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3499 rt->rt6i_table = fib6_get_table(net, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003500
Linus Torvalds1da177e2005-04-16 15:20:36 -07003501 return rt;
3502}
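/*
 * Illustrative note (command and output are an approximation, not from
 * this code): the dsts built here back the per-address routes that
 * userspace sees as "local ..." and "anycast ..." entries in
 * "ip -6 route show table local".
 */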
3503
Daniel Walterc3968a82011-04-13 21:10:57 +00003504/* remove deleted ip from prefsrc entries */
3505struct arg_dev_net_ip {
3506 struct net_device *dev;
3507 struct net *net;
3508 struct in6_addr *addr;
3509};
3510
3511static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3512{
3513 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3514 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3515 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3516
David S. Millerd1918542011-12-28 20:19:20 -05003517 if (((void *)rt->dst.dev == dev || !dev) &&
Daniel Walterc3968a82011-04-13 21:10:57 +00003518 rt != net->ipv6.ip6_null_entry &&
3519 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
Wei Wang60006a42017-10-06 12:05:58 -07003520 spin_lock_bh(&rt6_exception_lock);
Daniel Walterc3968a82011-04-13 21:10:57 +00003521 /* remove prefsrc entry */
3522 rt->rt6i_prefsrc.plen = 0;
Wei Wang60006a42017-10-06 12:05:58 -07003523 /* need to update cache as well */
3524 rt6_exceptions_remove_prefsrc(rt);
3525 spin_unlock_bh(&rt6_exception_lock);
Daniel Walterc3968a82011-04-13 21:10:57 +00003526 }
3527 return 0;
3528}
3529
3530void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3531{
3532 struct net *net = dev_net(ifp->idev->dev);
3533 struct arg_dev_net_ip adni = {
3534 .dev = ifp->idev->dev,
3535 .net = net,
3536 .addr = &ifp->addr,
3537 };
Li RongQing0c3584d2013-12-27 16:32:38 +08003538 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
Daniel Walterc3968a82011-04-13 21:10:57 +00003539}
3540
Duan Jiongbe7a0102014-05-15 15:56:14 +08003541#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
Duan Jiongbe7a0102014-05-15 15:56:14 +08003542
3543/* Remove routers and update dst entries when gateway turn into host. */
3544static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3545{
3546 struct in6_addr *gateway = (struct in6_addr *)arg;
3547
Wei Wang2b760fc2017-10-06 12:06:03 -07003548 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3549 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
Duan Jiongbe7a0102014-05-15 15:56:14 +08003550 return -1;
3551 }
Wei Wangb16cb452017-10-06 12:06:00 -07003552
3553 /* Further clean up cached routes in exception table.
3554	 * This is needed because a cached route may have a different
3555 * gateway than its 'parent' in the case of an ip redirect.
3556 */
3557 rt6_exceptions_clean_tohost(rt, gateway);
3558
Duan Jiongbe7a0102014-05-15 15:56:14 +08003559 return 0;
3560}
3561
3562void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3563{
3564 fib6_clean_all(net, fib6_clean_tohost, gateway);
3565}
3566
Ido Schimmel2127d952018-01-07 12:45:03 +02003567struct arg_netdev_event {
3568 const struct net_device *dev;
Ido Schimmel4c981e22018-01-07 12:45:04 +02003569 union {
3570 unsigned int nh_flags;
3571 unsigned long event;
3572 };
Ido Schimmel2127d952018-01-07 12:45:03 +02003573};
3574
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003575static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3576{
3577 struct rt6_info *iter;
3578 struct fib6_node *fn;
3579
3580 fn = rcu_dereference_protected(rt->rt6i_node,
3581 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3582 iter = rcu_dereference_protected(fn->leaf,
3583 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3584 while (iter) {
3585 if (iter->rt6i_metric == rt->rt6i_metric &&
3586 rt6_qualify_for_ecmp(iter))
3587 return iter;
3588 iter = rcu_dereference_protected(iter->rt6_next,
3589 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3590 }
3591
3592 return NULL;
3593}
3594
3595static bool rt6_is_dead(const struct rt6_info *rt)
3596{
3597 if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
3598 (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
3599 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3600 return true;
3601
3602 return false;
3603}
3604
3605static int rt6_multipath_total_weight(const struct rt6_info *rt)
3606{
3607 struct rt6_info *iter;
3608 int total = 0;
3609
3610 if (!rt6_is_dead(rt))
Ido Schimmel398958a2018-01-09 16:40:28 +02003611 total += rt->rt6i_nh_weight;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003612
3613 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3614 if (!rt6_is_dead(iter))
Ido Schimmel398958a2018-01-09 16:40:28 +02003615 total += iter->rt6i_nh_weight;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003616 }
3617
3618 return total;
3619}
3620
3621static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3622{
3623 int upper_bound = -1;
3624
3625 if (!rt6_is_dead(rt)) {
Ido Schimmel398958a2018-01-09 16:40:28 +02003626 *weight += rt->rt6i_nh_weight;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003627 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3628 total) - 1;
3629 }
3630 atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
3631}
3632
3633static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3634{
3635 struct rt6_info *iter;
3636 int weight = 0;
3637
3638 rt6_upper_bound_set(rt, &weight, total);
3639
3640 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3641 rt6_upper_bound_set(iter, &weight, total);
3642}
3643
3644void rt6_multipath_rebalance(struct rt6_info *rt)
3645{
3646 struct rt6_info *first;
3647 int total;
3648
3649	/* If the entire multipath route was marked for flushing,
3650	 * there is no need to rebalance upon the removal of every
3651 * sibling route.
3652 */
3653 if (!rt->rt6i_nsiblings || rt->should_flush)
3654 return;
3655
3656 /* During lookup routes are evaluated in order, so we need to
3657 * make sure upper bounds are assigned from the first sibling
3658 * onwards.
3659 */
3660 first = rt6_multipath_first_sibling(rt);
3661 if (WARN_ON_ONCE(!first))
3662 return;
3663
3664 total = rt6_multipath_total_weight(first);
3665 rt6_multipath_upper_bound_set(first, total);
3666}
3667
Ido Schimmel2127d952018-01-07 12:45:03 +02003668static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3669{
3670 const struct arg_netdev_event *arg = p_arg;
3671 const struct net *net = dev_net(arg->dev);
3672
Ido Schimmel1de178e2018-01-07 12:45:15 +02003673 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
Ido Schimmel2127d952018-01-07 12:45:03 +02003674 rt->rt6i_nh_flags &= ~arg->nh_flags;
Ido Schimmel1de178e2018-01-07 12:45:15 +02003675 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003676 rt6_multipath_rebalance(rt);
Ido Schimmel1de178e2018-01-07 12:45:15 +02003677 }
Ido Schimmel2127d952018-01-07 12:45:03 +02003678
3679 return 0;
3680}
3681
3682void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3683{
3684 struct arg_netdev_event arg = {
3685 .dev = dev,
Ido Schimmel6802f3a2018-01-12 22:07:36 +02003686 {
3687 .nh_flags = nh_flags,
3688 },
Ido Schimmel2127d952018-01-07 12:45:03 +02003689 };
3690
3691 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3692 arg.nh_flags |= RTNH_F_LINKDOWN;
3693
3694 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3695}
3696
Ido Schimmel1de178e2018-01-07 12:45:15 +02003697static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3698 const struct net_device *dev)
3699{
3700 struct rt6_info *iter;
3701
3702 if (rt->dst.dev == dev)
3703 return true;
3704 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3705 if (iter->dst.dev == dev)
3706 return true;
3707
3708 return false;
3709}
3710
3711static void rt6_multipath_flush(struct rt6_info *rt)
3712{
3713 struct rt6_info *iter;
3714
3715 rt->should_flush = 1;
3716 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3717 iter->should_flush = 1;
3718}
3719
3720static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3721 const struct net_device *down_dev)
3722{
3723 struct rt6_info *iter;
3724 unsigned int dead = 0;
3725
3726 if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
3727 dead++;
3728 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3729 if (iter->dst.dev == down_dev ||
3730 iter->rt6i_nh_flags & RTNH_F_DEAD)
3731 dead++;
3732
3733 return dead;
3734}
3735
3736static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3737 const struct net_device *dev,
3738 unsigned int nh_flags)
3739{
3740 struct rt6_info *iter;
3741
3742 if (rt->dst.dev == dev)
3743 rt->rt6i_nh_flags |= nh_flags;
3744 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3745 if (iter->dst.dev == dev)
3746 iter->rt6i_nh_flags |= nh_flags;
3747}
3748
David Aherna1a22c12017-01-18 07:40:36 -08003749/* called with write lock held for table with rt */
Ido Schimmel4c981e22018-01-07 12:45:04 +02003750static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003751{
Ido Schimmel4c981e22018-01-07 12:45:04 +02003752 const struct arg_netdev_event *arg = p_arg;
3753 const struct net_device *dev = arg->dev;
3754 const struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003755
Ido Schimmel1de178e2018-01-07 12:45:15 +02003756 if (rt == net->ipv6.ip6_null_entry)
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003757 return 0;
3758
3759 switch (arg->event) {
3760 case NETDEV_UNREGISTER:
Ido Schimmel1de178e2018-01-07 12:45:15 +02003761 return rt->dst.dev == dev ? -1 : 0;
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003762 case NETDEV_DOWN:
Ido Schimmel1de178e2018-01-07 12:45:15 +02003763 if (rt->should_flush)
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003764 return -1;
Ido Schimmel1de178e2018-01-07 12:45:15 +02003765 if (!rt->rt6i_nsiblings)
3766 return rt->dst.dev == dev ? -1 : 0;
3767 if (rt6_multipath_uses_dev(rt, dev)) {
3768 unsigned int count;
3769
3770 count = rt6_multipath_dead_count(rt, dev);
3771 if (rt->rt6i_nsiblings + 1 == count) {
3772 rt6_multipath_flush(rt);
3773 return -1;
3774 }
3775 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3776 RTNH_F_LINKDOWN);
3777 fib6_update_sernum(rt);
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003778 rt6_multipath_rebalance(rt);
Ido Schimmel1de178e2018-01-07 12:45:15 +02003779 }
3780 return -2;
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003781 case NETDEV_CHANGE:
Ido Schimmel1de178e2018-01-07 12:45:15 +02003782 if (rt->dst.dev != dev ||
3783 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003784 break;
3785 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
Ido Schimmeld7dedee2018-01-09 16:40:25 +02003786 rt6_multipath_rebalance(rt);
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003787 break;
Ido Schimmel2b241362018-01-07 12:45:02 +02003788 }
David S. Millerc159d302011-12-26 15:24:36 -05003789
Linus Torvalds1da177e2005-04-16 15:20:36 -07003790 return 0;
3791}
3792
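/* Walk all FIB tables and let fib6_ifdown() update or remove the routes
 * affected by @event on @dev.
 */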
Ido Schimmel27c6fa72018-01-07 12:45:05 +02003793void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003794{
Ido Schimmel4c981e22018-01-07 12:45:04 +02003795 struct arg_netdev_event arg = {
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003796 .dev = dev,
Ido Schimmel6802f3a2018-01-12 22:07:36 +02003797 {
3798 .event = event,
3799 },
Daniel Lezcano8ed67782008-03-04 13:48:30 -08003800 };
3801
Ido Schimmel4c981e22018-01-07 12:45:04 +02003802 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3803}
3804
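/* Take IPv6 routing state for @dev down: sync the FIB, flush uncached
 * routes using the device and remove its ndisc neighbour entries.
 */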
3805void rt6_disable_ip(struct net_device *dev, unsigned long event)
3806{
3807 rt6_sync_down_dev(dev, event);
3808 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3809 neigh_ifdown(&nd_tbl, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003810}
3811
Eric Dumazet95c96172012-04-15 05:58:06 +00003812struct rt6_mtu_change_arg {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003813 struct net_device *dev;
Eric Dumazet95c96172012-04-15 05:58:06 +00003814 unsigned int mtu;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003815};
3816
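/* fib6_clean_all() callback for rt6_mtu_change(): propagate a device MTU
 * change to matching routes and their cached exception entries.
 */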
3817static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3818{
3819 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3820 struct inet6_dev *idev;
3821
3822	/* In IPv6, PMTU discovery is not optional,
3823	   so the RTAX_MTU lock cannot disable it.
3824	   We still use this lock to block changes
3825	   caused by addrconf/ndisc.
3826	*/
3827
3828 idev = __in6_dev_get(arg->dev);
David S. Miller38308472011-12-03 18:02:47 -05003829 if (!idev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003830 return 0;
3831
3832	/* For an administrative MTU increase, there is no way to discover
3833	   the corresponding IPv6 PMTU increase, so the PMTU should be updated here.
3834	   Since RFC 1981 doesn't cover administrative MTU increases,
3835	   updating the PMTU on increase is a MUST (e.g. for jumbo frames).
3836	 */
David S. Millerd1918542011-12-28 20:19:20 -05003837 if (rt->dst.dev == arg->dev &&
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07003838 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
Wei Wangf5bbe7e2017-10-06 12:05:59 -07003839 spin_lock_bh(&rt6_exception_lock);
Stefano Brivioe9fa1492018-03-06 11:10:19 +01003840 if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
3841 rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07003842 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Stefano Brivioe9fa1492018-03-06 11:10:19 +01003843 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
Wei Wangf5bbe7e2017-10-06 12:05:59 -07003844 spin_unlock_bh(&rt6_exception_lock);
Simon Arlott566cfd82007-07-26 00:09:55 -07003845 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003846 return 0;
3847}
3848
Eric Dumazet95c96172012-04-15 05:58:06 +00003849void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003850{
Thomas Grafc71099a2006-08-04 23:20:06 -07003851 struct rt6_mtu_change_arg arg = {
3852 .dev = dev,
3853 .mtu = mtu,
3854 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07003855
Li RongQing0c3584d2013-12-27 16:32:38 +08003856 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003857}
3858
Patrick McHardyef7c79e2007-06-05 12:38:30 -07003859static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07003860 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07003861 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07003862 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07003863 [RTA_PRIORITY] = { .type = NLA_U32 },
3864 [RTA_METRICS] = { .type = NLA_NESTED },
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003865 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01003866 [RTA_PREF] = { .type = NLA_U8 },
Roopa Prabhu19e42e42015-07-21 10:43:48 +02003867 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
3868 [RTA_ENCAP] = { .type = NLA_NESTED },
Xin Long32bc2012015-12-16 17:50:11 +08003869 [RTA_EXPIRES] = { .type = NLA_U32 },
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09003870 [RTA_UID] = { .type = NLA_U32 },
Liping Zhang3b45a412017-02-27 20:59:39 +08003871 [RTA_MARK] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07003872};
3873
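/* Parse and validate an RTM_NEWROUTE/RTM_DELROUTE request into a
 * struct fib6_config, checking attributes against rtm_ipv6_policy.
 */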
3874static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
David Ahern333c4302017-05-21 10:12:04 -06003875 struct fib6_config *cfg,
3876 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003877{
Thomas Graf86872cb2006-08-22 00:01:08 -07003878 struct rtmsg *rtm;
3879 struct nlattr *tb[RTA_MAX+1];
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01003880 unsigned int pref;
Thomas Graf86872cb2006-08-22 00:01:08 -07003881 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003882
Johannes Bergfceb6432017-04-12 14:34:07 +02003883 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3884 NULL);
Thomas Graf86872cb2006-08-22 00:01:08 -07003885 if (err < 0)
3886 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003887
Thomas Graf86872cb2006-08-22 00:01:08 -07003888 err = -EINVAL;
3889 rtm = nlmsg_data(nlh);
3890 memset(cfg, 0, sizeof(*cfg));
3891
3892 cfg->fc_table = rtm->rtm_table;
3893 cfg->fc_dst_len = rtm->rtm_dst_len;
3894 cfg->fc_src_len = rtm->rtm_src_len;
3895 cfg->fc_flags = RTF_UP;
3896 cfg->fc_protocol = rtm->rtm_protocol;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00003897 cfg->fc_type = rtm->rtm_type;
Thomas Graf86872cb2006-08-22 00:01:08 -07003898
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00003899 if (rtm->rtm_type == RTN_UNREACHABLE ||
3900 rtm->rtm_type == RTN_BLACKHOLE ||
Nicolas Dichtelb4949ab2012-09-06 05:53:35 +00003901 rtm->rtm_type == RTN_PROHIBIT ||
3902 rtm->rtm_type == RTN_THROW)
Thomas Graf86872cb2006-08-22 00:01:08 -07003903 cfg->fc_flags |= RTF_REJECT;
3904
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00003905 if (rtm->rtm_type == RTN_LOCAL)
3906 cfg->fc_flags |= RTF_LOCAL;
3907
Martin KaFai Lau1f56a01f2015-04-28 13:03:03 -07003908 if (rtm->rtm_flags & RTM_F_CLONED)
3909 cfg->fc_flags |= RTF_CACHE;
3910
David Ahernfc1e64e2018-01-25 16:55:09 -08003911 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
3912
Eric W. Biederman15e47302012-09-07 20:12:54 +00003913 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
Thomas Graf86872cb2006-08-22 00:01:08 -07003914 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09003915 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07003916
3917 if (tb[RTA_GATEWAY]) {
Jiri Benc67b61f62015-03-29 16:59:26 +02003918 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
Thomas Graf86872cb2006-08-22 00:01:08 -07003919 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003920 }
Thomas Graf86872cb2006-08-22 00:01:08 -07003921
3922 if (tb[RTA_DST]) {
3923 int plen = (rtm->rtm_dst_len + 7) >> 3;
3924
3925 if (nla_len(tb[RTA_DST]) < plen)
3926 goto errout;
3927
3928 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003929 }
Thomas Graf86872cb2006-08-22 00:01:08 -07003930
3931 if (tb[RTA_SRC]) {
3932 int plen = (rtm->rtm_src_len + 7) >> 3;
3933
3934 if (nla_len(tb[RTA_SRC]) < plen)
3935 goto errout;
3936
3937 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003938 }
Thomas Graf86872cb2006-08-22 00:01:08 -07003939
Daniel Walterc3968a82011-04-13 21:10:57 +00003940 if (tb[RTA_PREFSRC])
Jiri Benc67b61f62015-03-29 16:59:26 +02003941 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
Daniel Walterc3968a82011-04-13 21:10:57 +00003942
Thomas Graf86872cb2006-08-22 00:01:08 -07003943 if (tb[RTA_OIF])
3944 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3945
3946 if (tb[RTA_PRIORITY])
3947 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3948
3949 if (tb[RTA_METRICS]) {
3950 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3951 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003952 }
Thomas Graf86872cb2006-08-22 00:01:08 -07003953
3954 if (tb[RTA_TABLE])
3955 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3956
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003957 if (tb[RTA_MULTIPATH]) {
3958 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3959 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
David Ahern9ed59592017-01-17 14:57:36 -08003960
3961 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
David Ahernc255bd62017-05-27 16:19:27 -06003962 cfg->fc_mp_len, extack);
David Ahern9ed59592017-01-17 14:57:36 -08003963 if (err < 0)
3964 goto errout;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00003965 }
3966
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01003967 if (tb[RTA_PREF]) {
3968 pref = nla_get_u8(tb[RTA_PREF]);
3969 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3970 pref != ICMPV6_ROUTER_PREF_HIGH)
3971 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3972 cfg->fc_flags |= RTF_PREF(pref);
3973 }
3974
Roopa Prabhu19e42e42015-07-21 10:43:48 +02003975 if (tb[RTA_ENCAP])
3976 cfg->fc_encap = tb[RTA_ENCAP];
3977
David Ahern9ed59592017-01-17 14:57:36 -08003978 if (tb[RTA_ENCAP_TYPE]) {
Roopa Prabhu19e42e42015-07-21 10:43:48 +02003979 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3980
David Ahernc255bd62017-05-27 16:19:27 -06003981 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
David Ahern9ed59592017-01-17 14:57:36 -08003982 if (err < 0)
3983 goto errout;
3984 }
3985
Xin Long32bc2012015-12-16 17:50:11 +08003986 if (tb[RTA_EXPIRES]) {
3987 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3988
3989 if (addrconf_finite_timeout(timeout)) {
3990 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3991 cfg->fc_flags |= RTF_EXPIRES;
3992 }
3993 }
3994
Thomas Graf86872cb2006-08-22 00:01:08 -07003995 err = 0;
3996errout:
3997 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003998}
3999
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004000struct rt6_nh {
4001 struct rt6_info *rt6_info;
4002 struct fib6_config r_cfg;
4003 struct mx6_config mxc;
4004 struct list_head next;
4005};
4006
4007static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4008{
4009 struct rt6_nh *nh;
4010
4011 list_for_each_entry(nh, rt6_nh_list, next) {
David Ahern7d4d5062017-02-02 12:37:12 -08004012 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004013 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4014 nh->r_cfg.fc_ifindex);
4015 }
4016}
4017
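/* Append @rt to @rt6_nh_list unless an equivalent nexthop is already
 * present, in which case -EEXIST is returned.
 */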
4018static int ip6_route_info_append(struct list_head *rt6_nh_list,
4019 struct rt6_info *rt, struct fib6_config *r_cfg)
4020{
4021 struct rt6_nh *nh;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004022 int err = -EEXIST;
4023
4024 list_for_each_entry(nh, rt6_nh_list, next) {
4025 /* check if rt6_info already exists */
David Ahernf06b7542017-07-05 14:41:46 -06004026 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004027 return err;
4028 }
4029
4030 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4031 if (!nh)
4032 return -ENOMEM;
4033 nh->rt6_info = rt;
4034 err = ip6_convert_metrics(&nh->mxc, r_cfg);
4035 if (err) {
4036 kfree(nh);
4037 return err;
4038 }
4039 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4040 list_add_tail(&nh->next, rt6_nh_list);
4041
4042 return 0;
4043}
4044
David Ahern3b1137f2017-02-02 12:37:10 -08004045static void ip6_route_mpath_notify(struct rt6_info *rt,
4046 struct rt6_info *rt_last,
4047 struct nl_info *info,
4048 __u16 nlflags)
4049{
4050	/* if this is an APPEND route, then rt points to the first route
4051	 * inserted and rt_last points to the last route inserted. Userspace
4052	 * wants a consistent dump of the route which starts at the first
4053	 * nexthop. Since sibling routes are always added at the end of
4054	 * the list, find the first sibling of the last route appended.
4055	 */
4056 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4057 rt = list_first_entry(&rt_last->rt6i_siblings,
4058 struct rt6_info,
4059 rt6i_siblings);
4060 }
4061
4062 if (rt)
4063 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4064}
4065
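/* Insert one route per RTA_MULTIPATH nexthop, sending a single notification
 * for the whole group; if any insertion fails, the routes that were already
 * added are deleted again.
 */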
David Ahern333c4302017-05-21 10:12:04 -06004066static int ip6_route_multipath_add(struct fib6_config *cfg,
4067 struct netlink_ext_ack *extack)
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004068{
David Ahern3b1137f2017-02-02 12:37:10 -08004069 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4070 struct nl_info *info = &cfg->fc_nlinfo;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004071 struct fib6_config r_cfg;
4072 struct rtnexthop *rtnh;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004073 struct rt6_info *rt;
4074 struct rt6_nh *err_nh;
4075 struct rt6_nh *nh, *nh_safe;
David Ahern3b1137f2017-02-02 12:37:10 -08004076 __u16 nlflags;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004077 int remaining;
4078 int attrlen;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004079 int err = 1;
4080 int nhn = 0;
4081 int replace = (cfg->fc_nlinfo.nlh &&
4082 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4083 LIST_HEAD(rt6_nh_list);
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004084
David Ahern3b1137f2017-02-02 12:37:10 -08004085 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4086 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4087 nlflags |= NLM_F_APPEND;
4088
Michal Kubeček35f1b4e2015-05-18 20:53:55 +02004089 remaining = cfg->fc_mp_len;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004090 rtnh = (struct rtnexthop *)cfg->fc_mp;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004091
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004092	/* Parse each multipath entry and build a list (rt6_nh_list) of
4093	 * rt6_info structs, one per nexthop
4094	 */
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004095 while (rtnh_ok(rtnh, remaining)) {
4096 memcpy(&r_cfg, cfg, sizeof(*cfg));
4097 if (rtnh->rtnh_ifindex)
4098 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4099
4100 attrlen = rtnh_attrlen(rtnh);
4101 if (attrlen > 0) {
4102 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4103
4104 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4105 if (nla) {
Jiri Benc67b61f62015-03-29 16:59:26 +02004106 r_cfg.fc_gateway = nla_get_in6_addr(nla);
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004107 r_cfg.fc_flags |= RTF_GATEWAY;
4108 }
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004109 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4110 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4111 if (nla)
4112 r_cfg.fc_encap_type = nla_get_u16(nla);
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004113 }
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004114
David Ahern333c4302017-05-21 10:12:04 -06004115 rt = ip6_route_info_create(&r_cfg, extack);
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07004116 if (IS_ERR(rt)) {
4117 err = PTR_ERR(rt);
4118 rt = NULL;
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004119 goto cleanup;
Roopa Prabhu8c5b83f2015-10-10 08:26:36 -07004120 }
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004121
Ido Schimmel398958a2018-01-09 16:40:28 +02004122 rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
4123
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004124 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004125 if (err) {
Wei Wang587fea72017-06-17 10:42:36 -07004126 dst_release_immediate(&rt->dst);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004127 goto cleanup;
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004128 }
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004129
4130 rtnh = rtnh_next(rtnh, &remaining);
4131 }
4132
David Ahern3b1137f2017-02-02 12:37:10 -08004133 /* for add and replace send one notification with all nexthops.
4134 * Skip the notification in fib6_add_rt2node and send one with
4135 * the full route when done
4136 */
4137 info->skip_notify = 1;
4138
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004139 err_nh = NULL;
4140 list_for_each_entry(nh, &rt6_nh_list, next) {
David Ahern3b1137f2017-02-02 12:37:10 -08004141 rt_last = nh->rt6_info;
David Ahern333c4302017-05-21 10:12:04 -06004142 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
David Ahern3b1137f2017-02-02 12:37:10 -08004143 /* save reference to first route for notification */
4144 if (!rt_notif && !err)
4145 rt_notif = nh->rt6_info;
4146
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004147		/* nh->rt6_info is used or freed at this point, reset to NULL */
4148 nh->rt6_info = NULL;
4149 if (err) {
4150 if (replace && nhn)
4151 ip6_print_replace_route_err(&rt6_nh_list);
4152 err_nh = nh;
4153 goto add_errout;
4154 }
4155
Nicolas Dichtel1a724182012-11-01 22:58:22 +00004156		/* Because each route is added like a single route, we remove
Michal Kubeček27596472015-05-18 20:54:00 +02004157		 * these flags after the first nexthop: if there is a collision,
4158		 * we have already failed to add the first nexthop, since
4159		 * fib6_add_rt2node() has rejected it; when replacing, the old
4160		 * nexthops have been replaced by the first new one, and the rest
4161		 * should be appended to it.
Nicolas Dichtel1a724182012-11-01 22:58:22 +00004162		 */
Michal Kubeček27596472015-05-18 20:54:00 +02004163 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4164 NLM_F_REPLACE);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004165 nhn++;
4166 }
4167
David Ahern3b1137f2017-02-02 12:37:10 -08004168 /* success ... tell user about new route */
4169 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004170 goto cleanup;
4171
4172add_errout:
David Ahern3b1137f2017-02-02 12:37:10 -08004173 /* send notification for routes that were added so that
4174 * the delete notifications sent by ip6_route_del are
4175 * coherent
4176 */
4177 if (rt_notif)
4178 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4179
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004180 /* Delete routes that were already added */
4181 list_for_each_entry(nh, &rt6_nh_list, next) {
4182 if (err_nh == nh)
4183 break;
David Ahern333c4302017-05-21 10:12:04 -06004184 ip6_route_del(&nh->r_cfg, extack);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004185 }
4186
4187cleanup:
4188 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
Wei Wang587fea72017-06-17 10:42:36 -07004189 if (nh->rt6_info)
4190 dst_release_immediate(&nh->rt6_info->dst);
Wu Fengguang52fe51f2015-09-10 06:57:12 +08004191 kfree(nh->mxc.mx);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004192 list_del(&nh->next);
4193 kfree(nh);
4194 }
4195
4196 return err;
4197}
4198
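/* Delete the route for each nexthop listed in RTA_MULTIPATH; the last
 * error encountered, if any, is returned.
 */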
David Ahern333c4302017-05-21 10:12:04 -06004199static int ip6_route_multipath_del(struct fib6_config *cfg,
4200 struct netlink_ext_ack *extack)
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004201{
4202 struct fib6_config r_cfg;
4203 struct rtnexthop *rtnh;
4204 int remaining;
4205 int attrlen;
4206 int err = 1, last_err = 0;
4207
4208 remaining = cfg->fc_mp_len;
4209 rtnh = (struct rtnexthop *)cfg->fc_mp;
4210
4211 /* Parse a Multipath Entry */
4212 while (rtnh_ok(rtnh, remaining)) {
4213 memcpy(&r_cfg, cfg, sizeof(*cfg));
4214 if (rtnh->rtnh_ifindex)
4215 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4216
4217 attrlen = rtnh_attrlen(rtnh);
4218 if (attrlen > 0) {
4219 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4220
4221 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4222 if (nla) {
4223 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4224 r_cfg.fc_flags |= RTF_GATEWAY;
4225 }
4226 }
David Ahern333c4302017-05-21 10:12:04 -06004227 err = ip6_route_del(&r_cfg, extack);
Roopa Prabhu6b9ea5a2015-09-08 10:53:04 -07004228 if (err)
4229 last_err = err;
4230
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004231 rtnh = rtnh_next(rtnh, &remaining);
4232 }
4233
4234 return last_err;
4235}
4236
David Ahernc21ef3e2017-04-16 09:48:24 -07004237static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4238 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004239{
Thomas Graf86872cb2006-08-22 00:01:08 -07004240 struct fib6_config cfg;
4241 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004242
David Ahern333c4302017-05-21 10:12:04 -06004243 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
Thomas Graf86872cb2006-08-22 00:01:08 -07004244 if (err < 0)
4245 return err;
4246
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004247 if (cfg.fc_mp)
David Ahern333c4302017-05-21 10:12:04 -06004248 return ip6_route_multipath_del(&cfg, extack);
David Ahern0ae81332017-02-02 12:37:08 -08004249 else {
4250 cfg.fc_delete_all_nh = 1;
David Ahern333c4302017-05-21 10:12:04 -06004251 return ip6_route_del(&cfg, extack);
David Ahern0ae81332017-02-02 12:37:08 -08004252 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004253}
4254
David Ahernc21ef3e2017-04-16 09:48:24 -07004255static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4256 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004257{
Thomas Graf86872cb2006-08-22 00:01:08 -07004258 struct fib6_config cfg;
4259 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004260
David Ahern333c4302017-05-21 10:12:04 -06004261 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
Thomas Graf86872cb2006-08-22 00:01:08 -07004262 if (err < 0)
4263 return err;
4264
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004265 if (cfg.fc_mp)
David Ahern333c4302017-05-21 10:12:04 -06004266 return ip6_route_multipath_add(&cfg, extack);
Nicolas Dichtel51ebd3182012-10-22 03:42:09 +00004267 else
David Ahern333c4302017-05-21 10:12:04 -06004268 return ip6_route_add(&cfg, extack);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004269}
4270
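/* Upper bound on the netlink message size needed to describe @rt,
 * including an RTA_MULTIPATH attribute covering all of its siblings.
 */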
David Ahernbeb1afac52017-02-02 12:37:09 -08004271static size_t rt6_nlmsg_size(struct rt6_info *rt)
Thomas Graf339bf982006-11-10 14:10:15 -08004272{
David Ahernbeb1afac52017-02-02 12:37:09 -08004273 int nexthop_len = 0;
4274
4275 if (rt->rt6i_nsiblings) {
4276 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4277 + NLA_ALIGN(sizeof(struct rtnexthop))
4278 + nla_total_size(16) /* RTA_GATEWAY */
David Ahernbeb1afac52017-02-02 12:37:09 -08004279 + lwtunnel_get_encap_size(rt->dst.lwtstate);
4280
4281 nexthop_len *= rt->rt6i_nsiblings;
4282 }
4283
Thomas Graf339bf982006-11-10 14:10:15 -08004284 return NLMSG_ALIGN(sizeof(struct rtmsg))
4285 + nla_total_size(16) /* RTA_SRC */
4286 + nla_total_size(16) /* RTA_DST */
4287 + nla_total_size(16) /* RTA_GATEWAY */
4288 + nla_total_size(16) /* RTA_PREFSRC */
4289 + nla_total_size(4) /* RTA_TABLE */
4290 + nla_total_size(4) /* RTA_IIF */
4291 + nla_total_size(4) /* RTA_OIF */
4292 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08004293 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Daniel Borkmannea697632015-01-05 23:57:47 +01004294 + nla_total_size(sizeof(struct rta_cacheinfo))
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01004295 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004296 + nla_total_size(1) /* RTA_PREF */
David Ahernbeb1afac52017-02-02 12:37:09 -08004297 + lwtunnel_get_encap_size(rt->dst.lwtstate)
4298 + nexthop_len;
4299}
4300
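/* Dump nexthop attributes (gateway, oif, lwtunnel encap) for @rt and
 * collect its RTNH_F_* flags; @skip_oif is set by the multipath path,
 * which carries the ifindex in struct rtnexthop instead.
 */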
4301static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
David Ahern5be083c2017-03-06 15:57:31 -08004302 unsigned int *flags, bool skip_oif)
David Ahernbeb1afac52017-02-02 12:37:09 -08004303{
Ido Schimmelf9d882e2018-01-07 12:45:10 +02004304 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
4305 *flags |= RTNH_F_DEAD;
4306
Ido Schimmel44c9f2f2018-01-07 12:45:08 +02004307 if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
David Ahernbeb1afac52017-02-02 12:37:09 -08004308 *flags |= RTNH_F_LINKDOWN;
4309 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4310 *flags |= RTNH_F_DEAD;
4311 }
4312
4313 if (rt->rt6i_flags & RTF_GATEWAY) {
4314 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
4315 goto nla_put_failure;
4316 }
4317
David Ahernfc1e64e2018-01-25 16:55:09 -08004318 *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
Ido Schimmelfe400792017-08-15 09:09:49 +02004319 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
Ido Schimmel61e4d012017-08-03 13:28:20 +02004320 *flags |= RTNH_F_OFFLOAD;
4321
David Ahern5be083c2017-03-06 15:57:31 -08004322 /* not needed for multipath encoding b/c it has a rtnexthop struct */
4323 if (!skip_oif && rt->dst.dev &&
David Ahernbeb1afac52017-02-02 12:37:09 -08004324 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
4325 goto nla_put_failure;
4326
4327 if (rt->dst.lwtstate &&
4328 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
4329 goto nla_put_failure;
4330
4331 return 0;
4332
4333nla_put_failure:
4334 return -EMSGSIZE;
4335}
4336
David Ahern5be083c2017-03-06 15:57:31 -08004337/* add multipath next hop */
David Ahernbeb1afac52017-02-02 12:37:09 -08004338static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4339{
4340 struct rtnexthop *rtnh;
4341 unsigned int flags = 0;
4342
4343 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4344 if (!rtnh)
4345 goto nla_put_failure;
4346
Ido Schimmel398958a2018-01-09 16:40:28 +02004347 rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
David Ahernbeb1afac52017-02-02 12:37:09 -08004348 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
4349
David Ahern5be083c2017-03-06 15:57:31 -08004350 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
David Ahernbeb1afac52017-02-02 12:37:09 -08004351 goto nla_put_failure;
4352
4353 rtnh->rtnh_flags = flags;
4354
4355 /* length of rtnetlink header + attributes */
4356 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4357
4358 return 0;
4359
4360nla_put_failure:
4361 return -EMSGSIZE;
Thomas Graf339bf982006-11-10 14:10:15 -08004362}
4363
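/* Fill @skb with a route netlink message of the given type describing @rt;
 * multipath routes are encoded as an RTA_MULTIPATH nest of rtnexthop entries.
 */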
Brian Haley191cd582008-08-14 15:33:21 -07004364static int rt6_fill_node(struct net *net,
4365 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07004366 struct in6_addr *dst, struct in6_addr *src,
Eric W. Biederman15e47302012-09-07 20:12:54 +00004367 int iif, int type, u32 portid, u32 seq,
David Ahernf8cfe2c2017-01-17 15:51:08 -08004368 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004369{
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07004370 u32 metrics[RTAX_MAX];
Linus Torvalds1da177e2005-04-16 15:20:36 -07004371 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07004372 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08004373 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07004374 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004375
Eric W. Biederman15e47302012-09-07 20:12:54 +00004376 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
David S. Miller38308472011-12-03 18:02:47 -05004377 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08004378 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07004379
4380 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004381 rtm->rtm_family = AF_INET6;
4382 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4383 rtm->rtm_src_len = rt->rt6i_src.plen;
4384 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07004385 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07004386 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07004387 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07004388 table = RT6_TABLE_UNSPEC;
4389 rtm->rtm_table = table;
David S. Millerc78679e2012-04-01 20:27:33 -04004390 if (nla_put_u32(skb, RTA_TABLE, table))
4391 goto nla_put_failure;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00004392 if (rt->rt6i_flags & RTF_REJECT) {
4393 switch (rt->dst.error) {
4394 case -EINVAL:
4395 rtm->rtm_type = RTN_BLACKHOLE;
4396 break;
4397 case -EACCES:
4398 rtm->rtm_type = RTN_PROHIBIT;
4399 break;
Nicolas Dichtelb4949ab2012-09-06 05:53:35 +00004400 case -EAGAIN:
4401 rtm->rtm_type = RTN_THROW;
4402 break;
Nicolas Dichtelef2c7d72012-09-05 02:12:42 +00004403 default:
4404 rtm->rtm_type = RTN_UNREACHABLE;
4405 break;
4406 }
4407 }
David S. Miller38308472011-12-03 18:02:47 -05004408 else if (rt->rt6i_flags & RTF_LOCAL)
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00004409 rtm->rtm_type = RTN_LOCAL;
David Ahern4ee39732017-03-15 18:14:33 -07004410 else if (rt->rt6i_flags & RTF_ANYCAST)
4411 rtm->rtm_type = RTN_ANYCAST;
David S. Millerd1918542011-12-28 20:19:20 -05004412 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07004413 rtm->rtm_type = RTN_LOCAL;
4414 else
4415 rtm->rtm_type = RTN_UNICAST;
4416 rtm->rtm_flags = 0;
4417 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4418 rtm->rtm_protocol = rt->rt6i_protocol;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004419
David S. Miller38308472011-12-03 18:02:47 -05004420 if (rt->rt6i_flags & RTF_CACHE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004421 rtm->rtm_flags |= RTM_F_CLONED;
4422
4423 if (dst) {
Jiri Benc930345e2015-03-29 16:59:25 +02004424 if (nla_put_in6_addr(skb, RTA_DST, dst))
David S. Millerc78679e2012-04-01 20:27:33 -04004425 goto nla_put_failure;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09004426 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004427 } else if (rtm->rtm_dst_len)
Jiri Benc930345e2015-03-29 16:59:25 +02004428 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
David S. Millerc78679e2012-04-01 20:27:33 -04004429 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004430#ifdef CONFIG_IPV6_SUBTREES
4431 if (src) {
Jiri Benc930345e2015-03-29 16:59:25 +02004432 if (nla_put_in6_addr(skb, RTA_SRC, src))
David S. Millerc78679e2012-04-01 20:27:33 -04004433 goto nla_put_failure;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09004434 rtm->rtm_src_len = 128;
David S. Millerc78679e2012-04-01 20:27:33 -04004435 } else if (rtm->rtm_src_len &&
Jiri Benc930345e2015-03-29 16:59:25 +02004436 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
David S. Millerc78679e2012-04-01 20:27:33 -04004437 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004438#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09004439 if (iif) {
4440#ifdef CONFIG_IPV6_MROUTE
4441 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
David Ahernfd61c6b2017-01-17 15:51:07 -08004442 int err = ip6mr_get_route(net, skb, rtm, portid);
Nikolay Aleksandrov2cf75072016-09-25 23:08:31 +02004443
David Ahernfd61c6b2017-01-17 15:51:07 -08004444 if (err == 0)
4445 return 0;
4446 if (err < 0)
4447 goto nla_put_failure;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09004448 } else
4449#endif
David S. Millerc78679e2012-04-01 20:27:33 -04004450 if (nla_put_u32(skb, RTA_IIF, iif))
4451 goto nla_put_failure;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09004452 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004453 struct in6_addr saddr_buf;
David S. Millerc78679e2012-04-01 20:27:33 -04004454 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
Jiri Benc930345e2015-03-29 16:59:25 +02004455 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
David S. Millerc78679e2012-04-01 20:27:33 -04004456 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004457 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07004458
Daniel Walterc3968a82011-04-13 21:10:57 +00004459 if (rt->rt6i_prefsrc.plen) {
4460 struct in6_addr saddr_buf;
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00004461 saddr_buf = rt->rt6i_prefsrc.addr;
Jiri Benc930345e2015-03-29 16:59:25 +02004462 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
David S. Millerc78679e2012-04-01 20:27:33 -04004463 goto nla_put_failure;
Daniel Walterc3968a82011-04-13 21:10:57 +00004464 }
4465
Martin KaFai Lau4b32b5a2015-04-28 13:03:06 -07004466 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4467 if (rt->rt6i_pmtu)
4468 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4469 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07004470 goto nla_put_failure;
4471
David S. Millerc78679e2012-04-01 20:27:33 -04004472 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4473 goto nla_put_failure;
Li Wei82539472012-07-29 16:01:30 +00004474
David Ahernbeb1afac52017-02-02 12:37:09 -08004475 /* For multipath routes, walk the siblings list and add
4476 * each as a nexthop within RTA_MULTIPATH.
4477 */
4478 if (rt->rt6i_nsiblings) {
4479 struct rt6_info *sibling, *next_sibling;
4480 struct nlattr *mp;
4481
4482 mp = nla_nest_start(skb, RTA_MULTIPATH);
4483 if (!mp)
4484 goto nla_put_failure;
4485
4486 if (rt6_add_nexthop(skb, rt) < 0)
4487 goto nla_put_failure;
4488
4489 list_for_each_entry_safe(sibling, next_sibling,
4490 &rt->rt6i_siblings, rt6i_siblings) {
4491 if (rt6_add_nexthop(skb, sibling) < 0)
4492 goto nla_put_failure;
4493 }
4494
4495 nla_nest_end(skb, mp);
4496 } else {
David Ahern5be083c2017-03-06 15:57:31 -08004497 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
David Ahernbeb1afac52017-02-02 12:37:09 -08004498 goto nla_put_failure;
4499 }
4500
Li Wei82539472012-07-29 16:01:30 +00004501 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07004502
David S. Miller87a50692012-07-10 05:06:14 -07004503 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08004504 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004505
Lubomir Rintelc78ba6d2015-03-11 15:39:21 +01004506 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4507 goto nla_put_failure;
4508
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004509
Johannes Berg053c0952015-01-16 22:09:00 +01004510 nlmsg_end(skb, nlh);
4511 return 0;
Thomas Graf2d7202b2006-08-22 00:01:27 -07004512
4513nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08004514 nlmsg_cancel(skb, nlh);
4515 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004516}
4517
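/* fib6 dump callback: emit one RTM_NEWROUTE message per route, returning 1
 * to skip routes filtered out by RTM_F_PREFIX.
 */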
Patrick McHardy1b43af52006-08-10 23:11:17 -07004518int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004519{
4520 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
David Ahern1f17e2f2017-01-26 13:54:08 -08004521 struct net *net = arg->net;
4522
4523 if (rt == net->ipv6.ip6_null_entry)
4524 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004525
Thomas Graf2d7202b2006-08-22 00:01:27 -07004526 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4527 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
David Ahernf8cfe2c2017-01-17 15:51:08 -08004528
4529 /* user wants prefix routes only */
4530 if (rtm->rtm_flags & RTM_F_PREFIX &&
4531 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4532 /* success since this is not a prefix route */
4533 return 1;
4534 }
4535 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004536
David Ahern1f17e2f2017-01-26 13:54:08 -08004537 return rt6_fill_node(net,
Brian Haley191cd582008-08-14 15:33:21 -07004538 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Eric W. Biederman15e47302012-09-07 20:12:54 +00004539 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
David Ahernf8cfe2c2017-01-17 15:51:08 -08004540 NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004541}
4542
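/* RTM_GETROUTE handler: resolve the requested flow (input lookup when
 * RTA_IIF is given, output lookup otherwise) and report the result; with
 * RTM_F_FIB_MATCH the matching FIB entry is reported instead of the
 * cached route.
 */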
David Ahernc21ef3e2017-04-16 09:48:24 -07004543static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4544 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004545{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09004546 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07004547 struct nlattr *tb[RTA_MAX+1];
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004548 int err, iif = 0, oif = 0;
4549 struct dst_entry *dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004550 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07004551 struct sk_buff *skb;
4552 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05004553 struct flowi6 fl6;
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004554 bool fibmatch;
Thomas Grafab364a62006-08-22 00:01:47 -07004555
Johannes Bergfceb6432017-04-12 14:34:07 +02004556 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
David Ahernc21ef3e2017-04-16 09:48:24 -07004557 extack);
Thomas Grafab364a62006-08-22 00:01:47 -07004558 if (err < 0)
4559 goto errout;
4560
4561 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05004562 memset(&fl6, 0, sizeof(fl6));
Hannes Frederic Sowa38b70972016-06-11 20:08:19 +02004563 rtm = nlmsg_data(nlh);
4564 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004565 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
Thomas Grafab364a62006-08-22 00:01:47 -07004566
4567 if (tb[RTA_SRC]) {
4568 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4569 goto errout;
4570
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00004571 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
Thomas Grafab364a62006-08-22 00:01:47 -07004572 }
4573
4574 if (tb[RTA_DST]) {
4575 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4576 goto errout;
4577
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +00004578 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
Thomas Grafab364a62006-08-22 00:01:47 -07004579 }
4580
4581 if (tb[RTA_IIF])
4582 iif = nla_get_u32(tb[RTA_IIF]);
4583
4584 if (tb[RTA_OIF])
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004585 oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07004586
Lorenzo Colitti2e47b292014-05-15 16:38:41 -07004587 if (tb[RTA_MARK])
4588 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4589
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09004590 if (tb[RTA_UID])
4591 fl6.flowi6_uid = make_kuid(current_user_ns(),
4592 nla_get_u32(tb[RTA_UID]));
4593 else
4594 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4595
Thomas Grafab364a62006-08-22 00:01:47 -07004596 if (iif) {
4597 struct net_device *dev;
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004598 int flags = 0;
4599
Florian Westphal121622d2017-08-15 16:34:42 +02004600 rcu_read_lock();
4601
4602 dev = dev_get_by_index_rcu(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07004603 if (!dev) {
Florian Westphal121622d2017-08-15 16:34:42 +02004604 rcu_read_unlock();
Thomas Grafab364a62006-08-22 00:01:47 -07004605 err = -ENODEV;
4606 goto errout;
4607 }
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004608
4609 fl6.flowi6_iif = iif;
4610
4611 if (!ipv6_addr_any(&fl6.saddr))
4612 flags |= RT6_LOOKUP_F_HAS_SADDR;
4613
Ido Schimmel58acfd72017-12-20 12:28:25 +02004614 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
Florian Westphal121622d2017-08-15 16:34:42 +02004615
4616 rcu_read_unlock();
Shmulik Ladkani72331bc2012-04-01 04:03:45 +00004617 } else {
4618 fl6.flowi6_oif = oif;
4619
Ido Schimmel58acfd72017-12-20 12:28:25 +02004620 dst = ip6_route_output(net, NULL, &fl6);
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004621 }
4622
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004623
4624 rt = container_of(dst, struct rt6_info, dst);
4625 if (rt->dst.error) {
4626 err = rt->dst.error;
4627 ip6_rt_put(rt);
4628 goto errout;
Thomas Grafab364a62006-08-22 00:01:47 -07004629 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004630
WANG Cong9d6acb32017-03-01 20:48:39 -08004631 if (rt == net->ipv6.ip6_null_entry) {
4632 err = rt->dst.error;
4633 ip6_rt_put(rt);
4634 goto errout;
4635 }
4636
David S. Millerfba961a2017-12-22 11:16:31 -05004637 if (fibmatch && rt->from) {
4638 struct rt6_info *ort = rt->from;
Ido Schimmel58acfd72017-12-20 12:28:25 +02004639
4640 dst_hold(&ort->dst);
4641 ip6_rt_put(rt);
4642 rt = ort;
4643 }
4644
Linus Torvalds1da177e2005-04-16 15:20:36 -07004645 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
David S. Miller38308472011-12-03 18:02:47 -05004646 if (!skb) {
Amerigo Wang94e187c2012-10-29 00:13:19 +00004647 ip6_rt_put(rt);
Thomas Grafab364a62006-08-22 00:01:47 -07004648 err = -ENOBUFS;
4649 goto errout;
4650 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004651
Changli Gaod8d1f302010-06-10 23:31:35 -07004652 skb_dst_set(skb, &rt->dst);
Roopa Prabhu18c3a612017-05-25 10:42:40 -07004653 if (fibmatch)
4654 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4655 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4656 nlh->nlmsg_seq, 0);
4657 else
4658 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4659 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4660 nlh->nlmsg_seq, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004661 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07004662 kfree_skb(skb);
4663 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004664 }
4665
Eric W. Biederman15e47302012-09-07 20:12:54 +00004666 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
Thomas Grafab364a62006-08-22 00:01:47 -07004667errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07004668 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004669}
4670
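/* Notify RTNLGRP_IPV6_ROUTE listeners about @rt for the given event. */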
Roopa Prabhu37a1d362015-09-13 10:18:33 -07004671void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4672 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004673{
4674 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08004675 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08004676 u32 seq;
4677 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004678
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08004679 err = -ENOBUFS;
David S. Miller38308472011-12-03 18:02:47 -05004680 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07004681
Roopa Prabhu19e42e42015-07-21 10:43:48 +02004682 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
David S. Miller38308472011-12-03 18:02:47 -05004683 if (!skb)
Thomas Graf21713eb2006-08-15 00:35:24 -07004684 goto errout;
4685
Brian Haley191cd582008-08-14 15:33:21 -07004686 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
David Ahernf8cfe2c2017-01-17 15:51:08 -08004687 event, info->portid, seq, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -08004688 if (err < 0) {
4689 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4690 WARN_ON(err == -EMSGSIZE);
4691 kfree_skb(skb);
4692 goto errout;
4693 }
Eric W. Biederman15e47302012-09-07 20:12:54 +00004694 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08004695 info->nlh, gfp_any());
4696 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07004697errout:
4698 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08004699 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004700}
4701
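/* Netdevice notifier: bind the per-netns null/prohibit/blackhole routes to
 * the loopback device when it registers and drop their idev references
 * when it unregisters.
 */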
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004702static int ip6_route_dev_notify(struct notifier_block *this,
Jiri Pirko351638e2013-05-28 01:30:21 +00004703 unsigned long event, void *ptr)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004704{
Jiri Pirko351638e2013-05-28 01:30:21 +00004705 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09004706 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004707
WANG Cong242d3a42017-05-08 10:12:13 -07004708 if (!(dev->flags & IFF_LOOPBACK))
4709 return NOTIFY_OK;
4710
4711 if (event == NETDEV_REGISTER) {
Changli Gaod8d1f302010-06-10 23:31:35 -07004712 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004713 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4714#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07004715 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004716 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07004717 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004718 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
4719#endif
WANG Cong76da0702017-06-20 11:42:27 -07004720 } else if (event == NETDEV_UNREGISTER &&
4721 dev->reg_state != NETREG_UNREGISTERED) {
4722		/* NETDEV_UNREGISTER could be fired multiple times by
4723 * netdev_wait_allrefs(). Make sure we only call this once.
4724 */
Eric Dumazet12d94a82017-08-15 04:09:51 -07004725 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
WANG Cong242d3a42017-05-08 10:12:13 -07004726#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Eric Dumazet12d94a82017-08-15 04:09:51 -07004727 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4728 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
WANG Cong242d3a42017-05-08 10:12:13 -07004729#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004730 }
4731
4732 return NOTIFY_OK;
4733}
4734
Linus Torvalds1da177e2005-04-16 15:20:36 -07004735/*
4736 * /proc
4737 */
4738
4739#ifdef CONFIG_PROC_FS
4740
Alexey Dobriyan33120b32007-11-06 05:27:11 -08004741static const struct file_operations ipv6_route_proc_fops = {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08004742 .open = ipv6_route_open,
4743 .read = seq_read,
4744 .llseek = seq_lseek,
Hannes Frederic Sowa8d2ca1d2013-09-21 16:55:59 +02004745 .release = seq_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08004746};
4747
Linus Torvalds1da177e2005-04-16 15:20:36 -07004748static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4749{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004750 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004751 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004752 net->ipv6.rt6_stats->fib_nodes,
4753 net->ipv6.rt6_stats->fib_route_nodes,
Wei Wang81eb8442017-10-06 12:06:11 -07004754 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004755 net->ipv6.rt6_stats->fib_rt_entries,
4756 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00004757 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004758 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004759
4760 return 0;
4761}
4762
4763static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4764{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07004765 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08004766}
4767
Arjan van de Ven9a321442007-02-12 00:55:35 -08004768static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004769 .open = rt6_stats_seq_open,
4770 .read = seq_read,
4771 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07004772 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004773};
4774#endif /* CONFIG_PROC_FS */
4775
4776#ifdef CONFIG_SYSCTL
4777
Linus Torvalds1da177e2005-04-16 15:20:36 -07004778static
Joe Perchesfe2c6332013-06-11 23:04:25 -07004779int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004780 void __user *buffer, size_t *lenp, loff_t *ppos)
4781{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004782 struct net *net;
4783 int delay;
4784 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004785 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004786
4787 net = (struct net *)ctl->extra1;
4788 delay = net->ipv6.sysctl.flush_delay;
4789 proc_dointvec(ctl, write, buffer, lenp, ppos);
Michal Kubeček2ac3ac82013-08-01 10:04:14 +02004790 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004791 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004792}
4793
Joe Perchesfe2c6332013-06-11 23:04:25 -07004794struct ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09004795 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004796 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08004797 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004798 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07004799 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004800 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07004801 },
4802 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004803 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08004804 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004805 .maxlen = sizeof(int),
4806 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004807 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004808 },
4809 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004810 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08004811 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004812 .maxlen = sizeof(int),
4813 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004814 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004815 },
4816 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004817 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08004818 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004819 .maxlen = sizeof(int),
4820 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004821 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004822 },
4823 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004824 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08004825 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004826 .maxlen = sizeof(int),
4827 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004828 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004829 },
4830 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004831 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08004832 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004833 .maxlen = sizeof(int),
4834 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004835 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004836 },
4837 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004838 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08004839 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004840 .maxlen = sizeof(int),
4841 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07004842 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004843 },
4844 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004845 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08004846 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004847 .maxlen = sizeof(int),
4848 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004849 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004850 },
4851 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004852 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08004853 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004854 .maxlen = sizeof(int),
4855 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07004856 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004857 },
4858 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004859 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08004860 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004861 .maxlen = sizeof(int),
4862 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08004863 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07004864 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08004865 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004866};
4867
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00004868struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08004869{
4870 struct ctl_table *table;
4871
4872 table = kmemdup(ipv6_route_table_template,
4873 sizeof(ipv6_route_table_template),
4874 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09004875
4876 if (table) {
4877 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00004878 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00004879 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09004880 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
4881 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4882 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
4883 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
4884 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
4885 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
4886 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08004887 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
Eric W. Biederman464dc802012-11-16 03:02:59 +00004888
4889 /* Don't export sysctls to unprivileged users */
4890 if (net->user_ns != &init_user_ns)
4891 table[0].procname = NULL;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09004892 }
4893
Daniel Lezcano760f2d02008-01-10 02:53:43 -08004894 return table;
4895}
Linus Torvalds1da177e2005-04-16 15:20:36 -07004896#endif
4897
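/* Per-netns init: copy the dst ops template, allocate the null (and, under
 * CONFIG_IPV6_MULTIPLE_TABLES, prohibit and blackhole) route entries and
 * set the routing sysctl defaults.
 */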
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00004898static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08004899{
Pavel Emelyanov633d424b2008-04-21 14:25:23 -07004900 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004901
Alexey Dobriyan86393e52009-08-29 01:34:49 +00004902 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
4903 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08004904
Eric Dumazetfc66f952010-10-08 06:37:34 +00004905 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4906 goto out_ip6_dst_ops;
4907
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004908 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
4909 sizeof(*net->ipv6.ip6_null_entry),
4910 GFP_KERNEL);
4911 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00004912 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07004913 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08004914 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
4915 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08004916
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.fib6_has_custom_rules = false;
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}

static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}

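/* "Late" per-netns init/exit: the /proc/net/ipv6_route and
 * /proc/net/rt6_stats entries are only created once the core per-netns
 * routing state above has been set up.
 */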
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}

static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}

static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

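/* Per-netns inetpeer base; IPv6 uses it for long-lived per-peer state
 * such as ICMPv6 rate limiting.
 */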
static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv6.peers = bp;
	return 0;
}

static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static struct pernet_operations ipv6_inetpeer_ops = {
	.init = ipv6_inetpeer_init,
	.exit = ipv6_inetpeer_exit,
};

static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

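/* Lower priority than addrconf's netdev notifier, so this one runs after
 * addrconf has handled the same netdevice event.
 */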
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};

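/* Must run after the loopback device has been registered; it attaches
 * the pre-allocated template routes to init_net's loopback device.
 */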
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * so the loopback reference in rt6_info will not be taken; do it
	 * manually for init_net.
	 */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
}

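/* Boot-time setup: dst cache and blackhole dst accounting first, then the
 * pernet subsystems, FIB, xfrm and policy rules, the late (procfs) ops,
 * the rtnetlink route handlers and finally the netdevice notifier.
 */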
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

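	/* rtnetlink handlers for RTM_NEWROUTE/DELROUTE/GETROUTE; the
	 * RTM_GETROUTE doit runs without holding the RTNL lock
	 * (RTNL_FLAG_DOIT_UNLOCKED).
	 */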
	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
				   inet6_rtm_newroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
				   inet6_rtm_delroute, NULL, 0);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
				   inet6_rtm_getroute, NULL,
				   RTNL_FLAG_DOIT_UNLOCKED);
	if (ret < 0)
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

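/* Error unwind: tear down in the reverse order of the setup above. */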
out_register_late_subsys:
	rtnl_unregister_all(PF_INET6);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}

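/* Tear down everything ip6_route_init() registered, in reverse order. */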
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}